long(Int64)をUtf8に変換する(前編)

固定小数点型(少数桁=8桁)の実装が気になっていまして、その中で、long(Int64)をUtf8に変換するコードを、パフォーマンスについて考察しました。

ミッションは、longの値が、1234_5678_0000の時、1234.5678と解釈して、整数部分と小数部分を分離抽出する。
つまり、1234と56780000を取得する。
最終的には、Utf8で、”1234.5678″を作ることがゴールになります。

いろいろ調べたところ、Utf8JsonのWriteInt64の実装が速そうです。
私がUtf8Jsonを使っている時にも、内部でいつも使用していたのでしょう。

これをベースに、細かく調べていくと、少し早くなりそうな要素がいくつかあります。
まず、作られた時代でしょうか、Spanを使っていません。
long.MinValueの時の実装をSpan対応にしてみます。
“”u8も使っていますので、.net7.0以外では動かないかもしれませんが。

//before
BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
buffer[offset++] = (byte)'-';
buffer[offset++] = (byte)'9';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'0';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'6';
buffer[offset++] = (byte)'8';
buffer[offset++] = (byte)'5';
buffer[offset++] = (byte)'4';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'5';
buffer[offset++] = (byte)'8';
buffer[offset++] = (byte)'0';
buffer[offset++] = (byte)'8';
return offset - startOffset;

//after
BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
ReadOnlySpan<byte> minValue = "-9223372036854775808"u8;
minValue.CopyTo(buffer.AsSpan(offset));
return minValue.Length;
|      Method |     Mean |     Error |    StdDev |
|------------ |---------:|----------:|----------:|
|      Before | 4.814 us | 0.0957 us | 0.1402 us |
|      After  | 2.661 us | 0.0515 us | 0.0506 us |

値がMinValueの場合に限って、だいぶ速くなりますね。
まあ、値がMinValueの場合って、あまりなさそうですが。
まあ、ソースコードも短くなるし、よしとしましょう。
速くなるのは、配列の境界値チェックがほとんど無くなることが理由でしょうか。詳しく調べませんでしたが。

次に、内部変数で、longが多用されています。これをunitで足りる部分はuintに変えてみたところ、数%速くなりました。
次に、4桁ずつ処理していくところのif分を、万以上の桁についてはifの作りを変えます。

//before
if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
if (num2 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6; }
if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;

//after
if (num2 < 100) {
	if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
	BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6; }
}
if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;

この変更で、1桁/2桁/3桁/4桁の場合のIfの回数が、 1/2/3/3→ 2/2/2/2に変わって、少し改善します。
はじめの4桁については、小さい数が来る場合を優先して、元のIfの形で残します。

ここまでの変更を反映して、ベンチマークを取ってみます。
データは、1桁~20桁の整数、+-で40件です。

| Before | 6.016 us | 0.1166 us | 0.1091 us |
|  After | 5.741 us | 0.1104 us | 0.1134 us |

頑張っても、4%ほどしか速くなりませんね。元のコード、良くできていますね。
改造前/後のコードはこちら

//before
public static int WriteInt64(ref byte[] buffer, int offset, long value)
{
    var startOffset = offset;

    long num1 = value, num2, num3, num4, num5, div;

    if (value < 0)
    {
        if (value == long.MinValue) // -9223372036854775808
        {
            BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
            buffer[offset++] = (byte)'-';
            buffer[offset++] = (byte)'9';
            buffer[offset++] = (byte)'2';
            buffer[offset++] = (byte)'2';
            buffer[offset++] = (byte)'3';
            buffer[offset++] = (byte)'3';
            buffer[offset++] = (byte)'7';
            buffer[offset++] = (byte)'2';
            buffer[offset++] = (byte)'0';
            buffer[offset++] = (byte)'3';
            buffer[offset++] = (byte)'6';
            buffer[offset++] = (byte)'8';
            buffer[offset++] = (byte)'5';
            buffer[offset++] = (byte)'4';
            buffer[offset++] = (byte)'7';
            buffer[offset++] = (byte)'7';
            buffer[offset++] = (byte)'5';
            buffer[offset++] = (byte)'8';
            buffer[offset++] = (byte)'0';
            buffer[offset++] = (byte)'8';
            return offset - startOffset;
        }

        BinaryUtil.EnsureCapacity(ref buffer, offset, 1);
        buffer[offset++] = (byte)'-';
        num1 = unchecked(-value);
    }

    // WriteUInt64(inlined)

    if (num1 < 10000)
    {
        if (num1 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 1); goto L1; }
        if (num1 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 2); goto L2; }
        if (num1 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 3); goto L3; }
        BinaryUtil.EnsureCapacity(ref buffer, offset, 4); goto L4;
    }
    else
    {
        num2 = num1 / 10000;
        num1 -= num2 * 10000;
        if (num2 < 10000)
        {
            if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
            if (num2 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6; }
            if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
            BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;
        }
        else
        {
            num3 = num2 / 10000;
            num2 -= num3 * 10000;
            if (num3 < 10000)
            {
                if (num3 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 9); goto L9; }
                if (num3 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 10); goto L10; }
                if (num3 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 11); goto L11; }
                BinaryUtil.EnsureCapacity(ref buffer, offset, 12); goto L12;
            }
            else
            {
                num4 = num3 / 10000;
                num3 -= num4 * 10000;
                if (num4 < 10000)
                {
                    if (num4 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 13); goto L13; }
                    if (num4 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 14); goto L14; }
                    if (num4 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 15); goto L15; }
                    BinaryUtil.EnsureCapacity(ref buffer, offset, 16); goto L16;
                }
                else
                {
                    num5 = num4 / 10000;
                    num4 -= num5 * 10000;
                    if (num5 < 10000)
                    {
                        if (num5 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 17); goto L17; }
                        if (num5 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 18); goto L18; }
                        if (num5 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 19); goto L19; }
                        BinaryUtil.EnsureCapacity(ref buffer, offset, 20); goto L20;
                    }
                L20:
                    buffer[offset++] = (byte)('0' + (div = (num5 * 8389L) >> 23));
                    num5 -= div * 1000;
                L19:
                    buffer[offset++] = (byte)('0' + (div = (num5 * 5243L) >> 19));
                    num5 -= div * 100;
                L18:
                    buffer[offset++] = (byte)('0' + (div = (num5 * 6554L) >> 16));
                    num5 -= div * 10;
                L17:
                    buffer[offset++] = (byte)('0' + (num5));
                }
            L16:
                buffer[offset++] = (byte)('0' + (div = (num4 * 8389L) >> 23));
                num4 -= div * 1000;
            L15:
                buffer[offset++] = (byte)('0' + (div = (num4 * 5243L) >> 19));
                num4 -= div * 100;
            L14:
                buffer[offset++] = (byte)('0' + (div = (num4 * 6554L) >> 16));
                num4 -= div * 10;
            L13:
                buffer[offset++] = (byte)('0' + (num4));
            }
        L12:
            buffer[offset++] = (byte)('0' + (div = (num3 * 8389L) >> 23));
            num3 -= div * 1000;
        L11:
            buffer[offset++] = (byte)('0' + (div = (num3 * 5243L) >> 19));
            num3 -= div * 100;
        L10:
            buffer[offset++] = (byte)('0' + (div = (num3 * 6554L) >> 16));
            num3 -= div * 10;
        L9:
            buffer[offset++] = (byte)('0' + (num3));
        }
    L8:
        buffer[offset++] = (byte)('0' + (div = (num2 * 8389L) >> 23));
        num2 -= div * 1000;
    L7:
        buffer[offset++] = (byte)('0' + (div = (num2 * 5243L) >> 19));
        num2 -= div * 100;
    L6:
        buffer[offset++] = (byte)('0' + (div = (num2 * 6554L) >> 16));
        num2 -= div * 10;
    L5:
        buffer[offset++] = (byte)('0' + (num2));
    }
L4:
    buffer[offset++] = (byte)('0' + (div = (num1 * 8389L) >> 23));
    num1 -= div * 1000;
L3:
    buffer[offset++] = (byte)('0' + (div = (num1 * 5243L) >> 19));
    num1 -= div * 100;
L2:
    buffer[offset++] = (byte)('0' + (div = (num1 * 6554L) >> 16));
    num1 -= div * 10;
L1:
    buffer[offset++] = (byte)('0' + (num1));

    return offset - startOffset;
}
//after
public static int WriteInt64New(ref byte[] buffer, int offset, long value)
{
    var startOffset = offset;

    uint num1, num2, num3, num4, num5, div;
    ulong valueA, valueB;

    if (value < 0)
    {
        if (value == long.MinValue) // -9223372036854775808
        {
            BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
            ReadOnlySpan<byte> minValue = "-9223372036854775808"u8;
            minValue.CopyTo(buffer.AsSpan(offset));
            return minValue.Length;
        }

        BinaryUtil.EnsureCapacity(ref buffer, offset, 1);
        buffer[offset++] = (byte)'-';
        valueA = (ulong)(unchecked(-value));
    }
    else
    {
        valueA = (ulong)value;
    }

    // WriteUInt64(inlined)

    if (valueA < 10000)
    {
        num1 = (uint)valueA;
        if (num1 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 1); goto L1; }
        if (num1 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 2); goto L2; }
        if (num1 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 3); goto L3; }
        BinaryUtil.EnsureCapacity(ref buffer, offset, 4); goto L4;
    }
    else
    {
        valueB = valueA / 10000;
        num1 = (uint)(valueA - valueB * 10000);
        if (valueB < 10000)
        {
            num2 = (uint)valueB;
            if (num2 < 100)
            {
                if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
                BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6;
            }
            if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
            BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;
        }
        else
        {
            valueA = valueB / 10000;
            num2 = (uint)(valueB - valueA * 10000);
            if (valueA < 10000)
            {
                num3 = (uint)valueA;
                if (num3 < 100)
                {
                    if (num3 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 9); goto L9; }
                    BinaryUtil.EnsureCapacity(ref buffer, offset, 10); goto L10;
                }
                if (num3 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 11); goto L11; }
                BinaryUtil.EnsureCapacity(ref buffer, offset, 12); goto L12;
            }
            else
            {
                valueB = valueA / 10000;
                num3 = (uint)(valueA - valueB * 10000);
                if (valueB < 10000)
                {
                    num4 = (uint)valueB;
                    if (num4 < 100)
                    {
                        if (num4 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 13); goto L13; }
                        BinaryUtil.EnsureCapacity(ref buffer, offset, 14); goto L14;
                    }
                    if (num4 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 15); goto L15; }
                    BinaryUtil.EnsureCapacity(ref buffer, offset, 16); goto L16;
                }
                else
                {
                    valueA = valueB / 10000;
                    num4 = (uint)(valueB - valueA * 10000);
                    //if (value2 < 10000)
                    {
                        num5 = (uint)valueA;
                        if (num5 < 100)
                        {
                            if (num5 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 17); goto L17; }
                            BinaryUtil.EnsureCapacity(ref buffer, offset, 18); goto L18;
                        }
                        if (num5 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 19); goto L19; }
                        BinaryUtil.EnsureCapacity(ref buffer, offset, 20); goto L20;
                    }
                L20:
                    buffer[offset++] = (byte)('0' + (div = (num5 * 8389) >> 23));
                    num5 -= div * 1000;
                L19:
                    buffer[offset++] = (byte)('0' + (div = (num5 * 5243) >> 19));
                    num5 -= div * 100;
                L18:
                    buffer[offset++] = (byte)('0' + (div = (num5 * 6554) >> 16));
                    num5 -= div * 10;
                L17:
                    buffer[offset++] = (byte)('0' + (num5));
                }
            L16:
                buffer[offset++] = (byte)('0' + (div = (num4 * 8389) >> 23));
                num4 -= div * 1000;
            L15:
                buffer[offset++] = (byte)('0' + (div = (num4 * 5243) >> 19));
                num4 -= div * 100;
            L14:
                buffer[offset++] = (byte)('0' + (div = (num4 * 6554) >> 16));
                num4 -= div * 10;
            L13:
                buffer[offset++] = (byte)('0' + (num4));
            }
        L12:
            buffer[offset++] = (byte)('0' + (div = (num3 * 8389) >> 23));
            num3 -= div * 1000;
        L11:
            buffer[offset++] = (byte)('0' + (div = (num3 * 5243) >> 19));
            num3 -= div * 100;
        L10:
            buffer[offset++] = (byte)('0' + (div = (num3 * 6554) >> 16));
            num3 -= div * 10;
        L9:
            buffer[offset++] = (byte)('0' + (num3));
        }
    L8:
        buffer[offset++] = (byte)('0' + (div = (num2 * 8389) >> 23));
        num2 -= div * 1000;
    L7:
        buffer[offset++] = (byte)('0' + (div = (num2 * 5243) >> 19));
        num2 -= div * 100;
    L6:
        buffer[offset++] = (byte)('0' + (div = (num2 * 6554) >> 16));
        num2 -= div * 10;
    L5:
        buffer[offset++] = (byte)('0' + (num2));
    }
L4:
    buffer[offset++] = (byte)('0' + (div = (num1 * 8389) >> 23));
    num1 -= div * 1000;
L3:
    buffer[offset++] = (byte)('0' + (div = (num1 * 5243) >> 19));
    num1 -= div * 100;
L2:
    buffer[offset++] = (byte)('0' + (div = (num1 * 6554) >> 16));
    num1 -= div * 10;
L1:
    buffer[offset++] = (byte)('0' + (num1));

    return offset - startOffset;
}

さて、長くなってきたので、続きは後編で。

投稿日:
カテゴリー: C#

1件のコメント

コメントする

メールアドレスが公開されることはありません。 が付いている欄は必須項目です