固定小数点型(少数桁=8桁)の実装が気になっていまして、その中で、long(Int64)をUtf8に変換するコードを、パフォーマンスについて考察しました。
ミッションは、longの値が、1234_5678_0000の時、1234.5678と解釈して、整数部分と小数部分を分離抽出する。
つまり、1234と56780000を取得する。
最終的には、Utf8で、”1234.5678″を作ることがゴールになります。
いろいろ調べたところ、Utf8JsonのWriteInt64の実装が速そうです。
私がUtf8Jsonを使っている時にも、内部でいつも使用していたのでしょう。
これをベースに、細かく調べていくと、少し早くなりそうな要素がいくつかあります。
まず、作られた時代でしょうか、Spanを使っていません。
long.MinValueの時の実装をSpan対応にしてみます。
“”u8も使っていますので、.net7.0以外では動かないかもしれませんが。
//before
BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
buffer[offset++] = (byte)'-';
buffer[offset++] = (byte)'9';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'0';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'6';
buffer[offset++] = (byte)'8';
buffer[offset++] = (byte)'5';
buffer[offset++] = (byte)'4';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'5';
buffer[offset++] = (byte)'8';
buffer[offset++] = (byte)'0';
buffer[offset++] = (byte)'8';
return offset - startOffset;
//after
BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
ReadOnlySpan<byte> minValue = "-9223372036854775808"u8;
minValue.CopyTo(buffer.AsSpan(offset));
return minValue.Length;
| Method | Mean | Error | StdDev |
|------------ |---------:|----------:|----------:|
| Before | 4.814 us | 0.0957 us | 0.1402 us |
| After | 2.661 us | 0.0515 us | 0.0506 us |
値がMinValueの場合に限って、だいぶ速くなりますね。
まあ、値がMinValueの場合って、あまりなさそうですが。
まあ、ソースコードも短くなるし、よしとしましょう。
速くなるのは、配列の境界値チェックがほとんど無くなることが理由でしょうか。詳しく調べませんでしたが。
次に、内部変数で、longが多用されています。これをunitで足りる部分はuintに変えてみたところ、数%速くなりました。
次に、4桁ずつ処理していくところのif分を、万以上の桁についてはifの作りを変えます。
//before
if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
if (num2 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6; }
if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;
//after
if (num2 < 100) {
if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6; }
}
if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;
この変更で、1桁/2桁/3桁/4桁の場合のIfの回数が、 1/2/3/3→ 2/2/2/2に変わって、少し改善します。
はじめの4桁については、小さい数が来る場合を優先して、元のIfの形で残します。
ここまでの変更を反映して、ベンチマークを取ってみます。
データは、1桁~20桁の整数、+-で40件です。
| Before | 6.016 us | 0.1166 us | 0.1091 us |
| After | 5.741 us | 0.1104 us | 0.1134 us |
頑張っても、4%ほどしか速くなりませんね。元のコード、良くできていますね。
改造前/後のコードはこちら
//before
public static int WriteInt64(ref byte[] buffer, int offset, long value)
{
var startOffset = offset;
long num1 = value, num2, num3, num4, num5, div;
if (value < 0)
{
if (value == long.MinValue) // -9223372036854775808
{
BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
buffer[offset++] = (byte)'-';
buffer[offset++] = (byte)'9';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'2';
buffer[offset++] = (byte)'0';
buffer[offset++] = (byte)'3';
buffer[offset++] = (byte)'6';
buffer[offset++] = (byte)'8';
buffer[offset++] = (byte)'5';
buffer[offset++] = (byte)'4';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'7';
buffer[offset++] = (byte)'5';
buffer[offset++] = (byte)'8';
buffer[offset++] = (byte)'0';
buffer[offset++] = (byte)'8';
return offset - startOffset;
}
BinaryUtil.EnsureCapacity(ref buffer, offset, 1);
buffer[offset++] = (byte)'-';
num1 = unchecked(-value);
}
// WriteUInt64(inlined)
if (num1 < 10000)
{
if (num1 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 1); goto L1; }
if (num1 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 2); goto L2; }
if (num1 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 3); goto L3; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 4); goto L4;
}
else
{
num2 = num1 / 10000;
num1 -= num2 * 10000;
if (num2 < 10000)
{
if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
if (num2 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6; }
if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;
}
else
{
num3 = num2 / 10000;
num2 -= num3 * 10000;
if (num3 < 10000)
{
if (num3 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 9); goto L9; }
if (num3 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 10); goto L10; }
if (num3 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 11); goto L11; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 12); goto L12;
}
else
{
num4 = num3 / 10000;
num3 -= num4 * 10000;
if (num4 < 10000)
{
if (num4 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 13); goto L13; }
if (num4 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 14); goto L14; }
if (num4 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 15); goto L15; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 16); goto L16;
}
else
{
num5 = num4 / 10000;
num4 -= num5 * 10000;
if (num5 < 10000)
{
if (num5 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 17); goto L17; }
if (num5 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 18); goto L18; }
if (num5 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 19); goto L19; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 20); goto L20;
}
L20:
buffer[offset++] = (byte)('0' + (div = (num5 * 8389L) >> 23));
num5 -= div * 1000;
L19:
buffer[offset++] = (byte)('0' + (div = (num5 * 5243L) >> 19));
num5 -= div * 100;
L18:
buffer[offset++] = (byte)('0' + (div = (num5 * 6554L) >> 16));
num5 -= div * 10;
L17:
buffer[offset++] = (byte)('0' + (num5));
}
L16:
buffer[offset++] = (byte)('0' + (div = (num4 * 8389L) >> 23));
num4 -= div * 1000;
L15:
buffer[offset++] = (byte)('0' + (div = (num4 * 5243L) >> 19));
num4 -= div * 100;
L14:
buffer[offset++] = (byte)('0' + (div = (num4 * 6554L) >> 16));
num4 -= div * 10;
L13:
buffer[offset++] = (byte)('0' + (num4));
}
L12:
buffer[offset++] = (byte)('0' + (div = (num3 * 8389L) >> 23));
num3 -= div * 1000;
L11:
buffer[offset++] = (byte)('0' + (div = (num3 * 5243L) >> 19));
num3 -= div * 100;
L10:
buffer[offset++] = (byte)('0' + (div = (num3 * 6554L) >> 16));
num3 -= div * 10;
L9:
buffer[offset++] = (byte)('0' + (num3));
}
L8:
buffer[offset++] = (byte)('0' + (div = (num2 * 8389L) >> 23));
num2 -= div * 1000;
L7:
buffer[offset++] = (byte)('0' + (div = (num2 * 5243L) >> 19));
num2 -= div * 100;
L6:
buffer[offset++] = (byte)('0' + (div = (num2 * 6554L) >> 16));
num2 -= div * 10;
L5:
buffer[offset++] = (byte)('0' + (num2));
}
L4:
buffer[offset++] = (byte)('0' + (div = (num1 * 8389L) >> 23));
num1 -= div * 1000;
L3:
buffer[offset++] = (byte)('0' + (div = (num1 * 5243L) >> 19));
num1 -= div * 100;
L2:
buffer[offset++] = (byte)('0' + (div = (num1 * 6554L) >> 16));
num1 -= div * 10;
L1:
buffer[offset++] = (byte)('0' + (num1));
return offset - startOffset;
}
//after
public static int WriteInt64New(ref byte[] buffer, int offset, long value)
{
var startOffset = offset;
uint num1, num2, num3, num4, num5, div;
ulong valueA, valueB;
if (value < 0)
{
if (value == long.MinValue) // -9223372036854775808
{
BinaryUtil.EnsureCapacity(ref buffer, offset, 20);
ReadOnlySpan<byte> minValue = "-9223372036854775808"u8;
minValue.CopyTo(buffer.AsSpan(offset));
return minValue.Length;
}
BinaryUtil.EnsureCapacity(ref buffer, offset, 1);
buffer[offset++] = (byte)'-';
valueA = (ulong)(unchecked(-value));
}
else
{
valueA = (ulong)value;
}
// WriteUInt64(inlined)
if (valueA < 10000)
{
num1 = (uint)valueA;
if (num1 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 1); goto L1; }
if (num1 < 100) { BinaryUtil.EnsureCapacity(ref buffer, offset, 2); goto L2; }
if (num1 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 3); goto L3; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 4); goto L4;
}
else
{
valueB = valueA / 10000;
num1 = (uint)(valueA - valueB * 10000);
if (valueB < 10000)
{
num2 = (uint)valueB;
if (num2 < 100)
{
if (num2 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 5); goto L5; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 6); goto L6;
}
if (num2 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 7); goto L7; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 8); goto L8;
}
else
{
valueA = valueB / 10000;
num2 = (uint)(valueB - valueA * 10000);
if (valueA < 10000)
{
num3 = (uint)valueA;
if (num3 < 100)
{
if (num3 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 9); goto L9; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 10); goto L10;
}
if (num3 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 11); goto L11; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 12); goto L12;
}
else
{
valueB = valueA / 10000;
num3 = (uint)(valueA - valueB * 10000);
if (valueB < 10000)
{
num4 = (uint)valueB;
if (num4 < 100)
{
if (num4 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 13); goto L13; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 14); goto L14;
}
if (num4 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 15); goto L15; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 16); goto L16;
}
else
{
valueA = valueB / 10000;
num4 = (uint)(valueB - valueA * 10000);
//if (value2 < 10000)
{
num5 = (uint)valueA;
if (num5 < 100)
{
if (num5 < 10) { BinaryUtil.EnsureCapacity(ref buffer, offset, 17); goto L17; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 18); goto L18;
}
if (num5 < 1000) { BinaryUtil.EnsureCapacity(ref buffer, offset, 19); goto L19; }
BinaryUtil.EnsureCapacity(ref buffer, offset, 20); goto L20;
}
L20:
buffer[offset++] = (byte)('0' + (div = (num5 * 8389) >> 23));
num5 -= div * 1000;
L19:
buffer[offset++] = (byte)('0' + (div = (num5 * 5243) >> 19));
num5 -= div * 100;
L18:
buffer[offset++] = (byte)('0' + (div = (num5 * 6554) >> 16));
num5 -= div * 10;
L17:
buffer[offset++] = (byte)('0' + (num5));
}
L16:
buffer[offset++] = (byte)('0' + (div = (num4 * 8389) >> 23));
num4 -= div * 1000;
L15:
buffer[offset++] = (byte)('0' + (div = (num4 * 5243) >> 19));
num4 -= div * 100;
L14:
buffer[offset++] = (byte)('0' + (div = (num4 * 6554) >> 16));
num4 -= div * 10;
L13:
buffer[offset++] = (byte)('0' + (num4));
}
L12:
buffer[offset++] = (byte)('0' + (div = (num3 * 8389) >> 23));
num3 -= div * 1000;
L11:
buffer[offset++] = (byte)('0' + (div = (num3 * 5243) >> 19));
num3 -= div * 100;
L10:
buffer[offset++] = (byte)('0' + (div = (num3 * 6554) >> 16));
num3 -= div * 10;
L9:
buffer[offset++] = (byte)('0' + (num3));
}
L8:
buffer[offset++] = (byte)('0' + (div = (num2 * 8389) >> 23));
num2 -= div * 1000;
L7:
buffer[offset++] = (byte)('0' + (div = (num2 * 5243) >> 19));
num2 -= div * 100;
L6:
buffer[offset++] = (byte)('0' + (div = (num2 * 6554) >> 16));
num2 -= div * 10;
L5:
buffer[offset++] = (byte)('0' + (num2));
}
L4:
buffer[offset++] = (byte)('0' + (div = (num1 * 8389) >> 23));
num1 -= div * 1000;
L3:
buffer[offset++] = (byte)('0' + (div = (num1 * 5243) >> 19));
num1 -= div * 100;
L2:
buffer[offset++] = (byte)('0' + (div = (num1 * 6554) >> 16));
num1 -= div * 10;
L1:
buffer[offset++] = (byte)('0' + (num1));
return offset - startOffset;
}
さて、長くなってきたので、続きは後編で。
1件のコメント