Skip to content

Commit a31a541

Browse files
Merge pull request #1303 from SixLabors/js/jpeg-decode-perf
Optimize jpeg decoder based on traces.
2 parents 521fae3 + 1eaaf1a commit a31a541

File tree

4 files changed

+61
-80
lines changed

4 files changed

+61
-80
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.cs

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@
66
using System.Numerics;
77
using System.Runtime.CompilerServices;
88
using System.Runtime.InteropServices;
9+
#if SUPPORTS_RUNTIME_INTRINSICS
10+
using System.Runtime.Intrinsics;
11+
using System.Runtime.Intrinsics.X86;
12+
#endif
913

1014
namespace SixLabors.ImageSharp
1115
{
@@ -28,7 +32,7 @@ internal static partial class SimdUtils
2832
[MethodImpl(MethodImplOptions.AggressiveInlining)]
2933
internal static Vector4 PseudoRound(this Vector4 v)
3034
{
31-
var sign = Vector4Utilities.FastClamp(v, new Vector4(-1), new Vector4(1));
35+
Vector4 sign = Vector4Utilities.FastClamp(v, new Vector4(-1), new Vector4(1));
3236

3337
return v + (sign * 0.5f);
3438
}
@@ -44,13 +48,24 @@ internal static Vector4 PseudoRound(this Vector4 v)
4448
[MethodImpl(MethodImplOptions.AggressiveInlining)]
4549
internal static Vector<float> FastRound(this Vector<float> v)
4650
{
47-
var magic0 = new Vector<int>(int.MinValue); // 0x80000000
48-
Vector<float> sgn0 = Vector.AsVectorSingle(magic0);
49-
Vector<float> and0 = Vector.BitwiseAnd(sgn0, v);
50-
Vector<float> or0 = Vector.BitwiseOr(and0, new Vector<float>(8388608.0f));
51-
Vector<float> add0 = Vector.Add(v, or0);
52-
Vector<float> sub0 = Vector.Subtract(add0, or0);
53-
return sub0;
51+
#if SUPPORTS_RUNTIME_INTRINSICS
52+
53+
if (Avx2.IsSupported)
54+
{
55+
ref Vector256<float> v256 = ref Unsafe.As<Vector<float>, Vector256<float>>(ref v);
56+
Vector256<float> vRound = Avx.RoundToNearestInteger(v256);
57+
return Unsafe.As<Vector256<float>, Vector<float>>(ref vRound);
58+
}
59+
else
60+
#endif
61+
{
62+
var magic0 = new Vector<int>(int.MinValue); // 0x80000000
63+
var sgn0 = Vector.AsVectorSingle(magic0);
64+
var and0 = Vector.BitwiseAnd(sgn0, v);
65+
var or0 = Vector.BitwiseOr(and0, new Vector<float>(8388608.0f));
66+
var add0 = Vector.Add(v, or0);
67+
return Vector.Subtract(add0, or0);
68+
}
5469
}
5570

5671
/// <summary>

src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -93,25 +93,24 @@ public void FillBuffer()
9393
public unsafe int DecodeHuffman(ref HuffmanTable h)
9494
{
9595
this.CheckBits();
96-
int v = this.PeekBits(JpegConstants.Huffman.LookupBits);
97-
int symbol = h.LookaheadValue[v];
98-
int size = h.LookaheadSize[v];
96+
int index = this.PeekBits(JpegConstants.Huffman.LookupBits);
97+
int size = h.LookaheadSize[index];
9998

100-
if (size == JpegConstants.Huffman.SlowBits)
99+
if (size < JpegConstants.Huffman.SlowBits)
101100
{
102-
ulong x = this.data << (JpegConstants.Huffman.RegisterSize - this.remainingBits);
103-
while (x > h.MaxCode[size])
104-
{
105-
size++;
106-
}
101+
this.remainingBits -= size;
102+
return h.LookaheadValue[index];
103+
}
107104

108-
v = (int)(x >> (JpegConstants.Huffman.RegisterSize - size));
109-
symbol = h.Values[(h.ValOffset[size] + v) & 0xFF];
105+
ulong x = this.data << (JpegConstants.Huffman.RegisterSize - this.remainingBits);
106+
while (x > h.MaxCode[size])
107+
{
108+
size++;
110109
}
111110

112111
this.remainingBits -= size;
113112

114-
return symbol;
113+
return h.Values[(h.ValOffset[size] + (int)(x >> (JpegConstants.Huffman.RegisterSize - size))) & 0xFF];
115114
}
116115

117116
[MethodImpl(InliningOptions.ShortMethod)]

tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs

Lines changed: 15 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
// Copyright (c) Six Labors.
22
// Licensed under the Apache License, Version 2.0.
33

4-
using System.Drawing;
54
using System.IO;
65
using BenchmarkDotNet.Attributes;
76

@@ -15,7 +14,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
1514
[Config(typeof(Config.ShortClr))]
1615
public class DecodeJpegParseStreamOnly
1716
{
18-
[Params(TestImages.Jpeg.Baseline.Jpeg420Exif)]
17+
[Params(TestImages.Jpeg.BenchmarkSuite.Lake_Small444YCbCr)]
1918
public string TestImage { get; set; }
2019

2120
private string TestImageFullPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, this.TestImage);
@@ -37,7 +36,7 @@ public SDSize JpegSystemDrawing()
3736
}
3837

3938
[Benchmark(Description = "JpegDecoderCore.ParseStream")]
40-
public void ParseStreamPdfJs()
39+
public void ParseStream()
4140
{
4241
using var memoryStream = new MemoryStream(this.jpegBytes);
4342
using var bufferedStream = new BufferedReadStream(Configuration.Default, memoryStream);
@@ -46,22 +45,18 @@ public void ParseStreamPdfJs()
4645
decoder.ParseStream(bufferedStream);
4746
decoder.Dispose();
4847
}
49-
50-
// RESULTS (2019 April 23):
51-
//
52-
// BenchmarkDotNet=v0.11.3, OS=Windows 10.0.17763.437 (1809/October2018Update/Redstone5)
53-
// Intel Core i7-6600U CPU 2.60GHz (Skylake), 1 CPU, 4 logical and 2 physical cores
54-
// .NET Core SDK=2.2.202
55-
// [Host] : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
56-
// Clr : .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3362.0
57-
// Core : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
58-
//
59-
// | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
60-
// |---------------------------- |----- |-------- |--------------------- |---------:|---------:|----------:|------:|--------:|---------:|------:|------:|----------:|
61-
// | 'System.Drawing FULL' | Clr | Clr | Jpg/b(...)f.jpg [28] | 18.69 ms | 8.273 ms | 0.4535 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.89 KB |
62-
// | JpegDecoderCore.ParseStream | Clr | Clr | Jpg/b(...)f.jpg [28] | 15.76 ms | 4.266 ms | 0.2339 ms | 0.84 | 0.03 | - | - | - | 11.83 KB |
63-
// | | | | | | | | | | | | | |
64-
// | 'System.Drawing FULL' | Core | Core | Jpg/b(...)f.jpg [28] | 17.68 ms | 2.711 ms | 0.1486 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.04 KB |
65-
// | JpegDecoderCore.ParseStream | Core | Core | Jpg/b(...)f.jpg [28] | 14.27 ms | 3.671 ms | 0.2012 ms | 0.81 | 0.00 | - | - | - | 11.76 KB |
6648
}
49+
50+
/*
51+
| Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
52+
|---------------------------- |----------- |-------------- |--------------------- |---------:|----------:|----------:|------:|--------:|------:|------:|----------:|
53+
| 'System.Drawing FULL' | Job-HITJFX | .NET 4.7.2 | Jpg/b(...)e.jpg [21] | 5.828 ms | 0.9885 ms | 0.0542 ms | 1.00 | 46.8750 | - | - | 211566 B |
54+
| JpegDecoderCore.ParseStream | Job-HITJFX | .NET 4.7.2 | Jpg/b(...)e.jpg [21] | 5.833 ms | 0.2923 ms | 0.0160 ms | 1.00 | - | - | - | 12416 B |
55+
| | | | | | | | | | | | |
56+
| 'System.Drawing FULL' | Job-WPSKZD | .NET Core 2.1 | Jpg/b(...)e.jpg [21] | 6.018 ms | 2.1374 ms | 0.1172 ms | 1.00 | 46.8750 | - | - | 210768 B |
57+
| JpegDecoderCore.ParseStream | Job-WPSKZD | .NET Core 2.1 | Jpg/b(...)e.jpg [21] | 4.382 ms | 0.9009 ms | 0.0494 ms | 0.73 | - | - | - | 12360 B |
58+
| | | | | | | | | | | | |
59+
| 'System.Drawing FULL' | Job-ZLSNRP | .NET Core 3.1 | Jpg/b(...)e.jpg [21] | 5.714 ms | 0.4078 ms | 0.0224 ms | 1.00 | - | - | - | 176 B |
60+
| JpegDecoderCore.ParseStream | Job-ZLSNRP | .NET Core 3.1 | Jpg/b(...)e.jpg [21] | 4.239 ms | 1.0943 ms | 0.0600 ms | 0.74 | - | - | - | 12406 B |
61+
*/
6762
}

tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs

Lines changed: 12 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -90,45 +90,17 @@ public Size JpegImageSharp()
9090
}
9191
}
9292

93-
// RESULTS (2018 November 4):
94-
//
95-
// BenchmarkDotNet=v0.10.14, OS=Windows 10.0.17134
96-
// Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
97-
// Frequency=2742191 Hz, Resolution=364.6719 ns, Timer=TSC
98-
// .NET Core SDK=2.1.403
99-
// [Host] : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT
100-
//
101-
// Method | TestImage | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
102-
// ------------------------------- |-------------------------------------------- |-----------:|-----------:|----------:|-------:|---------:|----------:|---------:|---------:|------------:|
103-
// 'Decode Jpeg - System.Drawing' | Jpg/baseline/Lake.jpg | 6.117 ms | 0.3923 ms | 0.0222 ms | 1.00 | 0.00 | 62.5000 | - | - | 205.83 KB |
104-
// 'Decode Jpeg - ImageSharp' | Jpg/baseline/Lake.jpg | 18.126 ms | 0.6023 ms | 0.0340 ms | 2.96 | 0.01 | - | - | - | 19.97 KB |
105-
// | | | | | | | | | | |
106-
// 'Decode Jpeg - System.Drawing' | Jpg/baseline/jpeg420exif.jpg | 17.063 ms | 2.6096 ms | 0.1474 ms | 1.00 | 0.00 | 218.7500 | - | - | 757.04 KB |
107-
// 'Decode Jpeg - ImageSharp' | Jpg/baseline/jpeg420exif.jpg | 41.366 ms | 1.0115 ms | 0.0572 ms | 2.42 | 0.02 | - | - | - | 21.94 KB |
108-
// | | | | | | | | | | |
109-
// 'Decode Jpeg - System.Drawing' | Jpg/issues/Issue518-Bad-RST-Progressive.jpg | 428.282 ms | 94.9163 ms | 5.3629 ms | 1.00 | 0.00 | 2375.0000 | - | - | 7403.76 KB |
110-
// 'Decode Jpeg - ImageSharp' | Jpg/issues/Issue518-Bad-RST-Progressive.jpg | 386.698 ms | 33.0065 ms | 1.8649 ms | 0.90 | 0.01 | 125.0000 | 125.0000 | 125.0000 | 35186.97 KB |
111-
// | | | | | | | | | | |
112-
// 'Decode Jpeg - System.Drawing' | Jpg/issues/issue750-exif-tranform.jpg | 95.192 ms | 3.1762 ms | 0.1795 ms | 1.00 | 0.00 | 1750.0000 | - | - | 5492.63 KB |
113-
// 'Decode Jpeg - ImageSharp' | Jpg/issues/issue750-exif-tranform.jpg | 230.158 ms | 48.8128 ms | 2.7580 ms | 2.42 | 0.02 | 312.5000 | 312.5000 | 312.5000 | 58834.66 KB |
114-
115-
// RESULTS (2019 April 23):
116-
//
117-
// BenchmarkDotNet=v0.11.5, OS=Windows 10.0.17763.437 (1809/October2018Update/Redstone5)
118-
// Intel Core i7-6600U CPU 2.60GHz (Skylake), 1 CPU, 4 logical and 2 physical cores
119-
// .NET Core SDK=2.2.202
120-
// [Host] : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
121-
// Core : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
122-
//
123-
// | Method | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
124-
// |------------------------------- |--------------------- |-----------:|-----------:|-----------:|------:|--------:|----------:|------:|------:|------------:|
125-
// | 'Decode Jpeg - System.Drawing' | Jpg/b(...)e.jpg [21] | 6.957 ms | 9.618 ms | 0.5272 ms | 1.00 | 0.00 | 93.7500 | - | - | 205.83 KB |
126-
// | 'Decode Jpeg - ImageSharp' | Jpg/b(...)e.jpg [21] | 18.348 ms | 8.876 ms | 0.4865 ms | 2.65 | 0.23 | - | - | - | 14.49 KB |
127-
// | | | | | | | | | | | |
128-
// | 'Decode Jpeg - System.Drawing' | Jpg/b(...)f.jpg [28] | 18.687 ms | 11.632 ms | 0.6376 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.04 KB |
129-
// | 'Decode Jpeg - ImageSharp' | Jpg/b(...)f.jpg [28] | 41.990 ms | 25.514 ms | 1.3985 ms | 2.25 | 0.10 | - | - | - | 15.48 KB |
130-
// | | | | | | | | | | | |
131-
// | 'Decode Jpeg - System.Drawing' | Jpg/i(...)e.jpg [43] | 477.265 ms | 732.126 ms | 40.1303 ms | 1.00 | 0.00 | 3000.0000 | - | - | 7403.76 KB |
132-
// | 'Decode Jpeg - ImageSharp' | Jpg/i(...)e.jpg [43] | 348.545 ms | 91.480 ms | 5.0143 ms | 0.73 | 0.06 | - | - | - | 35177.21 KB |
93+
/*
94+
| Method | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
95+
|------------------------------- |--------------------- |-----------:|------------:|-----------:|------:|--------:|------:|------:|------:|-----------:|
96+
| 'Decode Jpeg - System.Drawing' | Jpg/b(...)e.jpg [21] | 5.122 ms | 1.3978 ms | 0.0766 ms | 1.00 | 0.00 | - | - | - | 176 B |
97+
| 'Decode Jpeg - ImageSharp' | Jpg/b(...)e.jpg [21] | 11.991 ms | 0.2514 ms | 0.0138 ms | 2.34 | 0.03 | - | - | - | 15816 B |
98+
| | | | | | | | | | | |
99+
| 'Decode Jpeg - System.Drawing' | Jpg/b(...)f.jpg [28] | 14.943 ms | 1.8410 ms | 0.1009 ms | 1.00 | 0.00 | - | - | - | 176 B |
100+
| 'Decode Jpeg - ImageSharp' | Jpg/b(...)f.jpg [28] | 29.759 ms | 1.5452 ms | 0.0847 ms | 1.99 | 0.01 | - | - | - | 16824 B |
101+
| | | | | | | | | | | |
102+
| 'Decode Jpeg - System.Drawing' | Jpg/i(...)e.jpg [43] | 388.229 ms | 382.8946 ms | 20.9877 ms | 1.00 | 0.00 | - | - | - | 216 B |
103+
| 'Decode Jpeg - ImageSharp' | Jpg/i(...)e.jpg [43] | 276.490 ms | 195.5104 ms | 10.7166 ms | 0.71 | 0.01 | - | - | - | 36022368 B |
104+
*/
133105
}
134106
}

0 commit comments

Comments
 (0)