|
| 1 | +using System; |
| 2 | +using System.Linq.Expressions; |
| 3 | +using BenchmarkDotNet.Attributes; |
| 4 | +using BenchmarkDotNet.Diagnosers; |
| 5 | +using BenchmarkDotNet.Jobs; |
| 6 | + |
| 7 | +namespace FastExpressionCompiler.Benchmarks; |
| 8 | + |
| 9 | +/* |
| 10 | +## Baseline with the static method. It seems to be the wrong idea for an improvement, because the closure-bound method is faster, as I discovered long ago. |
| 11 | +
|
| 12 | +BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.3775) |
| 13 | +Intel Core i9-8950HK CPU 2.90GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores |
| 14 | +.NET SDK 9.0.203 |
| 15 | + [Host] : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 |
| 16 | + .NET 8.0 : .NET 8.0.15 (8.0.1525.16413), X64 RyuJIT AVX2 |
| 17 | + .NET 9.0 : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 |
| 18 | +
|
| 19 | +
|
| 20 | +| Method | Job | Runtime | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | CacheMisses/Op | BranchMispredictions/Op | Allocated | Alloc Ratio | |
| 21 | +|------------------- |--------- |--------- |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|---------------:|------------------------:|----------:|------------:| |
| 22 | +| InvokeCompiled | .NET 8.0 | .NET 8.0 | 0.4365 ns | 0.0246 ns | 0.0192 ns | 1.00 | 0.06 | 1 | 1 | -0 | -0 | - | NA | |
| 23 | +| InvokeCompiledFast | .NET 8.0 | .NET 8.0 | 1.0837 ns | 0.0557 ns | 0.0991 ns | 2.49 | 0.25 | 2 | 2 | 0 | 0 | - | NA | |
| 24 | +| | | | | | | | | | | | | | | |
| 25 | +| InvokeCompiled | .NET 9.0 | .NET 9.0 | 0.5547 ns | 0.0447 ns | 0.0871 ns | 1.02 | 0.22 | 1 | 1 | -0 | -0 | - | NA | |
| 26 | +| InvokeCompiledFast | .NET 9.0 | .NET 9.0 | 1.1920 ns | 0.0508 ns | 0.0450 ns | 2.20 | 0.34 | 2 | 2 | 0 | -0 | - | NA | |
| 27 | +
|
| 28 | +
|
| 29 | +## Sealing the closure type does not help |
| 30 | +
|
| 31 | +| Method | Job | Runtime | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | |
| 32 | +|------------------- |--------- |--------- |----------:|----------:|----------:|----------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| |
| 33 | +| InvokeCompiledFast | .NET 8.0 | .NET 8.0 | 1.0066 ns | 0.0209 ns | 0.0233 ns | 0.9973 ns | 1.00 | 0.03 | 2 | 2 | 0 | 0 | - | NA | |
| 34 | +| InvokeCompiled | .NET 8.0 | .NET 8.0 | 0.5040 ns | 0.0217 ns | 0.0169 ns | 0.5016 ns | 0.50 | 0.02 | 1 | 1 | -0 | -0 | - | NA | |
| 35 | +| | | | | | | | | | | | | | | | |
| 36 | +| InvokeCompiledFast | .NET 9.0 | .NET 9.0 | 1.0640 ns | 0.0539 ns | 0.0929 ns | 1.0106 ns | 1.01 | 0.12 | 2 | 2 | 0 | 0 | - | NA | |
| 37 | +| InvokeCompiled | .NET 9.0 | .NET 9.0 | 0.5897 ns | 0.0451 ns | 0.0858 ns | 0.6156 ns | 0.56 | 0.09 | 1 | 1 | -0 | -0 | - | NA | |
| 38 | +
|
| 39 | +
|
| 40 | +## Still the same speed with the minimal IL of 2 instructions |
| 41 | +
|
| 42 | +Job=.NET 8.0 Runtime=.NET 8.0 |
| 43 | +
|
| 44 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | Allocated | Alloc Ratio | |
| 45 | +|------------------- |----------:|----------:|----------:|------:|--------:|-----:|----------:|------------:| |
| 46 | +| InvokeCompiled | 0.4647 ns | 0.0321 ns | 0.0268 ns | 1.00 | 0.08 | 1 | - | NA | |
| 47 | +| InvokeCompiledFast | 0.9739 ns | 0.0433 ns | 0.0481 ns | 2.10 | 0.15 | 2 | - | NA | |
| 48 | +
|
| 49 | +
|
| 50 | +## But the Func speed is faster, hmm |
| 51 | +
|
| 52 | +Job=.NET 8.0 Runtime=.NET 8.0 |
| 53 | +
|
| 54 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | Allocated | Alloc Ratio | |
| 55 | +|--------------- |----------:|----------:|----------:|------:|--------:|-----:|----------:|------------:| |
| 56 | +| InvokeCompiled | 0.2685 ns | 0.0210 ns | 0.0186 ns | 1.00 | 0.09 | 2 | - | NA | |
| 57 | +| JustFunc | 0.1711 ns | 0.0310 ns | 0.0305 ns | 0.64 | 0.12 | 1 | - | NA | |
| 58 | +
|
| 59 | +
|
| 60 | +## HERE IS THE REASON: |
| 61 | +
|
| 62 | +FEC creates the DynamicMethod with the `owner` parameter, but the System compiler uses a different overload without the owner, which internally passes `transparentMethod: true`. |
| 63 | +Using the latter (System) overload drastically slows down the compilation but removes the additional branch instruction in the invocation, making super simple delegates faster. |
| 64 | +But for delegates doing actual/more work, the additional branch instruction is negligible and usually does not show up in the invocation performance. |
| 65 | +
|
| 66 | +2x slow: `var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, typeof(ArrayClosure), true);` |
| 67 | + ^^^^^^^^^^^^^^^^^^^^ |
| 68 | +parity: `var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, true);` |
| 69 | +
|
| 70 | +Job=.NET 8.0 Runtime=.NET 8.0 |
| 71 | +
|
| 72 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | Allocated | Alloc Ratio | |
| 73 | +|------------------- |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|----------:|------------:| |
| 74 | +| InvokeCompiled | 0.5075 ns | 0.0153 ns | 0.0143 ns | 1.00 | 0.04 | 1 | 1 | - | NA | |
| 75 | +| InvokeCompiledFast | 0.5814 ns | 0.0433 ns | 0.0699 ns | 1.15 | 0.14 | 1 | 1 | - | NA | |
| 76 | +
|
| 77 | +
|
| 78 | +## Now with full eval before Compile, the results are funny in a good way |
| 79 | +
|
| 80 | +Job=.NET 8.0 Runtime=.NET 8.0 |
| 81 | +
|
| 82 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | Allocated | Alloc Ratio | |
| 83 | +|------------------------------- |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|----------:|------------:| |
| 84 | +| InvokeCompiled | 0.5071 ns | 0.0289 ns | 0.0242 ns | 1.00 | 0.06 | 2 | 1 | - | NA | |
| 85 | +| InvokeCompiledFastWithEvalFlag | 0.0804 ns | 0.0341 ns | 0.0351 ns | 0.16 | 0.07 | 1 | 1 | - | NA | |
| 86 | +
|
| 87 | +
|
| 88 | +## Fastest so far |
| 89 | +
|
| 90 | +DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 |
| 91 | +
|
| 92 | +| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | BranchInstructions/Op | Allocated | Alloc Ratio | |
| 93 | +|-------------------------------------- |----------:|----------:|----------:|----------:|------:|--------:|-----:|----------------------:|----------:|------------:| |
| 94 | +| InvokeCompiled | 0.5088 ns | 0.0399 ns | 0.0842 ns | 0.4707 ns | 1.02 | 0.22 | 2 | 1 | - | NA | |
| 95 | +| InvokeCompiledFast | 0.1105 ns | 0.0360 ns | 0.0799 ns | 0.0689 ns | 0.22 | 0.16 | 1 | 1 | - | NA | |
| 96 | +| InvokeCompiledFast_DisableInterpreter | 1.0607 ns | 0.0540 ns | 0.0887 ns | 1.0301 ns | 2.13 | 0.34 | 3 | 2 | - | NA | |
| 97 | +
|
| 98 | +*/ |
/// <summary>
/// Compares the invocation cost of the same <c>Func&lt;bool&gt;</c> expression compiled three ways:
/// via <c>CompileSys</c> (the baseline), via <c>CompileFast</c> with default flags,
/// and via <c>CompileFast</c> with <see cref="CompilerFlags.DisableInterpreter"/>.
/// </summary>
[MemoryDiagnoser, RankColumn]
[HardwareCounters(HardwareCounter.BranchInstructions)]
// Uncomment to run the comparison on specific runtimes:
// [SimpleJob(RuntimeMoniker.Net90)]
// [SimpleJob(RuntimeMoniker.Net80)]
public class Issue468_InvokeCompiled_vs_InvokeCompiledFast
{
    // The delegates under test; all three are assigned once in Setup.
    private Func<bool> _compiled;
    private Func<bool> _compiledFast;
    private Func<bool> _compiledFast_DisableInterpreter;

    // A plain static lambda, used only by the (currently disabled) JustFunc benchmark.
    private Func<bool> _justFunc = static () => true;

    /// <summary>Builds the Issue468 test expression once and compiles it with each strategy.</summary>
    [GlobalSetup]
    public void Setup()
    {
        var lambda = IssueTests.Issue468_Optimize_the_delegate_access_to_the_Closure_object_for_the_modern_NET.CreateExpression();

        _compiled = lambda.CompileSys();
        _compiledFast = lambda.CompileFast();
        _compiledFast_DisableInterpreter = lambda.CompileFast(flags: CompilerFlags.DisableInterpreter);
    }

    /// <summary>Baseline: invokes the delegate produced by <c>CompileSys</c>.</summary>
    [Benchmark(Baseline = true)]
    public bool InvokeCompiled() => _compiled();

    /// <summary>Invokes the delegate produced by <c>CompileFast</c> with default flags.</summary>
    [Benchmark]
    public bool InvokeCompiledFast() => _compiledFast();

    /// <summary>Invokes the delegate produced by <c>CompileFast</c> with the interpreter disabled.</summary>
    [Benchmark]
    public bool InvokeCompiledFast_DisableInterpreter() => _compiledFast_DisableInterpreter();

    /// <summary>Invokes the plain static lambda; not part of the run unless the attribute below is restored.</summary>
    // [Benchmark]
    public bool JustFunc() => _justFunc();
}
| 140 | + |
| 141 | +/* |
| 142 | +## Baseline. Does not look good. There is actually a regression I need to find and fix. |
| 143 | +
|
| 144 | +| Method | Job | Runtime | Mean | Error | StdDev | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio | |
| 145 | +|------------- |--------- |--------- |---------:|---------:|---------:|------:|--------:|-----:|-------:|-------:|----------:|------------:| |
| 146 | +| Compiled | .NET 8.0 | .NET 8.0 | 23.51 us | 0.468 us | 0.715 us | 1.00 | 0.04 | 2 | 0.6714 | 0.6409 | 4.13 KB | 1.00 | |
| 147 | +| CompiledFast | .NET 8.0 | .NET 8.0 | 17.63 us | 0.156 us | 0.146 us | 0.75 | 0.02 | 1 | 0.1831 | 0.1526 | 1.16 KB | 0.28 | |
| 148 | +| | | | | | | | | | | | | | |
| 149 | +| Compiled | .NET 9.0 | .NET 9.0 | 21.27 us | 0.114 us | 0.106 us | 1.00 | 0.01 | 2 | 0.6714 | 0.6409 | 4.13 KB | 1.00 | |
| 150 | +| CompiledFast | .NET 9.0 | .NET 9.0 | 16.82 us | 0.199 us | 0.186 us | 0.79 | 0.01 | 1 | 0.1831 | 0.1526 | 1.16 KB | 0.28 | |
| 151 | +
|
| 152 | +
|
| 153 | +## After reverting the regression |
| 154 | +
|
| 155 | +| Method | Job | Runtime | Mean | Error | StdDev | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio | |
| 156 | +|-------------------------- |--------- |--------- |----------:|----------:|----------:|------:|--------:|-----:|-------:|-------:|----------:|------------:| |
| 157 | +| Compiled | .NET 8.0 | .NET 8.0 | 25.093 us | 0.4979 us | 1.1034 us | 1.00 | 0.06 | 2 | 0.6714 | 0.6104 | 4.13 KB | 1.00 | |
| 158 | +| CompiledFast | .NET 8.0 | .NET 8.0 | 3.433 us | 0.0680 us | 0.0603 us | 0.14 | 0.01 | 1 | 0.1678 | 0.1526 | 1.12 KB | 0.27 | |
| 159 | +| CompiledFast_WithEvalFlag | .NET 8.0 | .NET 8.0 | 3.419 us | 0.0675 us | 0.1409 us | 0.14 | 0.01 | 1 | 0.2365 | 0.2289 | 1.48 KB | 0.36 | |
| 160 | +| | | | | | | | | | | | | | |
| 161 | +| Compiled | .NET 9.0 | .NET 9.0 | 25.491 us | 0.4667 us | 0.4137 us | 1.00 | 0.02 | 2 | 0.6714 | 0.6104 | 4.13 KB | 1.00 | |
| 162 | +| CompiledFast | .NET 9.0 | .NET 9.0 | 3.337 us | 0.0634 us | 0.0593 us | 0.13 | 0.00 | 1 | 0.1793 | 0.1755 | 1.12 KB | 0.27 | |
| 163 | +| CompiledFast_WithEvalFlag | .NET 9.0 | .NET 9.0 | 3.198 us | 0.0628 us | 0.0588 us | 0.13 | 0.00 | 1 | 0.2365 | 0.2289 | 1.48 KB | 0.36 | |
| 164 | +
|
| 165 | +
|
| 166 | +## Funny results after adding eval before compile |
| 167 | +
|
| 168 | +Job=.NET 8.0 Runtime=.NET 8.0 |
| 169 | +
|
| 170 | +| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio | |
| 171 | +|-------------------------- |------------:|----------:|----------:|------------:|-------:|--------:|-----:|-------:|-------:|----------:|------------:| |
| 172 | +| Compiled | 22,507.0 ns | 435.99 ns | 652.57 ns | 22,519.1 ns | 131.40 | 8.03 | 3 | 0.6714 | 0.6409 | 4232 B | 11.02 | |
| 173 | +| CompiledFast | 3,051.9 ns | 59.71 ns | 55.86 ns | 3,036.6 ns | 17.82 | 1.01 | 2 | 0.1755 | 0.1678 | 1143 B | 2.98 | |
| 174 | +| CompiledFast_WithEvalFlag | 171.8 ns | 3.49 ns | 9.44 ns | 167.6 ns | 1.00 | 0.08 | 1 | 0.0610 | - | 384 B | 1.00 | |
| 175 | +
|
| 176 | +
|
| 177 | +## Now we're talking (after small interpreter optimizations) |
| 178 | +
|
| 179 | +DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 |
| 180 | +
|
| 181 | +| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio | |
| 182 | +|-------------------------------- |-------------:|-----------:|-----------:|-------------:|-------:|--------:|-----:|-------:|-------:|----------:|------------:| |
| 183 | +| Compiled | 22,937.50 ns | 447.883 ns | 784.432 ns | 22,947.67 ns | 230.86 | 14.14 | 3 | 0.6714 | 0.6409 | 4232 B | 88.17 | |
| 184 | +| CompiledFast | 99.62 ns | 2.044 ns | 5.275 ns | 97.03 ns | 1.00 | 0.07 | 1 | 0.0076 | - | 48 B | 1.00 | |
| 185 | +| CompiledFast_DisableInterpreter | 3,010.37 ns | 60.174 ns | 91.893 ns | 3,010.03 ns | 30.30 | 1.80 | 2 | 0.1755 | 0.1678 | 1143 B | 23.81 | |
| 186 | +*/ |
/// <summary>
/// Compares the cost of compiling the Issue468 test expression with <c>Compile</c>,
/// <c>CompileFast</c> (the baseline), and <c>CompileFast</c> with
/// <see cref="CompilerFlags.DisableInterpreter"/>.
/// </summary>
[MemoryDiagnoser, RankColumn]
// Uncomment to run the comparison on specific runtimes:
// [SimpleJob(RuntimeMoniker.Net90)]
// [SimpleJob(RuntimeMoniker.Net80)]
public class Issue468_Compile_vs_FastCompile
{
    // The expression to compile; created once in Setup and reused by every benchmark.
    private Expression<Func<bool>> _expr;

    /// <summary>Creates the expression that each benchmark compiles.</summary>
    [GlobalSetup]
    public void Setup() =>
        _expr = IssueTests.Issue468_Optimize_the_delegate_access_to_the_Closure_object_for_the_modern_NET.CreateExpression();

    /// <summary>Compiles the expression with <c>Compile</c> and returns the resulting delegate.</summary>
    [Benchmark]
    public object Compiled() => _expr.Compile();

    /// <summary>Baseline: compiles the expression with <c>CompileFast</c> using default flags.</summary>
    [Benchmark(Baseline = true)]
    public object CompiledFast() => _expr.CompileFast();

    /// <summary>Compiles the expression with <c>CompileFast</c> and the interpreter disabled.</summary>
    [Benchmark]
    public object CompiledFast_DisableInterpreter() =>
        _expr.CompileFast(flags: CompilerFlags.DisableInterpreter);
}
| 218 | + |
/// <summary>
/// Measures <c>ExpressionCompiler.Interpreter.TryInterpretPrimitive</c> on the Issue468 test expression.
/// </summary>
[MemoryDiagnoser, RankColumn]
// Uncomment to run the benchmark on specific runtimes:
// [SimpleJob(RuntimeMoniker.Net90)]
// [SimpleJob(RuntimeMoniker.Net80)]
public class Issue468_Eval_Optimization
{
    // The expression to interpret; created once in Setup.
    private Expression<Func<bool>> _expr;

    /// <summary>Creates the expression that the benchmark interprets.</summary>
    [GlobalSetup]
    public void Setup() =>
        _expr = IssueTests.Issue468_Optimize_the_delegate_access_to_the_Closure_object_for_the_modern_NET.CreateExpression();

    // Disabled comparison against the previous implementation, kept for reference:
    // [Benchmark(Baseline = true)]
    // public object Baseline()
    // {
    //     return ExpressionCompiler.Interpreter.TryEvalPrimitive_OLD(out var result, _expr) ? result : null;
    // }

    /// <summary>
    /// Interprets the expression and returns the produced value,
    /// or <c>null</c> when <c>TryInterpretPrimitive</c> reports failure.
    /// </summary>
    [Benchmark]
    public object Optimized()
    {
        if (ExpressionCompiler.Interpreter.TryInterpretPrimitive(out var value, _expr))
        {
            return value;
        }

        return null;
    }
}
0 commit comments