Skip to content

Commit ccdad8e

Browse files
committed
found culprit
1 parent f827d3a commit ccdad8e

File tree

2 files changed

+26
-4
lines changed

2 files changed

+26
-4
lines changed

src/FastExpressionCompiler/FastExpressionCompiler.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ public static Func<T1, T2, R> CompileFast<T1, T2, R>(this Expression<Func<T1, T2
240240
#else
241241
lambdaExpr.Parameters,
242242
#endif
243-
new[] { typeof(ArrayClosure), typeof(T1), typeof(T2) },
243+
new[] { typeof(ArrayClosure), typeof(T1), typeof(T2) }, // todo: @perf rent and return the array of types to pool
244244
typeof(R), flags) ?? (ifFastFailedReturnNull ? null : lambdaExpr.CompileSys());
245245

246246
/// <summary>Compiles lambda expression to delegate. Use ifFastFailedReturnNull parameter to Not fallback to Expression.Compile, useful for testing.</summary>
@@ -514,6 +514,9 @@ internal static object TryCompileBoundToFirstClosureParam(Type delegateType, Exp
514514
closure = new DebugArrayClosure(constantsAndNestedLambdas, debugExpr);
515515
}
516516

517+
// todo: @slow this is what the System Compile does, which makes the compilation significantly slower (10x), but the invocation becomes faster by a single branch instruction
518+
// var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, true);
519+
// this is the FEC way: significantly faster compilation, but +1 branch instruction in the invocation
517520
var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, typeof(ArrayClosure), true);
518521

519522
// todo: @perf can we just count the Expressions in the TryCollect phase and use it as N * 4 or something?
@@ -536,6 +539,7 @@ internal static object TryCompileBoundToFirstClosureParam(Type delegateType, Exp
536539
private static readonly Type[] _closureAsASingleParamType = { typeof(ArrayClosure) };
537540
private static readonly Type[][] _closureTypePlusParamTypesPool = new Type[8][]; // todo: @perf @mem could we use this for other Type arrays?
538541

542+
// todo: @perf optimize
539543
#if LIGHT_EXPRESSION
540544
private static Type[] RentOrNewClosureTypeToParamTypes(IParameterProvider paramExprs)
541545
{

test/FastExpressionCompiler.Benchmarks/Issue468_Compile_vs_FastCompile.cs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,27 @@ .NET SDK 9.0.203
5656
| InvokeCompiled | 0.2685 ns | 0.0210 ns | 0.0186 ns | 1.00 | 0.09 | 2 | - | NA |
5757
| JustFunc | 0.1711 ns | 0.0310 ns | 0.0305 ns | 0.64 | 0.12 | 1 | - | NA |
5858
59+
## HERE IS THE REASON:
60+
61+
FEC creates the DynamicMethod with the `owner` parameter, but the System Compile uses a different overload without an owner, internally passing `transparentMethod: true`.
62+
Using the latter (System) overload drastically slows down the compilation but removes the additional branch instruction in the invocation, making super simple delegates faster.
63+
But for delegates doing actual/more work, the additional branch instruction is negligible and usually does not show up in the invocation performance.
64+
65+
2x slowness: `var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, typeof(ArrayClosure), true);`
66+
^^^^^^^^^^^^^^^^^^^^
67+
parity: `var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, true);`
68+
69+
70+
Job=.NET 8.0 Runtime=.NET 8.0
71+
72+
| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | Allocated | Alloc Ratio |
73+
|------------------- |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|----------:|------------:|
74+
| InvokeCompiled | 0.5075 ns | 0.0153 ns | 0.0143 ns | 1.00 | 0.04 | 1 | 1 | - | NA |
75+
| InvokeCompiledFast | 0.5814 ns | 0.0433 ns | 0.0699 ns | 1.15 | 0.14 | 1 | 1 | - | NA |
76+
5977
*/
6078
[MemoryDiagnoser, RankColumn]
61-
// [HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchMispredictions, HardwareCounter.BranchInstructions)]
79+
[HardwareCounters(HardwareCounter.BranchInstructions)]
6280
// [SimpleJob(RuntimeMoniker.Net90)]
6381
[SimpleJob(RuntimeMoniker.Net80)]
6482
public class Issue468_InvokeCompiled_vs_InvokeCompiledFast
@@ -79,13 +97,13 @@ public bool InvokeCompiled()
7997
return _compiled();
8098
}
8199

82-
// [Benchmark]
100+
[Benchmark]
83101
public bool InvokeCompiledFast()
84102
{
85103
return _compiledFast();
86104
}
87105

88-
[Benchmark]
106+
// [Benchmark]
89107
public bool JustFunc()
90108
{
91109
return _justFunc();

0 commit comments

Comments
 (0)