Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions src/FastExpressionCompiler.LightExpression/Expression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,6 @@ public static ConstantExpression ConstantNull(Type type = null) =>
public static ConstantExpression ConstantOf<T>(T value) =>
value == null ? ConstantNull<T>() : new ValueConstantExpression<T>(value);

[MethodImpl((MethodImplOptions)256)]
public static int TryGetIntConstantValue(Expression e) => ((IntConstantExpression)e).IntValue;

[RequiresUnreferencedCode(Trimming.Message)]
public static NewExpression New(Type type)
{
Expand Down Expand Up @@ -3914,9 +3911,8 @@ public sealed class TypedValueConstantExpression : ConstantExpression
public sealed class IntConstantExpression : ConstantExpression
{
public override Type Type => typeof(int);
public override object Value => IntValue;
public readonly int IntValue;
internal IntConstantExpression(int value) => IntValue = value;
public override object Value { get; }
internal IntConstantExpression(int value) => Value = value;
}

public class NewExpression : Expression, IArgumentProvider
Expand Down
463 changes: 444 additions & 19 deletions src/FastExpressionCompiler/FastExpressionCompiler.cs

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/FastExpressionCompiler/TestTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public static class TestTools
public static bool AllowPrintIL = false;
public static bool AllowPrintCS = false;
public static bool AllowPrintExpression = false;
public static bool DisableAssertOpCodes = false;

static TestTools()
{
Expand All @@ -43,6 +44,8 @@ public static void AssertOpCodes(this Delegate @delegate, params OpCode[] expect

public static void AssertOpCodes(this MethodInfo method, params OpCode[] expectedCodes)
{
if (DisableAssertOpCodes) return;

var ilReader = ILReaderFactory.Create(method);
if (ilReader is null)
{
Expand Down Expand Up @@ -952,6 +955,8 @@ public sealed class TestRun
public SmallList<TestStats> Stats;
public SmallList<TestFailure> Failures;

// todo: @wip put the output under the feature flag
/// <summary>Will output the failures while running</summary>
public void Run<T>(T test, TestTracking tracking = TestTracking.TrackFailedTestsOnly) where T : ITestX
{
var totalTestCount = TotalTestCount;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>$(LatestSupportedNet)</TargetFrameworks>
<TargetFrameworks>$(LatestSupportedNet);net8.0</TargetFrameworks>

<OutputType>Exe</OutputType>
<IsTestProject>false</IsTestProject>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
using System;
using System.Linq.Expressions;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Jobs;

namespace FastExpressionCompiler.Benchmarks;

/*
## Baseline with the static method. It seems to be the wrong idea for the improvement, because the closure-bound method is faster, as I discovered a long time ago.

BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.3775)
Intel Core i9-8950HK CPU 2.90GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores
.NET SDK 9.0.203
[Host] : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2
.NET 8.0 : .NET 8.0.15 (8.0.1525.16413), X64 RyuJIT AVX2
.NET 9.0 : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2


| Method | Job | Runtime | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | CacheMisses/Op | BranchMispredictions/Op | Allocated | Alloc Ratio |
|------------------- |--------- |--------- |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|---------------:|------------------------:|----------:|------------:|
| InvokeCompiled | .NET 8.0 | .NET 8.0 | 0.4365 ns | 0.0246 ns | 0.0192 ns | 1.00 | 0.06 | 1 | 1 | -0 | -0 | - | NA |
| InvokeCompiledFast | .NET 8.0 | .NET 8.0 | 1.0837 ns | 0.0557 ns | 0.0991 ns | 2.49 | 0.25 | 2 | 2 | 0 | 0 | - | NA |
| | | | | | | | | | | | | | |
| InvokeCompiled | .NET 9.0 | .NET 9.0 | 0.5547 ns | 0.0447 ns | 0.0871 ns | 1.02 | 0.22 | 1 | 1 | -0 | -0 | - | NA |
| InvokeCompiledFast | .NET 9.0 | .NET 9.0 | 1.1920 ns | 0.0508 ns | 0.0450 ns | 2.20 | 0.34 | 2 | 2 | 0 | -0 | - | NA |


## Sealing the closure type does not help

| Method | Job | Runtime | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio |
|------------------- |--------- |--------- |----------:|----------:|----------:|----------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:|
| InvokeCompiledFast | .NET 8.0 | .NET 8.0 | 1.0066 ns | 0.0209 ns | 0.0233 ns | 0.9973 ns | 1.00 | 0.03 | 2 | 2 | 0 | 0 | - | NA |
| InvokeCompiled | .NET 8.0 | .NET 8.0 | 0.5040 ns | 0.0217 ns | 0.0169 ns | 0.5016 ns | 0.50 | 0.02 | 1 | 1 | -0 | -0 | - | NA |
| | | | | | | | | | | | | | | |
| InvokeCompiledFast | .NET 9.0 | .NET 9.0 | 1.0640 ns | 0.0539 ns | 0.0929 ns | 1.0106 ns | 1.01 | 0.12 | 2 | 2 | 0 | 0 | - | NA |
| InvokeCompiled | .NET 9.0 | .NET 9.0 | 0.5897 ns | 0.0451 ns | 0.0858 ns | 0.6156 ns | 0.56 | 0.09 | 1 | 1 | -0 | -0 | - | NA |


## Still the same speed with the minimal IL of 2 instructions

Job=.NET 8.0 Runtime=.NET 8.0

| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | Allocated | Alloc Ratio |
|------------------- |----------:|----------:|----------:|------:|--------:|-----:|----------:|------------:|
| InvokeCompiled | 0.4647 ns | 0.0321 ns | 0.0268 ns | 1.00 | 0.08 | 1 | - | NA |
| InvokeCompiledFast | 0.9739 ns | 0.0433 ns | 0.0481 ns | 2.10 | 0.15 | 2 | - | NA |


## But the Func speed is faster, hmm

Job=.NET 8.0 Runtime=.NET 8.0

| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | Allocated | Alloc Ratio |
|--------------- |----------:|----------:|----------:|------:|--------:|-----:|----------:|------------:|
| InvokeCompiled | 0.2685 ns | 0.0210 ns | 0.0186 ns | 1.00 | 0.09 | 2 | - | NA |
| JustFunc | 0.1711 ns | 0.0310 ns | 0.0305 ns | 0.64 | 0.12 | 1 | - | NA |


## HERE IS THE REASON:

FEC creates the DynamicMethod with the `owner` parameter, but the System compiler uses a different overload without the owner and internally with `transparentMethod: true`.
Using the latter (System) overload drastically slows down the compilation but removes the additional branch instruction in the invocation, making super simple delegates faster.
But for delegates doing actual/more work, the additional branch instruction is negligible and usually does not show in the invocation performance.

2x slow: `var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, typeof(ArrayClosure), true);`
^^^^^^^^^^^^^^^^^^^^
parity: `var method = new DynamicMethod(string.Empty, returnType, closurePlusParamTypes, true);`

Job=.NET 8.0 Runtime=.NET 8.0

| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | Allocated | Alloc Ratio |
|------------------- |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|----------:|------------:|
| InvokeCompiled | 0.5075 ns | 0.0153 ns | 0.0143 ns | 1.00 | 0.04 | 1 | 1 | - | NA |
| InvokeCompiledFast | 0.5814 ns | 0.0433 ns | 0.0699 ns | 1.15 | 0.14 | 1 | 1 | - | NA |


## Now with the full eval before Compile the results are funny in a good way

Job=.NET 8.0 Runtime=.NET 8.0

| Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | Allocated | Alloc Ratio |
|------------------------------- |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|----------:|------------:|
| InvokeCompiled | 0.5071 ns | 0.0289 ns | 0.0242 ns | 1.00 | 0.06 | 2 | 1 | - | NA |
| InvokeCompiledFastWithEvalFlag | 0.0804 ns | 0.0341 ns | 0.0351 ns | 0.16 | 0.07 | 1 | 1 | - | NA |


## Fastest so far

DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2

| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | BranchInstructions/Op | Allocated | Alloc Ratio |
|-------------------------------------- |----------:|----------:|----------:|----------:|------:|--------:|-----:|----------------------:|----------:|------------:|
| InvokeCompiled | 0.5088 ns | 0.0399 ns | 0.0842 ns | 0.4707 ns | 1.02 | 0.22 | 2 | 1 | - | NA |
| InvokeCompiledFast | 0.1105 ns | 0.0360 ns | 0.0799 ns | 0.0689 ns | 0.22 | 0.16 | 1 | 1 | - | NA |
| InvokeCompiledFast_DisableInterpreter | 1.0607 ns | 0.0540 ns | 0.0887 ns | 1.0301 ns | 2.13 | 0.34 | 3 | 2 | - | NA |

*/
/// <summary>
/// Compares the invocation cost of the same boolean delegate produced three ways:
/// by <c>CompileSys</c> (the baseline), by <c>CompileFast</c> with default flags,
/// and by <c>CompileFast</c> with <see cref="CompilerFlags.DisableInterpreter"/>.
/// </summary>
[MemoryDiagnoser]
[RankColumn]
[HardwareCounters(HardwareCounter.BranchInstructions)]
// [SimpleJob(RuntimeMoniker.Net90)]
// [SimpleJob(RuntimeMoniker.Net80)]
public class Issue468_InvokeCompiled_vs_InvokeCompiledFast
{
    // Delegates under measurement; the first three are assigned once in Setup.
    private Func<bool> _compiled;
    private Func<bool> _compiledFast;
    private Func<bool> _compiledFast_DisableInterpreter;

    // A plain static lambda kept as a reference point for JustFunc.
    private Func<bool> _justFunc = static () => true;

    /// <summary>Builds the test expression once and compiles it with every strategy being compared.</summary>
    [GlobalSetup]
    public void Setup()
    {
        var expression = IssueTests.Issue468_Optimize_the_delegate_access_to_the_Closure_object_for_the_modern_NET.CreateExpression();

        _compiled = expression.CompileSys();
        _compiledFast = expression.CompileFast();
        _compiledFast_DisableInterpreter = expression.CompileFast(flags: CompilerFlags.DisableInterpreter);
    }

    /// <summary>Baseline: invokes the delegate obtained from <c>CompileSys</c>.</summary>
    [Benchmark(Baseline = true)]
    public bool InvokeCompiled() => _compiled();

    /// <summary>Invokes the delegate obtained from <c>CompileFast</c> with default flags.</summary>
    [Benchmark]
    public bool InvokeCompiledFast() => _compiledFast();

    /// <summary>Invokes the delegate obtained from <c>CompileFast</c> with the interpreter disabled.</summary>
    [Benchmark]
    public bool InvokeCompiledFast_DisableInterpreter() => _compiledFast_DisableInterpreter();

    /// <summary>Invokes the plain static lambda; currently excluded from the benchmark run.</summary>
    // [Benchmark]
    public bool JustFunc() => _justFunc();
}

/*
## Baseline. Does not look good. There is actually a regression I need to find and fix.

| Method | Job | Runtime | Mean | Error | StdDev | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio |
|------------- |--------- |--------- |---------:|---------:|---------:|------:|--------:|-----:|-------:|-------:|----------:|------------:|
| Compiled | .NET 8.0 | .NET 8.0 | 23.51 us | 0.468 us | 0.715 us | 1.00 | 0.04 | 2 | 0.6714 | 0.6409 | 4.13 KB | 1.00 |
| CompiledFast | .NET 8.0 | .NET 8.0 | 17.63 us | 0.156 us | 0.146 us | 0.75 | 0.02 | 1 | 0.1831 | 0.1526 | 1.16 KB | 0.28 |
| | | | | | | | | | | | | |
| Compiled | .NET 9.0 | .NET 9.0 | 21.27 us | 0.114 us | 0.106 us | 1.00 | 0.01 | 2 | 0.6714 | 0.6409 | 4.13 KB | 1.00 |
| CompiledFast | .NET 9.0 | .NET 9.0 | 16.82 us | 0.199 us | 0.186 us | 0.79 | 0.01 | 1 | 0.1831 | 0.1526 | 1.16 KB | 0.28 |


## After reverting the regression

| Method | Job | Runtime | Mean | Error | StdDev | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio |
|-------------------------- |--------- |--------- |----------:|----------:|----------:|------:|--------:|-----:|-------:|-------:|----------:|------------:|
| Compiled | .NET 8.0 | .NET 8.0 | 25.093 us | 0.4979 us | 1.1034 us | 1.00 | 0.06 | 2 | 0.6714 | 0.6104 | 4.13 KB | 1.00 |
| CompiledFast | .NET 8.0 | .NET 8.0 | 3.433 us | 0.0680 us | 0.0603 us | 0.14 | 0.01 | 1 | 0.1678 | 0.1526 | 1.12 KB | 0.27 |
| CompiledFast_WithEvalFlag | .NET 8.0 | .NET 8.0 | 3.419 us | 0.0675 us | 0.1409 us | 0.14 | 0.01 | 1 | 0.2365 | 0.2289 | 1.48 KB | 0.36 |
| | | | | | | | | | | | | |
| Compiled | .NET 9.0 | .NET 9.0 | 25.491 us | 0.4667 us | 0.4137 us | 1.00 | 0.02 | 2 | 0.6714 | 0.6104 | 4.13 KB | 1.00 |
| CompiledFast | .NET 9.0 | .NET 9.0 | 3.337 us | 0.0634 us | 0.0593 us | 0.13 | 0.00 | 1 | 0.1793 | 0.1755 | 1.12 KB | 0.27 |
| CompiledFast_WithEvalFlag | .NET 9.0 | .NET 9.0 | 3.198 us | 0.0628 us | 0.0588 us | 0.13 | 0.00 | 1 | 0.2365 | 0.2289 | 1.48 KB | 0.36 |


## Funny results after adding eval before compile

Job=.NET 8.0 Runtime=.NET 8.0

| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio |
|-------------------------- |------------:|----------:|----------:|------------:|-------:|--------:|-----:|-------:|-------:|----------:|------------:|
| Compiled | 22,507.0 ns | 435.99 ns | 652.57 ns | 22,519.1 ns | 131.40 | 8.03 | 3 | 0.6714 | 0.6409 | 4232 B | 11.02 |
| CompiledFast | 3,051.9 ns | 59.71 ns | 55.86 ns | 3,036.6 ns | 17.82 | 1.01 | 2 | 0.1755 | 0.1678 | 1143 B | 2.98 |
| CompiledFast_WithEvalFlag | 171.8 ns | 3.49 ns | 9.44 ns | 167.6 ns | 1.00 | 0.08 | 1 | 0.0610 | - | 384 B | 1.00 |


## Now we're talking (after small interpreter optimizations)

DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2

| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | Gen0 | Gen1 | Allocated | Alloc Ratio |
|-------------------------------- |-------------:|-----------:|-----------:|-------------:|-------:|--------:|-----:|-------:|-------:|----------:|------------:|
| Compiled | 22,937.50 ns | 447.883 ns | 784.432 ns | 22,947.67 ns | 230.86 | 14.14 | 3 | 0.6714 | 0.6409 | 4232 B | 88.17 |
| CompiledFast | 99.62 ns | 2.044 ns | 5.275 ns | 97.03 ns | 1.00 | 0.07 | 1 | 0.0076 | - | 48 B | 1.00 |
| CompiledFast_DisableInterpreter | 3,010.37 ns | 60.174 ns | 91.893 ns | 3,010.03 ns | 30.30 | 1.80 | 2 | 0.1755 | 0.1678 | 1143 B | 23.81 |
*/
/// <summary>
/// Compares the cost of compiling the same expression with <c>Compile</c>,
/// with <c>CompileFast</c> using default flags (the baseline),
/// and with <c>CompileFast</c> using <see cref="CompilerFlags.DisableInterpreter"/>.
/// </summary>
[MemoryDiagnoser]
[RankColumn]
// [SimpleJob(RuntimeMoniker.Net90)]
// [SimpleJob(RuntimeMoniker.Net80)]
public class Issue468_Compile_vs_FastCompile
{
    // The expression under test; created once in Setup and compiled anew on every benchmark call.
    private Expression<Func<bool>> _expr;

    /// <summary>Creates the expression that every benchmark method compiles.</summary>
    [GlobalSetup]
    public void Setup() =>
        _expr = IssueTests.Issue468_Optimize_the_delegate_access_to_the_Closure_object_for_the_modern_NET.CreateExpression();

    /// <summary>Compiles the expression with <c>Compile</c>.</summary>
    [Benchmark]
    public object Compiled() => _expr.Compile();

    /// <summary>Baseline: compiles the expression with <c>CompileFast</c> and default flags.</summary>
    [Benchmark(Baseline = true)]
    public object CompiledFast() => _expr.CompileFast();

    /// <summary>Compiles the expression with <c>CompileFast</c> and the interpreter disabled.</summary>
    [Benchmark]
    public object CompiledFast_DisableInterpreter() =>
        _expr.CompileFast(flags: CompilerFlags.DisableInterpreter);
}

/// <summary>
/// Measures <c>ExpressionCompiler.Interpreter.TryInterpretPrimitive</c> on the issue #468 expression.
/// </summary>
[MemoryDiagnoser]
[RankColumn]
// [SimpleJob(RuntimeMoniker.Net90)]
// [SimpleJob(RuntimeMoniker.Net80)]
public class Issue468_Eval_Optimization
{
    // The expression handed to the interpreter; created once in Setup.
    private Expression<Func<bool>> _expr;

    /// <summary>Creates the expression evaluated by the benchmark.</summary>
    [GlobalSetup]
    public void Setup() =>
        _expr = IssueTests.Issue468_Optimize_the_delegate_access_to_the_Closure_object_for_the_modern_NET.CreateExpression();

    // [Benchmark(Baseline = true)]
    // public object Baseline()
    // {
    //     return ExpressionCompiler.Interpreter.TryEvalPrimitive_OLD(out var result, _expr) ? result : null;
    // }

    /// <summary>Interprets the expression and returns the result, or <c>null</c> when the interpreter reports failure.</summary>
    [Benchmark]
    public object Optimized()
    {
        if (ExpressionCompiler.Interpreter.TryInterpretPrimitive(out var result, _expr))
        {
            return result;
        }

        return null;
    }
}
7 changes: 5 additions & 2 deletions test/FastExpressionCompiler.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ public static void Main()
// BenchmarkRunner.Run<ManuallyComposedLambdaBenchmark.Create>(); // not included in README.md, may be it needs to
// BenchmarkRunner.Run<ManuallyComposedLambdaBenchmark.Create_and_Compile>(); // not included in README.md, may be it needs to

BenchmarkRunner.Run<LightExprVsExpr_Create_ComplexExpr>();
BenchmarkRunner.Run<LightExprVsExpr_CreateAndCompile_ComplexExpr>();
// BenchmarkRunner.Run<LightExprVsExpr_Create_ComplexExpr>();
// BenchmarkRunner.Run<LightExprVsExpr_CreateAndCompile_ComplexExpr>();

//--------------------------------------------

// BenchmarkRunner.Run<Issue468_Compile_vs_FastCompile>();
BenchmarkRunner.Run<Issue468_InvokeCompiled_vs_InvokeCompiledFast>();
// BenchmarkRunner.Run<Issue468_Eval_Optimization>();

// BenchmarkRunner.Run<AccessByRef_vs_ByIGetRefStructImpl>();

Expand Down
Loading