Skip to content

Commit 294320b

Browse files
authored
Improved and Cross platform disassembler (#1332)
* update ClrMD, install it in main project
* add native code size metric
* remove the possibility to print IL to simplify the code
* use decoded instruction information to handle direct and indirect calls, including jumps
* always print full ASM (it's hard to tell what is prolog|epilog, and the native code size metric should not lie)
* remove CopiedDataContracts (not needed anymore) and IDisassemblyDiagnoser (there is always a single implementation of the abstraction)
* print error for older .NET Core versions
* fix x86 tests
* clean way of synthesizing jump labels, use ISymbolResolver
* move formatting from disassembler to exporters
* handle ptrace_scope values
* properly handle methods which have not been jitted yet
* properly handle extern methods
* don't use metadata tokens to compare methods, they return the same values for different generic instantiations of the same method
* handle Method Descriptors (MD_)
1 parent 3defd78 commit 294320b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1194
-1572
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ tests/output/*
4848
artifacts/*
4949
BDN.Generated
5050
BenchmarkDotNet.Samples/Properties/launchSettings.json
51-
src/BenchmarkDotNet/Disassemblers/*
51+
src/BenchmarkDotNet/Disassemblers/net461/*
5252

5353
# Visual Studio 2015 cache/options directory
5454
.vs/
Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,33 @@
11
using BenchmarkDotNet.Attributes;
2-
using BenchmarkDotNet.Environments;
3-
using BenchmarkDotNet.Jobs;
2+
using System.Linq;
43

54
namespace BenchmarkDotNet.Samples
65
{
7-
[DryJob(RuntimeMoniker.NetCoreApp21)]
8-
[DryJob(RuntimeMoniker.Mono)]
9-
[DryJob(RuntimeMoniker.Net461, Jit.LegacyJit, Platform.X86)]
106
[DisassemblyDiagnoser]
117
public class IntroDisassembly
128
{
9+
int[] field = Enumerable.Range(0, 100).ToArray();
10+
11+
[Benchmark]
12+
public int SumLocal()
13+
{
14+
var local = field; // we use local variable that points to the field
15+
16+
int sum = 0;
17+
for (int i = 0; i < local.Length; i++)
18+
sum += local[i];
19+
20+
return sum;
21+
}
22+
1323
[Benchmark]
14-
public double Sum()
24+
public int SumField()
1525
{
16-
double res = 0;
17-
for (int i = 0; i < 64; i++)
18-
res += i;
19-
return res;
26+
int sum = 0;
27+
for (int i = 0; i < field.Length; i++)
28+
sum += field[i];
29+
30+
return sum;
2031
}
2132
}
2233
}

samples/BenchmarkDotNet.Samples/IntroDisassemblyAllJits.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public MultipleJits()
2727
// RyuJit for .NET Core 2.1
2828
AddJob(Job.ShortRun.WithJit(Jit.RyuJit).WithPlatform(Platform.X64).WithRuntime(CoreRuntime.Core21));
2929

30-
AddDiagnoser(DisassemblyDiagnoser.Create(new DisassemblyDiagnoserConfig(printAsm: true, printPrologAndEpilog: true, recursiveDepth: 3, printDiff: true)));
30+
AddDiagnoser(new DisassemblyDiagnoser(new DisassemblyDiagnoserConfig(maxDepth: 3, exportDiff: true)));
3131
}
3232
}
3333

samples/BenchmarkDotNet.Samples/IntroDisassemblyDry.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public JustDisassembly()
1616
AddJob(Job.Dry.WithJit(Jit.RyuJit).WithPlatform(Platform.X64).WithRuntime(CoreRuntime.Core20));
1717
AddJob(Job.Dry.WithJit(Jit.RyuJit).WithPlatform(Platform.X64).WithRuntime(CoreRuntime.Core21));
1818

19-
AddDiagnoser(DisassemblyDiagnoser.Create(new DisassemblyDiagnoserConfig(printAsm: true, printPrologAndEpilog: true, recursiveDepth: 3)));
19+
AddDiagnoser(new DisassemblyDiagnoser(new DisassemblyDiagnoserConfig(maxDepth: 3)));
2020
}
2121
}
2222

samples/BenchmarkDotNet.Samples/IntroDisassemblyRyuJit.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
namespace BenchmarkDotNet.Samples
55
{
6-
[DisassemblyDiagnoser(printAsm: true, printSource: true)]
6+
[DisassemblyDiagnoser(printSource: true)]
77
[RyuJitX64Job]
88
public class IntroDisassemblyRyuJit
99
{

src/BenchmarkDotNet.Disassembler.x64/BenchmarkDotNet.Disassembler.x64.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
<RootNamespace>BenchmarkDotNet.Disassembler</RootNamespace>
1515
</PropertyGroup>
1616
<ItemGroup>
17-
<PackageReference Include="Microsoft.Diagnostics.Runtime" Version="0.9.180305.1" />
18-
<PackageReference Include="Mono.Cecil" Version="0.10.1" />
17+
<PackageReference Include="Iced" Version="1.4.0" />
18+
<PackageReference Include="Microsoft.Diagnostics.Runtime" Version="1.1.57604" />
1919
</ItemGroup>
2020
</Project>
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
using Iced.Intel;
2+
using Microsoft.Diagnostics.Runtime;
3+
using Microsoft.Diagnostics.Runtime.Interop;
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Linq;
7+
8+
namespace BenchmarkDotNet.Disassemblers
9+
{
10+
internal static class ClrMdDisassembler
11+
{
12+
/// <summary>
/// Attaches passively to the process identified by <c>settings.ProcessId</c>, disassembles the method
/// <c>settings.MethodName</c> of the type <c>settings.TypeName</c> together with the methods discovered
/// while decoding it, and returns the collected methods plus the address-to-name mapping.
/// </summary>
internal static DisassemblyResult AttachAndDisassemble(Settings settings)
{
    uint attachTimeoutInMilliseconds = (uint)TimeSpan.FromSeconds(5).TotalMilliseconds;

    using (var target = DataTarget.AttachToProcess(settings.ProcessId, attachTimeoutInMilliseconds, AttachFlag.Passive))
    {
        // exactly one CLR is expected in the target process, Single() throws otherwise
        var clr = target.ClrVersions.Single().CreateRuntime();

        // flushing the reader is the workaround described in https://github.com/microsoft/clrmd/issues/303
        target.DataReader.Flush();

        ConfigureSymbols(target);

        var state = new State(clr);
        var benchmarkType = state.Runtime.Heap.GetTypeByName(settings.TypeName);

        // the Disassembler Entry Method is always parameterless, so matching by name is sufficient
        var entryMethod = benchmarkType.Methods.Single(candidate => candidate.IsPublic && candidate.Name == settings.MethodName);
        state.Todo.Enqueue(new MethodInfo(entryMethod, 0));

        DisassembledMethod[] everything = Disassemble(settings, state);

        DisassembledMethod[] exported;
        if (everything.Length == 1)
        {
            // a single method is returned as-is (most probably the benchmark got inlined)
            exported = everything;
        }
        else
        {
            // the entry point is an artificial method added to get generic types working, so it is not exported
            exported = everything
                .Where(disassembled => !disassembled.Name.Contains(DisassemblerConstants.DisassemblerEntryMethodName))
                .ToArray();
        }

        var result = new DisassemblyResult();
        result.Methods = exported;
        result.SerializedAddressToNameMapping = state.AddressToNameMapping
            .Select(pair => new DisassemblyResult.MutablePair { Key = pair.Key, Value = pair.Value })
            .ToArray();
        result.PointerSize = (uint)IntPtr.Size;
        return result;
    }
}
51+
52+
/// <summary>
/// Points the debugger interface of <paramref name="dataTarget"/> at the Microsoft symbol server and asks it
/// to reload. Each step is skipped when the debugger interface does not implement the required COM interface,
/// and the results of both calls are ignored.
/// </summary>
private static void ConfigureSymbols(DataTarget dataTarget)
{
    // approach taken from https://github.com/Microsoft/clrmd/issues/34#issuecomment-161926535
    // NOTE(review): the symbol server is addressed over plain HTTP - consider HTTPS after confirming the debug engine accepts it
    if (dataTarget.DebuggerInterface is IDebugSymbols debugSymbols)
    {
        debugSymbols.SetSymbolPath("http://msdl.microsoft.com/download/symbols");
    }

    if (dataTarget.DebuggerInterface is IDebugControl debugControl)
    {
        debugControl.Execute(DEBUG_OUTCTL.NOT_LOGGED, ".reload", DEBUG_EXECUTE.NOT_LOGGED);
    }
}
60+
61+
/// <summary>
/// Drains <c>state.Todo</c>, disassembling every method that has not been handled before and whose depth
/// does not exceed <c>settings.MaxDepth</c>. Disassembling a method may enqueue further methods.
/// </summary>
private static DisassembledMethod[] Disassemble(Settings settings, State state)
{
    var disassembled = new List<DisassembledMethod>();

    while (state.Todo.Count > 0)
    {
        var next = state.Todo.Dequeue();

        // the method is marked as handled BEFORE it gets disassembled, so recursive methods can't loop forever
        bool seenForTheFirstTime = state.HandledMethods.Add(next.Method);

        if (seenForTheFirstTime && next.Depth <= settings.MaxDepth)
        {
            disassembled.Add(DisassembleMethod(next, state, settings));
        }
    }

    return disassembled.ToArray();
}
78+
79+
/// <summary>
/// Disassembles a single method. When the method has neither a usable IL-to-native map nor usable hot/cold
/// information, an empty result carrying the reason is returned instead. Source lines are collected only
/// when <c>settings.PrintSource</c> is set and the method has an IL offset map.
/// </summary>
private static DisassembledMethod DisassembleMethod(MethodInfo methodInfo, State state, Settings settings)
{
    var method = methodInfo.Method;

    bool hasILOffsetMap = !(method.ILOffsetMap is null) && method.ILOffsetMap.Length != 0;
    if (!hasILOffsetMap)
    {
        var hotCold = method.HotColdInfo;
        bool hasHotCode = !(hotCold is null) && hotCold.HotStart != 0 && hotCold.HotSize != 0;

        if (!hasHotCode)
        {
            // there is nothing to decode, all that is left is to explain why
            string reason;
            if (method.IsPInvoke)
                reason = "PInvoke method";
            else if (method.IL is null || method.IL.Length == 0)
                reason = "Extern method";
            else if (method.CompilationType == MethodCompilationType.None)
                reason = "Method was not JITted yet.";
            else
                reason = $"No valid {nameof(method.ILOffsetMap)} and {nameof(method.HotColdInfo)}";

            return CreateEmpty(method, reason);
        }
    }

    var codes = new List<SourceCode>();

    if (settings.PrintSource && !(method.ILOffsetMap is null))
    {
        // C# code is always looked up with the original ILOffsetMap, in native address order
        var sourceLines = method.ILOffsetMap
            .Where(map => map.StartAddress < map.EndAddress && map.ILOffset >= 0)
            .OrderBy(map => map.StartAddress)
            .SelectMany(map => SourceCodeProvider.GetSource(method, map));

        // the set drops the lines which were already reported for a different map
        var distinctSourceLines = new HashSet<Sharp>(sourceLines, new SharpComparer());

        codes.AddRange(distinctSourceLines);
    }

    // ASM is decoded from the ranges provided by GetCompleteNativeMap (HotColdInfo is preferred when available)
    foreach (var nativeRange in GetCompleteNativeMap(method))
    {
        uint rangeSize = (uint)(nativeRange.EndAddress - nativeRange.StartAddress);
        codes.AddRange(Decode(nativeRange.StartAddress, rangeSize, state, methodInfo.Depth, method));
    }

    Map[] maps;
    if (settings.PrintSource)
    {
        // one map per instruction pointer, so the source lines are kept together with the code at the same address
        maps = codes
            .GroupBy(code => code.InstructionPointer)
            .OrderBy(sameAddress => sameAddress.Key)
            .Select(sameAddress => new Map() { SourceCodes = sameAddress.ToArray() })
            .ToArray();
    }
    else
    {
        maps = new[] { new Map() { SourceCodes = codes.ToArray() } };
    }

    return new DisassembledMethod
    {
        Maps = maps,
        Name = method.GetFullSignature(),
        NativeCode = method.NativeCode
    };
}
123+
124+
/// <summary>
/// Lazily decodes the instructions stored in <paramref name="size"/> bytes of the target process memory
/// starting at <paramref name="startAddress"/>. Nothing is yielded when the memory can't be read.
/// Addresses referenced by the decoded instructions are translated to names as a side effect.
/// </summary>
private static IEnumerable<Asm> Decode(ulong startAddress, uint size, State state, int depth, ClrMethod currentMethod)
{
    byte[] buffer = new byte[size];

    bool readSucceeded = state.Runtime.DataTarget.ReadProcessMemory(startAddress, buffer, buffer.Length, out int bytesRead);
    if (readSucceeded && bytesRead != 0)
    {
        // only the bytes that were actually read are handed to the decoder
        var codeReader = new ByteArrayCodeReader(buffer, 0, bytesRead);

        int bitness = state.Runtime.PointerSize * 8;
        var decoder = Decoder.Create(bitness, codeReader);
        decoder.IP = startAddress;

        while (codeReader.CanReadByte)
        {
            decoder.Decode(out var decoded);

            TryTranslateAddressToName(decoded, state, depth, currentMethod);

            var asm = new Asm();
            asm.InstructionPointer = decoded.IP;
            asm.Instruction = decoded;
            yield return asm;
        }
    }
}
147+
148+
/// <summary>
/// When <paramref name="instruction"/> references an address which has no name yet, tries to resolve it
/// (in this order) as a JIT helper, a method table (MT_), a method descriptor (MD_) or a method, and stores
/// the name in <c>state.AddressToNameMapping</c>. Methods that were not handled yet are enqueued for
/// disassembling with <paramref name="depth"/> + 1.
/// </summary>
private static void TryTranslateAddressToName(Instruction instruction, State state, int depth, ClrMethod currentMethod)
{
    var runtime = state.Runtime;
    var knownNames = state.AddressToNameMapping;

    bool referencesAddress = TryGetReferencedAddress(instruction, (uint)runtime.PointerSize, out ulong address);
    if (!referencesAddress || knownNames.ContainsKey(address))
    {
        return;
    }

    // the cheap lookups go first, each one is performed only when the previous one found nothing
    string wellKnownName = null;
    var jitHelper = runtime.GetJitHelperFunctionName(address);
    if (!string.IsNullOrEmpty(jitHelper))
    {
        wellKnownName = jitHelper;
    }
    else
    {
        var methodTable = runtime.GetMethodTableName(address);
        if (!string.IsNullOrEmpty(methodTable))
        {
            wellKnownName = $"MT_{methodTable}";
        }
        else
        {
            var described = runtime.GetMethodByHandle(address);
            if (!(described is null))
            {
                wellKnownName = $"MD_{described.GetFullSignature()}";
            }
        }
    }

    if (!(wellKnownName is null))
    {
        knownNames.Add(address, wellKnownName);
        return;
    }

    var target = runtime.GetMethodByAddress(address);
    bool isPointerAligned = (address & ((uint)runtime.PointerSize - 1)) == 0;
    if (target is null && isPointerAligned)
    {
        // the address might hold a pointer to the code (indirect call), so one level of indirection is followed
        if (runtime.ReadPointer(address, out ulong indirectAddress) && indirectAddress > ushort.MaxValue)
        {
            target = runtime.GetMethodByAddress(indirectAddress);
        }
    }

    if (target is null)
    {
        return;
    }

    // a jump within the current method or a recursive call, nothing new to name or disassemble
    if (target.NativeCode == currentMethod.NativeCode && target.GetFullSignature() == currentMethod.GetFullSignature())
    {
        return;
    }

    if (!state.HandledMethods.Contains(target))
    {
        state.Todo.Enqueue(new MethodInfo(target, depth + 1));
    }

    var displayName = target.GetFullSignature();
    if (displayName.All(character => character != '.'))
    {
        // the signature contains neither the namespace nor the type name, so the type name is prepended
        displayName = $"{target.Type.Name}.{target.GetFullSignature()}";
    }

    knownNames.Add(address, displayName);
}
200+
201+
/// <summary>
/// Looks for the first operand of <paramref name="instruction"/> that is a near branch, a supported
/// immediate or a memory operand, and reports the address it carries.
/// </summary>
/// <returns>
/// <c>true</c> when such an operand exists and its value is greater than <see cref="ushort.MaxValue"/>.
/// The search stops at the first operand of a supported kind, even when its value is too small.
/// </returns>
internal static bool TryGetReferencedAddress(Instruction instruction, uint pointerSize, out ulong referencedAddress)
{
    int operandCount = instruction.OpCount;

    for (int operandIndex = 0; operandIndex < operandCount; operandIndex++)
    {
        ulong candidate;

        switch (instruction.GetOpKind(operandIndex))
        {
            case OpKind.NearBranch16:
            case OpKind.NearBranch32:
            case OpKind.NearBranch64:
                candidate = instruction.NearBranchTarget;
                break;
            case OpKind.Immediate16:
            case OpKind.Immediate8to16:
            case OpKind.Immediate8to32:
            case OpKind.Immediate8to64:
            case OpKind.Immediate32to64:
            case OpKind.Immediate32 when pointerSize == 4: // 32-bit immediates are considered only for 4-byte pointers
            case OpKind.Immediate64:
                candidate = instruction.GetImmediate(operandIndex);
                break;
            case OpKind.Memory64:
                candidate = instruction.MemoryAddress64;
                break;
            case OpKind.Memory when instruction.IsIPRelativeMemoryOperand:
                candidate = instruction.IPRelativeMemoryAddress;
                break;
            case OpKind.Memory:
                candidate = instruction.MemoryDisplacement;
                break;
            default:
                continue; // operand of a kind that carries no address, the next operand is checked
        }

        referencedAddress = candidate;
        return candidate > ushort.MaxValue;
    }

    referencedAddress = default;
    return false;
}
236+
237+
/// <summary>
/// Returns the native code ranges of <paramref name="method"/>. When hot/cold information with a non-zero
/// hot start and size is available, it yields the hot range followed by the cold range (if any), both with
/// ILOffset = -1. Otherwise the non-empty entries of the IL offset map are returned in native address order.
/// </summary>
private static ILToNativeMap[] GetCompleteNativeMap(ClrMethod method)
{
    var info = method.HotColdInfo;
    bool hasHotCode = !(info is null) && info.HotSize > 0 && info.HotStart > 0;

    if (!hasHotCode)
    {
        var nonEmptyMaps =
            from map in method.ILOffsetMap
            where map.StartAddress < map.EndAddress // some maps have 0 length?
            orderby map.StartAddress // printing must follow the machine code order, not IL! #536
            select map;

        return nonEmptyMaps.ToArray();
    }

    // one big range is preferred over a few small ones, it is simply easier to get the next instruction when decoding
    var hot = new ILToNativeMap() { StartAddress = info.HotStart, EndAddress = info.HotStart + info.HotSize, ILOffset = -1 };

    if (info.ColdSize <= 0)
    {
        return new[] { hot };
    }

    var cold = new ILToNativeMap() { StartAddress = info.ColdStart, EndAddress = info.ColdStart + info.ColdSize, ILOffset = -1 };

    return new[] { hot, cold };
}
258+
259+
/// <summary>
/// Builds an empty result for <paramref name="method"/> from its full signature, its native code address
/// and the <paramref name="reason"/> why nothing was disassembled.
/// </summary>
private static DisassembledMethod CreateEmpty(ClrMethod method, string reason)
{
    return DisassembledMethod.Empty(method.GetFullSignature(), method.NativeCode, reason);
}
261+
262+
/// <summary>
/// Treats two <see cref="Sharp"/> instances as equal when they point to the same line of the same file.
/// </summary>
private sealed class SharpComparer : IEqualityComparer<Sharp>
{
    /// <summary>
    /// Compares by file path and line number; two nulls are equal, a null never equals a non-null instance.
    /// </summary>
    public bool Equals(Sharp x, Sharp y)
    {
        if (ReferenceEquals(x, y))
            return true;
        if (x is null || y is null)
            return false;

        // sometimes some C# code lines are duplicated because the same line is the best match for multiple ILToNativeMaps
        // we don't want to confuse the users, so this must also be removed
        return x.FilePath == y.FilePath && x.LineNumber == y.LineNumber;
    }

    /// <summary>
    /// Combines the hashes of the file path and the line number.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
    public int GetHashCode(Sharp obj)
    {
        if (obj is null)
            throw new ArgumentNullException(nameof(obj));

        // Equals accepts a null FilePath, so hashing must not dereference it either
        int filePathHash = obj.FilePath?.GetHashCode() ?? 0;

        return filePathHash ^ obj.LineNumber;
    }
}
273+
}
274+
}

0 commit comments

Comments
 (0)