Skip to content

Commit f210bc0

Browse files
bcardosolopes authored and akadutta committed
[MLIR][LLVM] Add bytecode support for several attributes (llvm#162577)
For a total of 20 attributes, 18 debug information related + 2 regular ones (loop and alias_scope). Quick background on how this works: if a given attribute isn't supported, by default its textual form is dumped into the bytecode. In order to get proper encoding, an attribute needs a tablegen description of it and its elements. There's an additional rule here: if an attribute is only used by another attribute, its user also needs to have an encoding in order for it to be encoded (e.g. `DICompileUnitAttr` only gets encoded while in `DISubprogramAttr` if the latter also has an encoded form); otherwise text is used. For this reason, this PR does a bunch at the same time; otherwise there isn't really much to test (easy to break it down if needed though). The PR is tested against some of our internal apps, successfully round-tripping around 14Gb of llvm dialect text. Some interesting findings include an 800K mlir textual file that used to become 1.2G in bytecode format - now down to 100K due to proper encoding of debug info attributes. In the future we should find a way to merge this together in the attribute definitions (perhaps autogenerate the entries from LLVM attribute descriptions); it seems we could benefit from reducing the boilerplate. It's not clear yet how to solve some of the tablegen issues: some fields require manual translation of flag values using `LocalVar`, others require custom getters, etc. Ideas on that front are welcome. A natural next step here is to add type support; LLVM structs can also lead to a non-negligible disk footprint.
1 parent 3b8ebdf commit f210bc0

File tree

9 files changed

+582
-3
lines changed

9 files changed

+582
-3
lines changed

mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ mlir_tablegen(LLVMIntrinsicFromLLVMIRConversions.inc -gen-intr-from-llvmir-conve
4848
mlir_tablegen(LLVMConvertibleLLVMIRIntrinsics.inc -gen-convertible-llvmir-intrinsics)
4949
add_mlir_dialect_tablegen_target(MLIRLLVMIntrinsicConversionsIncGen)
5050

51+
set(LLVM_TARGET_DEFINITIONS LLVMDialectBytecode.td)
52+
mlir_tablegen(LLVMDialectBytecode.cpp.inc -gen-bytecode -bytecode-dialect="LLVM")
53+
add_public_tablegen_target(MLIRLLVMDialectBytecodeIncGen)
54+
5155
set(LLVM_TARGET_DEFINITIONS BasicPtxBuilderInterface.td)
5256
mlir_tablegen(BasicPtxBuilderInterface.h.inc -gen-op-interface-decls)
5357
mlir_tablegen(BasicPtxBuilderInterface.cpp.inc -gen-op-interface-defs)
Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
//===-- LLVMDialectBytecode.td - LLVM bytecode defs --------*- tablegen -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This is the LLVM bytecode reader/writer definition file.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_DIALECT_BYTECODE
14+
#define LLVM_DIALECT_BYTECODE
15+
16+
include "mlir/IR/BytecodeBase.td"
17+
18+
//===----------------------------------------------------------------------===//
19+
// Bytecode classes for attributes and types.
20+
//===----------------------------------------------------------------------===//
21+
22+
// Bytecode field kind for string values. The value is read with the reader's
// `readString`, written with the writer's `writeOwnedString`, and held in a
// `StringRef`. The builder forwards the parsed value unchanged (`$_args`).
def String :
23+
WithParser <"succeeded($_reader.readString($_var))",
24+
WithBuilder<"$_args",
25+
WithPrinter<"$_writer.writeOwnedString($_getter)",
26+
WithType <"StringRef">>>>;
27+
28+
// Attribute-valued field whose C++ type is `type`. It layers
// `WithType<type, ...>` over the base `Attribute` kind (defined outside this
// file), so presumably only the C++ type differs from a plain `Attribute`
// field -- confirm against BytecodeBase.td.
class Attr<string type> : WithType<type, Attribute>;
29+
30+
// Attribute-valued field of C++ type `type` that is read with the reader's
// `readOptionalAttribute` and written with the writer's
// `writeOptionalAttribute`. Used in this file for attribute parameters that
// may be absent.
class OptionalAttribute<string type> :
31+
WithParser <"succeeded($_reader.readOptionalAttribute($_var))",
32+
WithPrinter<"$_writer.writeOptionalAttribute($_getter)",
33+
WithType<type, Attribute>>>;
34+
35+
// Integer field stored as `std::optional<type>`, layered over `VarInt`.
// Reading and writing go through the free helper functions `readOptionalInt`
// and `writeOptionalInt` (not reader/writer methods); those helpers are not
// defined in this file and must be visible where the generated code is
// included.
class OptionalInt<string type> :
36+
WithParser <"succeeded(readOptionalInt($_reader, $_var))",
37+
WithPrinter<"writeOptionalInt($_writer, $_getter)",
38+
WithType<"std::optional<" # type # ">", VarInt>>>;
39+
40+
// Array field with elements of C++ type `eltType`. The parsed value is held in
// a `SmallVector<eltType>`. Reading and writing go through the helper
// templates `readOptionalArrayRef<eltType>` and
// `writeOptionalArrayRef<eltType>`, which are not defined in this file and
// must be visible where the generated code is included.
class OptionalArrayRef<string eltType> :
41+
WithParser <"succeeded(readOptionalArrayRef<"
42+
# eltType # ">($_reader, $_var))",
43+
WithPrinter<"writeOptionalArrayRef<"
44+
# eltType # ">($_writer, $_getter)",
45+
WithType<"SmallVector<"
46+
# eltType # ">", Attribute>>>;
47+
48+
// Enum/flag field serialized as a raw integer.
//   flag   - name of the C++ enum type the builder casts the value to.
//   getter - accessor expression invoked on the attribute when printing,
//            written with its call parentheses (e.g. "getFlags()").
// The printer casts the accessor result to `uint64_t` and emits it with
// `writeVarInt`; the parser reads it back with `readVarInt` into a `uint64_t`.
class EnumClassFlag<string flag, string getter> :
49+
WithParser<"succeeded($_reader.readVarInt($_var))",
50+
WithBuilder<"(" # flag # ")$_args",
51+
WithPrinter<"$_writer.writeVarInt((uint64_t)$_name." # getter # ")",
52+
WithType<"uint64_t", VarInt>>>>;
53+
54+
//===----------------------------------------------------------------------===//
55+
// General notes
56+
// - For each attribute or type entry, the argument names should match
57+
// LLVMAttrDefs.td
58+
// - The mnemonics are either LLVM or builtin MLIR attributes and types, but
59+
// regular C++ types are also allowed to match builders and parsers.
60+
// - DIScopeAttr and DINodeAttr are empty base classes, custom encoding not
61+
// needed.
62+
//===----------------------------------------------------------------------===//
63+
64+
//===----------------------------------------------------------------------===//
65+
// DIBasicTypeAttr
66+
//===----------------------------------------------------------------------===//
67+
68+
def DIBasicTypeAttr : DialectAttribute<(attr
69+
VarInt:$tag,
70+
String:$name,
71+
VarInt:$sizeInBits,
72+
VarInt:$encoding
73+
)>;
74+
75+
//===----------------------------------------------------------------------===//
76+
// DIExpressionAttr, DIExpressionElemAttr
77+
//===----------------------------------------------------------------------===//
78+
79+
def DIExpressionElemAttr : DialectAttribute<(attr
80+
VarInt:$opcode,
81+
OptionalArrayRef<"uint64_t">:$arguments
82+
)>;
83+
84+
def DIExpressionAttr : DialectAttribute<(attr
85+
OptionalArrayRef<"DIExpressionElemAttr">:$operations
86+
)>;
87+
88+
//===----------------------------------------------------------------------===//
89+
// DIFileAttr
90+
//===----------------------------------------------------------------------===//
91+
92+
def DIFileAttr : DialectAttribute<(attr
93+
String:$name,
94+
String:$directory
95+
)>;
96+
97+
//===----------------------------------------------------------------------===//
98+
// DILocalVariableAttr
99+
//===----------------------------------------------------------------------===//
100+
101+
def DILocalVariableAttr : DialectAttribute<(attr
102+
Attr<"DIScopeAttr">:$scope,
103+
OptionalAttribute<"StringAttr">:$name,
104+
OptionalAttribute<"DIFileAttr">:$file,
105+
VarInt:$line,
106+
VarInt:$arg,
107+
VarInt:$alignInBits,
108+
OptionalAttribute<"DITypeAttr">:$type,
109+
// `_rawflags` carries the flags as a raw integer (see EnumClassFlag); the
// `flags` entry below is derived from it by the cast `(DIFlags)_rawflags`.
EnumClassFlag<"DIFlags", "getFlags()">:$_rawflags,
110+
LocalVar<"DIFlags", "(DIFlags)_rawflags">:$flags
111+
)> {
112+
// DILocalVariableAttr direct getter uses a `StringRef` for `name`. Since the
113+
// more direct getter is preferred during bytecode reading, force the base one
114+
// and prevent crashes for empty `StringAttr`.
115+
let cBuilder = "$_resultType::get(context, $_args)";
116+
}
117+
118+
//===----------------------------------------------------------------------===//
119+
// DISubroutineTypeAttr
120+
//===----------------------------------------------------------------------===//
121+
122+
def DISubroutineTypeAttr : DialectAttribute<(attr
123+
VarInt:$callingConvention,
124+
OptionalArrayRef<"DITypeAttr">:$types
125+
)>;
126+
127+
//===----------------------------------------------------------------------===//
128+
// DICompileUnitAttr
129+
//===----------------------------------------------------------------------===//
130+
131+
def DICompileUnitAttr : DialectAttribute<(attr
132+
Attr<"DistinctAttr">:$id,
133+
VarInt:$sourceLanguage,
134+
Attr<"DIFileAttr">:$file,
135+
OptionalAttribute<"StringAttr">:$producer,
136+
Bool:$isOptimized,
137+
EnumClassFlag<"DIEmissionKind", "getEmissionKind()">:$_rawEmissionKind,
138+
LocalVar<"DIEmissionKind", "(DIEmissionKind)_rawEmissionKind">:$emissionKind,
139+
EnumClassFlag<"DINameTableKind", "getNameTableKind()">:$_rawNameTableKind,
140+
LocalVar<"DINameTableKind",
141+
"(DINameTableKind)_rawNameTableKind">:$nameTableKind
142+
)>;
143+
144+
//===----------------------------------------------------------------------===//
145+
// DISubprogramAttr
146+
//===----------------------------------------------------------------------===//
147+
148+
def DISubprogramAttr : DialectAttribute<(attr
149+
OptionalAttribute<"DistinctAttr">:$recId,
150+
Bool:$isRecSelf,
151+
OptionalAttribute<"DistinctAttr">:$id,
152+
OptionalAttribute<"DICompileUnitAttr">:$compileUnit,
153+
OptionalAttribute<"DIScopeAttr">:$scope,
154+
OptionalAttribute<"StringAttr">:$name,
155+
OptionalAttribute<"StringAttr">:$linkageName,
156+
OptionalAttribute<"DIFileAttr">:$file,
157+
VarInt:$line,
158+
VarInt:$scopeLine,
159+
EnumClassFlag<"DISubprogramFlags", "getSubprogramFlags()">:$_rawflags,
160+
LocalVar<"DISubprogramFlags", "(DISubprogramFlags)_rawflags">:$subprogramFlags,
161+
OptionalAttribute<"DISubroutineTypeAttr">:$type,
162+
OptionalArrayRef<"DINodeAttr">:$retainedNodes,
163+
OptionalArrayRef<"DINodeAttr">:$annotations
164+
)>;
165+
166+
//===----------------------------------------------------------------------===//
167+
// DICompositeTypeAttr
168+
//===----------------------------------------------------------------------===//
169+
170+
def DICompositeTypeAttr : DialectAttribute<(attr
171+
OptionalAttribute<"DistinctAttr">:$recId,
172+
Bool:$isRecSelf,
173+
VarInt:$tag,
174+
OptionalAttribute<"StringAttr">:$name,
175+
OptionalAttribute<"DIFileAttr">:$file,
176+
VarInt:$line,
177+
OptionalAttribute<"DIScopeAttr">:$scope,
178+
OptionalAttribute<"DITypeAttr">:$baseType,
179+
EnumClassFlag<"DIFlags", "getFlags()">:$_rawflags,
180+
LocalVar<"DIFlags", "(DIFlags)_rawflags">:$flags,
181+
VarInt:$sizeInBits,
182+
VarInt:$alignInBits,
183+
OptionalAttribute<"DIExpressionAttr">:$dataLocation,
184+
OptionalAttribute<"DIExpressionAttr">:$rank,
185+
OptionalAttribute<"DIExpressionAttr">:$allocated,
186+
OptionalAttribute<"DIExpressionAttr">:$associated,
187+
OptionalArrayRef<"DINodeAttr">:$elements
188+
)>;
189+
190+
//===----------------------------------------------------------------------===//
191+
// DIDerivedTypeAttr
192+
//===----------------------------------------------------------------------===//
193+
194+
def DIDerivedTypeAttr : DialectAttribute<(attr
195+
VarInt:$tag,
196+
OptionalAttribute<"StringAttr">:$name,
197+
OptionalAttribute<"DITypeAttr">:$baseType,
198+
VarInt:$sizeInBits,
199+
VarInt:$alignInBits,
200+
VarInt:$offsetInBits,
201+
OptionalInt<"unsigned">:$dwarfAddressSpace,
202+
OptionalAttribute<"DINodeAttr">:$extraData
203+
)>;
204+
205+
//===----------------------------------------------------------------------===//
206+
// DIImportedEntityAttr
207+
//===----------------------------------------------------------------------===//
208+
209+
def DIImportedEntityAttr : DialectAttribute<(attr
210+
VarInt:$tag,
211+
Attr<"DIScopeAttr">:$scope,
212+
Attr<"DINodeAttr">:$entity,
213+
OptionalAttribute<"DIFileAttr">:$file,
214+
VarInt:$line,
215+
OptionalAttribute<"StringAttr">:$name,
216+
OptionalArrayRef<"DINodeAttr">:$elements
217+
)>;
218+
219+
//===----------------------------------------------------------------------===//
220+
// DIGlobalVariableAttr, DIGlobalVariableExpressionAttr
221+
//===----------------------------------------------------------------------===//
222+
223+
def DIGlobalVariableAttr : DialectAttribute<(attr
224+
OptionalAttribute<"DIScopeAttr">:$scope,
225+
OptionalAttribute<"StringAttr">:$name,
226+
OptionalAttribute<"StringAttr">:$linkageName,
227+
Attr<"DIFileAttr">:$file,
228+
VarInt:$line,
229+
Attr<"DITypeAttr">:$type,
230+
Bool:$isLocalToUnit,
231+
Bool:$isDefined,
232+
VarInt:$alignInBits
233+
)>;
234+
235+
def DIGlobalVariableExpressionAttr : DialectAttribute<(attr
236+
Attr<"DIGlobalVariableAttr">:$var,
237+
OptionalAttribute<"DIExpressionAttr">:$expr
238+
)>;
239+
240+
//===----------------------------------------------------------------------===//
241+
// DILabelAttr
242+
//===----------------------------------------------------------------------===//
243+
244+
def DILabelAttr : DialectAttribute<(attr
245+
Attr<"DIScopeAttr">:$scope,
246+
OptionalAttribute<"StringAttr">:$name,
247+
OptionalAttribute<"DIFileAttr">:$file,
248+
VarInt:$line
249+
)> {
250+
// DILabelAttr direct getter uses a `StringRef` for `name`. Since the
251+
// more direct getter is preferred during bytecode reading, force the base one
252+
// and prevent crashes for empty `StringAttr`.
253+
let cBuilder = "$_resultType::get(context, $_args)";
254+
}
255+
256+
//===----------------------------------------------------------------------===//
257+
// DILexicalBlockAttr, DILexicalBlockFileAttr
258+
//===----------------------------------------------------------------------===//
259+
260+
def DILexicalBlockAttr : DialectAttribute<(attr
261+
Attr<"DIScopeAttr">:$scope,
262+
OptionalAttribute<"DIFileAttr">:$file,
263+
VarInt:$line,
264+
VarInt:$column
265+
)>;
266+
267+
def DILexicalBlockFileAttr : DialectAttribute<(attr
268+
Attr<"DIScopeAttr">:$scope,
269+
OptionalAttribute<"DIFileAttr">:$file,
270+
VarInt:$discriminator
271+
)>;
272+
273+
//===----------------------------------------------------------------------===//
274+
// DINamespaceAttr
275+
//===----------------------------------------------------------------------===//
276+
277+
def DINamespaceAttr : DialectAttribute<(attr
278+
OptionalAttribute<"StringAttr">:$name,
279+
OptionalAttribute<"DIScopeAttr">:$scope,
280+
Bool:$exportSymbols
281+
)>;
282+
283+
//===----------------------------------------------------------------------===//
284+
// DISubrangeAttr
285+
//===----------------------------------------------------------------------===//
286+
287+
def DISubrangeAttr : DialectAttribute<(attr
288+
OptionalAttribute<"Attribute">:$count,
289+
OptionalAttribute<"Attribute">:$lowerBound,
290+
OptionalAttribute<"Attribute">:$upperBound,
291+
OptionalAttribute<"Attribute">:$stride
292+
)>;
293+
294+
//===----------------------------------------------------------------------===//
295+
// LoopAnnotationAttr
296+
//===----------------------------------------------------------------------===//
297+
298+
def LoopAnnotationAttr : DialectAttribute<(attr
299+
OptionalAttribute<"BoolAttr">:$disableNonforced,
300+
OptionalAttribute<"LoopVectorizeAttr">:$vectorize,
301+
OptionalAttribute<"LoopInterleaveAttr">:$interleave,
302+
OptionalAttribute<"LoopUnrollAttr">:$unroll,
303+
OptionalAttribute<"LoopUnrollAndJamAttr">:$unrollAndJam,
304+
OptionalAttribute<"LoopLICMAttr">:$licm,
305+
OptionalAttribute<"LoopDistributeAttr">:$distribute,
306+
OptionalAttribute<"LoopPipelineAttr">:$pipeline,
307+
OptionalAttribute<"LoopPeeledAttr">:$peeled,
308+
OptionalAttribute<"LoopUnswitchAttr">:$unswitch,
309+
OptionalAttribute<"BoolAttr">:$mustProgress,
310+
OptionalAttribute<"BoolAttr">:$isVectorized,
311+
OptionalAttribute<"FusedLoc">:$startLoc,
312+
OptionalAttribute<"FusedLoc">:$endLoc,
313+
OptionalArrayRef<"AccessGroupAttr">:$parallelAccesses
314+
)>;
315+
316+
//===----------------------------------------------------------------------===//
317+
// Attributes & Types with custom bytecode handling.
318+
//===----------------------------------------------------------------------===//
319+
320+
// All the attributes with custom bytecode handling.
321+
def LLVMDialectAttributes : DialectAttributes<"LLVM"> {
322+
let elems = [
323+
DIBasicTypeAttr,
324+
DICompileUnitAttr,
325+
DICompositeTypeAttr,
326+
DIDerivedTypeAttr,
327+
DIExpressionElemAttr,
328+
DIExpressionAttr,
329+
DIFileAttr,
330+
DIGlobalVariableAttr,
331+
DIGlobalVariableExpressionAttr,
332+
DIImportedEntityAttr,
333+
DILabelAttr,
334+
DILexicalBlockAttr,
335+
DILexicalBlockFileAttr,
336+
DILocalVariableAttr,
337+
DINamespaceAttr,
338+
DISubprogramAttr,
339+
DISubrangeAttr,
340+
DISubroutineTypeAttr,
341+
LoopAnnotationAttr
342+
// Referenced attributes currently missing support:
343+
// AccessGroupAttr, LoopVectorizeAttr, LoopInterleaveAttr, LoopUnrollAttr,
344+
// LoopUnrollAndJamAttr, LoopLICMAttr, LoopDistributeAttr, LoopPipelineAttr,
345+
// LoopPeeledAttr, LoopUnswitchAttr
346+
];
347+
}
348+
349+
def LLVMDialectTypes : DialectTypes<"LLVM"> {
350+
let elems = [];
351+
}
352+
353+
#endif // LLVM_DIALECT_BYTECODE

mlir/lib/Dialect/LLVMIR/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ add_mlir_dialect_library(MLIRLLVMDialect
88
IR/LLVMMemorySlot.cpp
99
IR/LLVMTypes.cpp
1010
IR/LLVMTypeSyntax.cpp
11+
IR/LLVMDialectBytecode.cpp
1112

1213
ADDITIONAL_HEADER_DIRS
1314
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR
1415

1516
DEPENDS
17+
MLIRLLVMDialectBytecodeIncGen
1618
MLIRLLVMOpsIncGen
1719
MLIRLLVMTypesIncGen
1820
MLIRLLVMIntrinsicOpsIncGen

mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include "llvm/IR/DataLayout.h"
3030
#include "llvm/Support/Error.h"
3131

32+
#include "LLVMDialectBytecode.h"
33+
3234
#include <numeric>
3335
#include <optional>
3436

@@ -4237,6 +4239,7 @@ void LLVMDialect::initialize() {
42374239
// Support unknown operations because not all LLVM operations are registered.
42384240
allowUnknownOperations();
42394241
declarePromisedInterface<DialectInlinerInterface, LLVMDialect>();
4242+
detail::addBytecodeInterface(this);
42404243
}
42414244

42424245
#define GET_OP_CLASSES

0 commit comments

Comments
 (0)