Skip to content

Commit 0bcf8f0

Browse files
authored
Simplify IR2 language (#1443)
This PR introduces several changes to the shape of the IR2 language used to compute bindings and typing, to reduce the tree depth and improve its usability.

- Function types are unified under a single IR2 type `FunctionDefinition`, which now includes a `kind` discriminator. Regular functions, constructors, modifiers, and special functions are now all represented under this type in the IR2 tree.
- Collapsed several non-terminal types which consist of a single content field and possibly punctuation or fixed unique keywords: parameters and returns declarations, import aliases, else branches, index access expressions, etc.
- Simplified arguments for function calls to a choice type with a vector of expressions for positional parameters, or `NamedArgument`, which is an identifier and the corresponding expression.
- Simplified string literals, which no longer distinguish between single and double quoted values. This applies to normal strings, hex strings and unicode strings. Also, the `StringExpression` (for a string literal value) is now a choice between vectors of strings, hex strings or unicode strings, and the single value variants (deprecated in newer Solidity) are represented using one-element vectors.
- Function and state variable attributes are flattened and categorized into separate fields: visibility, mutability, override specifiers and modifier invocations (for functions).
- All optional unique terminals are now represented as a boolean to indicate the presence of the keyword.

As a result, the number of IR2 node types is reduced and the tree structure is shallower, making the code of the passes simpler.
1 parent 71e710f commit 0bcf8f0

File tree

33 files changed

+2654
-3644
lines changed

33 files changed

+2654
-3644
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
use crate::ir::{IrModel, IrModelMutator, ModelWithBuilder};
2+
3+
/// Builds a `ModelWithBuilder` from the CST model by pruning redundant
/// terminal fields from its sequences.
///
/// A field is removed when it is not optional, its type is a terminal, and the
/// flag stored for that terminal in `cst_model.terminals` is `true`
/// (presumably marking a unique terminal such as a fixed keyword or
/// punctuation — confirm against `IrModel`).
pub(super) fn build_from(cst_model: &IrModel) -> ModelWithBuilder {
    let mut mutator = IrModelMutator::create_from(cst_model);

    // A sequence modelling a precedence expression with multiple variant
    // operators must keep its terminals, so such sequences are skipped.
    let prunable_sequences = cst_model
        .sequences
        .iter()
        .filter(|(_, sequence)| !sequence.multiple_operators);

    for (sequence_id, sequence) in prunable_sequences {
        // `&&` short-circuits, so the `terminals` lookup is only evaluated for
        // fields whose type actually is a terminal.
        let redundant_fields = sequence.fields.iter().filter(|field| {
            !field.is_optional
                && field.r#type.is_terminal()
                && cst_model.terminals[field.r#type.as_identifier()]
        });

        for field in redundant_fields {
            mutator.remove_sequence_field(sequence_id, &field.label);
        }
    }

    mutator.into()
}
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
use crate::ir::{IrModel, IrModelMutator, ModelWithTransformer};
2+
3+
pub(super) fn build_from(structured_ast_model: &IrModel) -> ModelWithTransformer {
4+
let mut mutator = IrModelMutator::create_from(structured_ast_model);
5+
6+
flatten_contract_specifiers(&mut mutator);
7+
unify_function_types(&mut mutator);
8+
flatten_function_attributes(&mut mutator);
9+
flatten_state_variable_attributes(&mut mutator);
10+
collapse_redundant_node_types(&mut mutator);
11+
simplify_string_literals(&mut mutator);
12+
13+
mutator.into()
14+
}
15+
16+
fn flatten_contract_specifiers(mutator: &mut IrModelMutator) {
17+
// Flatten contract specifiers and bring the inherited types and storage
18+
// layout to the contract definition itself.
19+
mutator.remove_type("ContractSpecifiers");
20+
mutator.remove_type("ContractSpecifier");
21+
mutator.collapse_sequence("InheritanceSpecifier");
22+
mutator.collapse_sequence("StorageLayoutSpecifier");
23+
mutator.add_sequence_field(
24+
"ContractDefinition",
25+
"inheritance_types",
26+
"InheritanceTypes",
27+
false,
28+
);
29+
mutator.add_sequence_field("ContractDefinition", "storage_layout", "Expression", true);
30+
}
31+
32+
fn unify_function_types(mutator: &mut IrModelMutator) {
33+
// Unifiy function definition types
34+
mutator.add_enum_type(
35+
"FunctionKind",
36+
&[
37+
"Regular",
38+
"Constructor",
39+
"Unnamed",
40+
"Fallback",
41+
"Receive",
42+
"Modifier",
43+
],
44+
);
45+
46+
// Add the kind to the FunctionDefinition type, which will now hold all kinds
47+
mutator.add_sequence_field("FunctionDefinition", "kind", "FunctionKind", false);
48+
49+
// Then remove other specific function types and related attributes
50+
mutator.remove_type("ConstructorDefinition");
51+
mutator.remove_type("ConstructorAttributes");
52+
mutator.remove_type("ConstructorAttribute");
53+
54+
mutator.remove_type("UnnamedFunctionDefinition");
55+
mutator.remove_type("UnnamedFunctionAttributes");
56+
mutator.remove_type("UnnamedFunctionAttribute");
57+
58+
mutator.remove_type("FallbackFunctionDefinition");
59+
mutator.remove_type("FallbackFunctionAttributes");
60+
mutator.remove_type("FallbackFunctionAttribute");
61+
62+
mutator.remove_type("ReceiveFunctionDefinition");
63+
mutator.remove_type("ReceiveFunctionAttributes");
64+
mutator.remove_type("ReceiveFunctionAttribute");
65+
66+
mutator.remove_type("ModifierDefinition");
67+
mutator.remove_type("ModifierAttributes");
68+
mutator.remove_type("ModifierAttribute");
69+
70+
// This also requires modifying the name and body fields
71+
mutator.remove_sequence_field("FunctionDefinition", "name");
72+
mutator.add_sequence_field("FunctionDefinition", "name", "Identifier", true);
73+
mutator.remove_sequence_field("FunctionDefinition", "body");
74+
mutator.add_sequence_field("FunctionDefinition", "body", "Block", true);
75+
76+
// We don't need FunctionName or FunctionBody anymore
77+
mutator.remove_type("FunctionName");
78+
mutator.remove_type("FunctionBody");
79+
}
80+
81+
fn flatten_function_attributes(mutator: &mut IrModelMutator) {
82+
// Function visibility, computed from a subset of the attributes
83+
mutator.add_enum_type(
84+
"FunctionVisibility",
85+
&["Public", "Private", "Internal", "External"],
86+
);
87+
88+
// Function mutability, computed from a subset of the attributes
89+
mutator.add_enum_type(
90+
"FunctionMutability",
91+
&["Pure", "View", "NonPayable", "Payable"],
92+
);
93+
94+
mutator.add_sequence_field(
95+
"FunctionDefinition",
96+
"visibility",
97+
"FunctionVisibility",
98+
false,
99+
);
100+
mutator.add_sequence_field(
101+
"FunctionDefinition",
102+
"mutability",
103+
"FunctionMutability",
104+
false,
105+
);
106+
// We use an optional unique terminal to effectively have a boolean
107+
mutator.add_sequence_field(
108+
"FunctionDefinition",
109+
"virtual_keyword",
110+
"VirtualKeyword",
111+
true,
112+
);
113+
114+
// Flatten list of override specifiers and modifier invocations
115+
mutator.add_sequence_field(
116+
"FunctionDefinition",
117+
"override_specifier",
118+
"OverridePaths",
119+
true,
120+
);
121+
mutator.add_collection_type("ModifierInvocations", "ModifierInvocation");
122+
mutator.add_sequence_field(
123+
"FunctionDefinition",
124+
"modifier_invocations",
125+
"ModifierInvocations",
126+
false,
127+
);
128+
129+
// And remove the list of attributes
130+
mutator.remove_type("FunctionAttributes");
131+
mutator.remove_type("FunctionAttribute");
132+
133+
// For `FunctionType` we need visibility and mutability
134+
mutator.add_sequence_field("FunctionType", "visibility", "FunctionVisibility", false);
135+
mutator.add_sequence_field("FunctionType", "mutability", "FunctionMutability", false);
136+
mutator.remove_type("FunctionTypeAttributes");
137+
mutator.remove_type("FunctionTypeAttribute");
138+
}
139+
140+
fn flatten_state_variable_attributes(mutator: &mut IrModelMutator) {
141+
// Function visibility, computed from a subset of the attributes
142+
mutator.add_enum_type(
143+
"StateVariableVisibility",
144+
&["Public", "Private", "Internal"],
145+
);
146+
147+
// Function mutability, computed from a subset of the attributes
148+
mutator.add_enum_type(
149+
"StateVariableMutability",
150+
&["Mutable", "Constant", "Immutable", "Transient"],
151+
);
152+
153+
mutator.add_sequence_field(
154+
"StateVariableDefinition",
155+
"visibility",
156+
"StateVariableVisibility",
157+
false,
158+
);
159+
mutator.add_sequence_field(
160+
"StateVariableDefinition",
161+
"mutability",
162+
"StateVariableMutability",
163+
false,
164+
);
165+
mutator.add_sequence_field(
166+
"StateVariableDefinition",
167+
"override_specifier",
168+
"OverridePaths",
169+
true,
170+
);
171+
172+
// And remove the list of attributes
173+
mutator.remove_type("StateVariableAttributes");
174+
mutator.remove_type("StateVariableAttribute");
175+
}
176+
177+
fn collapse_redundant_node_types(mutator: &mut IrModelMutator) {
178+
// Collapse redundant node types
179+
mutator.collapse_sequence("ParametersDeclaration");
180+
mutator.collapse_sequence("ReturnsDeclaration");
181+
mutator.collapse_sequence("YulParametersDeclaration");
182+
mutator.collapse_sequence("YulReturnsDeclaration");
183+
mutator.collapse_sequence("EventParametersDeclaration");
184+
mutator.collapse_sequence("ErrorParametersDeclaration");
185+
mutator.collapse_sequence("ImportAlias");
186+
mutator.collapse_sequence("ElseBranch");
187+
mutator.collapse_sequence("UsingAlias");
188+
mutator.collapse_sequence("StateVariableDefinitionValue");
189+
mutator.collapse_sequence("OverridePathsDeclaration");
190+
mutator.collapse_sequence("VariableDeclarationValue");
191+
mutator.collapse_sequence("NamedArgumentGroup");
192+
193+
// Collapse IndexAccessEnd manually (requires code in the transformer
194+
// implementation) because it's an optional containing an optional, and that
195+
// complicates automatic code generation in the transformer.
196+
mutator.remove_type("IndexAccessEnd");
197+
mutator.add_sequence_field("IndexAccessExpression", "end", "Expression", true);
198+
199+
// Collapse the middle node in ArgumentsDeclaration
200+
mutator.remove_type("PositionalArgumentsDeclaration");
201+
mutator.remove_type("NamedArgumentsDeclaration");
202+
mutator.add_choice_variant("ArgumentsDeclaration", "PositionalArguments");
203+
mutator.add_choice_variant("ArgumentsDeclaration", "NamedArguments");
204+
}
205+
206+
fn simplify_string_literals(mutator: &mut IrModelMutator) {
207+
// Remove all existing types, as we will simplify them to 3 variants
208+
mutator.remove_type("StringLiterals");
209+
mutator.remove_type("StringLiteral");
210+
mutator.remove_type("HexStringLiterals");
211+
mutator.remove_type("HexStringLiteral");
212+
mutator.remove_type("UnicodeStringLiterals");
213+
mutator.remove_type("UnicodeStringLiteral");
214+
215+
// Re-declare `StringLiteral`, `HexStringLiteral` and `UnicodeStringLiteral`
216+
// as non-unique terminals
217+
mutator.add_non_unique_terminal("StringLiteral");
218+
mutator.add_non_unique_terminal("HexStringLiteral");
219+
mutator.add_non_unique_terminal("UnicodeStringLiteral");
220+
221+
// Create the collection types using the double-quoted variants.
222+
// The choice is irrelevant because we only care that it's a non-unique
223+
// terminal, which is represented by an `Rc<TerminalNode>` anyway.
224+
mutator.add_collection_type("Strings", "StringLiteral");
225+
mutator.add_collection_type("HexStrings", "HexStringLiteral");
226+
mutator.add_collection_type("UnicodeStrings", "UnicodeStringLiteral");
227+
228+
// Now we add the variants to the expression type
229+
mutator.add_choice_variant("StringExpression", "Strings");
230+
mutator.add_choice_variant("StringExpression", "HexStrings");
231+
mutator.add_choice_variant("StringExpression", "UnicodeStrings");
232+
233+
// Update other uses of StringLiteral
234+
mutator.add_sequence_field("PathImport", "path", "StringLiteral", false);
235+
mutator.add_sequence_field("NamedImport", "path", "StringLiteral", false);
236+
mutator.add_sequence_field("ImportDeconstruction", "path", "StringLiteral", false);
237+
mutator.add_choice_variant("ExperimentalFeature", "StringLiteral");
238+
mutator.add_choice_variant("YulLiteral", "StringLiteral");
239+
mutator.add_choice_variant("YulLiteral", "HexStringLiteral");
240+
241+
// For `AssemblyFlags`, also remove the enclosing declaration structure
242+
mutator.remove_type("AssemblyFlagsDeclaration");
243+
mutator.add_collection_type("AssemblyFlags", "StringLiteral");
244+
mutator.add_sequence_field("AssemblyStatement", "flags", "AssemblyFlags", false);
245+
mutator.add_sequence_field("AssemblyStatement", "label", "StringLiteral", true);
246+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
use std::collections::BTreeMap;
2+
3+
use language_definition::model::Language;
4+
use serde::Serialize;
5+
6+
use super::{IrModel, ModelWithBuilder, ModelWithTransformer};
7+
8+
mod ir1_structured_ast;
9+
mod ir2_flat_contracts;
10+
11+
#[derive(Serialize)]
12+
#[serde(untagged)]
13+
pub enum GenericModel {
14+
ModelWithBuilder(ModelWithBuilder),
15+
ModelWithTransformer(ModelWithTransformer),
16+
}
17+
18+
pub fn build_ir_models(language: &Language) -> BTreeMap<String, GenericModel> {
19+
let mut ir_models = BTreeMap::new();
20+
21+
// IR0: CST:
22+
let cst_model = IrModel::from_language(language);
23+
24+
// IR1: structured AST:
25+
let ir1_structured_ast_model = ir1_structured_ast::build_from(&cst_model);
26+
27+
// IR2: flat contract specifiers:
28+
let ir2_flat_contracts_model = ir2_flat_contracts::build_from(&ir1_structured_ast_model.target);
29+
30+
ir_models.insert(
31+
"ir1_structured_ast".to_string(),
32+
GenericModel::ModelWithBuilder(ir1_structured_ast_model),
33+
);
34+
ir_models.insert(
35+
"ir2_flat_contracts".to_string(),
36+
GenericModel::ModelWithTransformer(ir2_flat_contracts_model),
37+
);
38+
39+
ir_models
40+
}

0 commit comments

Comments
 (0)