Skip to content

Commit e495eb2

Browse files
authored
split language sections into LexicalContexts (#1520)
First of two PRs, splitting the language sections into different contexts, each with their own `Identifier`: - Solidity: main context - Pragma: since lexing version specifiers conflicts with number literals in Solidity/Yul - Yul: since lexing identifiers conflicts with Solidity's own identifiers, as they can contain dots. The next PR will add validation to actually enforce boundaries between them, and make sure each context's nonterminals can only reference terminals in the same context.
1 parent 24ecf61 commit e495eb2

File tree

49 files changed

+6284
-6288
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+6284
-6288
lines changed

crates/codegen-v2/parser/src/lexer/builder.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,11 @@ impl LexerModelBuilder {
4949
// For now, we just collect trivia and duplicate them into all contexts:
5050
let mut common_trivia = Vec::<Lexeme>::new();
5151

52-
for topic in language.topics() {
53-
let context_name = topic.lexical_context.to_string();
54-
let context = non_trivia.entry(context_name).or_default();
52+
for context in &language.contexts {
53+
let context_name = context.name.to_string();
54+
let entry = non_trivia.entry(context_name).or_default();
5555

56-
for item in &topic.items {
56+
for item in context.items() {
5757
match item {
5858
Item::Struct { .. } => {}
5959
Item::Enum { .. } => {}
@@ -62,8 +62,8 @@ impl LexerModelBuilder {
6262
Item::Precedence { .. } => {}
6363

6464
Item::Trivia { item } => common_trivia.push(self.convert_trivia(item)),
65-
Item::Keyword { item } => context.extend(self.convert_keyword(item)),
66-
Item::Token { item } => context.extend(self.convert_token(item)),
65+
Item::Keyword { item } => entry.extend(self.convert_keyword(item)),
66+
Item::Token { item } => entry.extend(self.convert_token(item)),
6767

6868
Item::Fragment { .. } => {}
6969
}

crates/language-v2/definition/src/compiler/analysis/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ impl Analysis {
6666

6767
impl SpannedLanguage {
6868
fn items(&self) -> impl Iterator<Item = &SpannedItem> {
69-
self.sections
69+
self.contexts
7070
.iter()
71+
.flat_map(|context| &context.sections)
7172
.flat_map(|section| &section.topics)
7273
.flat_map(|topic| &topic.items)
7374
}

crates/language-v2/definition/src/compiler/analysis/p2_version_specifiers/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,6 @@ fn check_fields(analysis: &mut Analysis, fields: &IndexMap<Spanned<Identifier>,
168168
fn check_keyword(analysis: &mut Analysis, item: &SpannedKeywordItem) {
169169
let SpannedKeywordItem {
170170
name: _,
171-
identifier: _,
172171
enabled,
173172
definitions,
174173
} = item;

crates/language-v2/definition/src/compiler/analysis/p3_references/mod.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -296,15 +296,12 @@ fn check_trivia(analysis: &mut Analysis, item: &SpannedTriviaItem, enablement: &
296296

297297
fn check_keyword(analysis: &mut Analysis, item: &SpannedKeywordItem, enablement: &VersionSet) {
298298
let SpannedKeywordItem {
299-
name,
300-
identifier,
299+
name: _,
301300
enabled,
302301
definitions: _,
303302
} = item;
304303

305-
let enablement = update_enablement(analysis, enablement, enabled.as_ref());
306-
307-
check_reference(analysis, Some(name), identifier, &enablement, &[Token]);
304+
let _ = update_enablement(analysis, enablement, enabled.as_ref());
308305
}
309306

310307
fn check_token(analysis: &mut Analysis, item: &SpannedTokenItem, enablement: &VersionSet) {

crates/language-v2/definition/src/model/manifest.rs

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,17 @@ pub struct Language {
2727
/// The supported versions of the language
2828
pub versions: IndexSet<Version>,
2929

30-
/// The sections of the language
31-
pub sections: Vec<Section>,
30+
/// The lexical contexts of the language, splitting grammar elements based on which lexer can recognize their terminals.
31+
pub contexts: Vec<LexicalContext>,
3232

3333
/// The built-in contexts
3434
pub built_ins: Vec<BuiltInContext>,
3535
}
3636

3737
impl Language {
38-
/// Returns every topic in the language definition (across all sections).
39-
pub fn topics(&self) -> impl Iterator<Item = &Topic> {
40-
self.sections.iter().flat_map(|section| &section.topics)
41-
}
42-
43-
/// Returns every item in the language definition (across all sections and topics).
38+
/// Returns every item in the language definition (across all contexts).
4439
pub fn items(&self) -> impl Iterator<Item = &Item> {
45-
self.topics().flat_map(|topic| &topic.items)
40+
self.contexts.iter().flat_map(|context| context.items())
4641
}
4742

4843
/// Collects all versions that change the language in a breaking way.
@@ -168,6 +163,21 @@ impl Language {
168163
}
169164

170165
/// A section is a named container for topics, used for organizing the large grammar definition in user documentation.
166+
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
167+
#[derive_spanned_type(Clone, Debug, ParseInputTokens, WriteOutputTokens)]
168+
pub struct LexicalContext {
169+
pub name: Identifier,
170+
pub identifier_token: Option<Identifier>,
171+
pub sections: Vec<Section>,
172+
}
173+
174+
impl LexicalContext {
175+
/// Returns every item in that context (across all sections).
176+
pub fn items(&self) -> impl Iterator<Item = &Item> {
177+
self.sections.iter().flat_map(|section| section.items())
178+
}
179+
}
180+
171181
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
172182
#[derive_spanned_type(Clone, Debug, ParseInputTokens, WriteOutputTokens)]
173183
pub struct Section {
@@ -187,7 +197,6 @@ impl Section {
187197
#[derive_spanned_type(Clone, Debug, ParseInputTokens, WriteOutputTokens)]
188198
pub struct Topic {
189199
pub title: String,
190-
pub lexical_context: Identifier,
191200
pub items: Vec<Item>,
192201
}
193202

crates/language-v2/definition/src/model/terminals/keyword.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use crate::model::{Identifier, Scanner, VersionSpecifier};
88
#[derive_spanned_type(Clone, Debug, ParseInputTokens, WriteOutputTokens)]
99
pub struct KeywordItem {
1010
pub name: Identifier,
11-
pub identifier: Identifier,
1211

1312
#[serde(skip_serializing_if = "Option::is_none")]
1413
pub enabled: Option<VersionSpecifier>,

crates/language-v2/internal_macros/src/derive/spanned.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ fn get_spanned_type(input: Type) -> Type {
127127
| "KeywordDefinition"
128128
| "KeywordItem"
129129
| "KeywordValue"
130+
| "LexicalContext"
130131
| "PrecedenceExpression"
131132
| "PrecedenceItem"
132133
| "PrecedenceOperator"

crates/language-v2/tests/src/fail/p0_parsing/duplicate_map_key/test.rs

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,24 @@ language_v2_macros::compile!(Language(
66
leading_trivia = Sequence([]),
77
trailing_trivia = Sequence([]),
88
versions = ["1.0.0", "2.0.0", "3.0.0"],
9-
sections = [Section(
10-
title = "Section One",
11-
topics = [Topic(
12-
title = "Topic One",
13-
lexical_context = Foo,
14-
items = [
15-
Struct(
16-
name = Bar,
17-
fields = (
18-
field_1 = Required(Baz),
19-
field_1 = Required(Baz),
20-
field_3 = Required(Baz)
21-
)
22-
),
23-
Token(name = Baz, definitions = [TokenDefinition(Atom("baz"))])
24-
]
9+
contexts = [LexicalContext(
10+
name = Foo,
11+
sections = [Section(
12+
title = "Section One",
13+
topics = [Topic(
14+
title = "Topic One",
15+
items = [
16+
Struct(
17+
name = Bar,
18+
fields = (
19+
field_1 = Required(Baz),
20+
field_1 = Required(Baz),
21+
field_3 = Required(Baz)
22+
)
23+
),
24+
Token(name = Baz, definitions = [TokenDefinition(Atom("baz"))])
25+
]
26+
)]
2527
)]
2628
)],
2729
built_ins = []
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
error: Duplicate map key.
2-
--> src/fail/p0_parsing/duplicate_map_key/test.rs:19:25
2+
--> src/fail/p0_parsing/duplicate_map_key/test.rs:20:29
33
|
4-
19 | field_1 = Required(Baz),
5-
| ^^^^^^^
4+
20 | ... field_1 = Required(Baz),
5+
| ^^^^^^^

crates/language-v2/tests/src/fail/p0_parsing/duplicate_set_entry/test.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@ language_v2_macros::compile!(Language(
66
leading_trivia = Sequence([]),
77
trailing_trivia = Sequence([]),
88
versions = ["1.0.0", "1.0.0", "3.0.0"],
9-
sections = [Section(
10-
title = "Section One",
11-
topics = [Topic(
12-
title = "Topic One",
13-
lexical_context = Foo,
14-
items = [Token(
15-
name = Bar,
16-
definitions = [TokenDefinition(Atom("bar"))]
9+
contexts = [LexicalContext(
10+
name = Foo,
11+
sections = [Section(
12+
title = "Section One",
13+
topics = [Topic(
14+
title = "Topic One",
15+
items = [Token(
16+
name = Bar,
17+
definitions = [TokenDefinition(Atom("bar"))]
18+
)]
1719
)]
1820
)]
1921
)],

0 commit comments

Comments
 (0)