Skip to content

Commit 38e6ca6

Browse files
authored
Fix C++ syntax collision (#150)
* Fix C++ syntax collision * Add tests for C++ syntax, fix failing test * Add more tests for the possible regex cases * Make JSON Attribute Tests pass * Change Node version in CI * Make node version match mine
1 parent 4a28cda commit 38e6ca6

File tree

7 files changed

+113
-23
lines changed

7 files changed

+113
-23
lines changed

.github/workflows/node.js.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515

1616
strategy:
1717
matrix:
18-
node-version: [20.x]
18+
node-version: [21.x]
1919
# See supported Node.js release schedule at https://nodejs.org/en/about/releases/
2020

2121
steps:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
auto something = SomeClass::someProperty;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
auto something = SomeClass::state::something;
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// :snippet-start: cpp-test
2+
auto something = SomeClass::someProperty;
3+
// :snippet-end:
4+
5+
// :snippet-start: failing-cpp-test
6+
auto something = SomeClass::state::something;
7+
// :snippet-end:

src/bluehawk/parser/lexer/lexer.test.ts

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,77 @@ this is used to replace
6565
"Newline",
6666
]);
6767
});
68+
69+
it("does not misinterpret C++ syntax as tokens", () => {
70+
const result = lexer.tokenize(`SomeClass::state::something;`);
71+
expect(result.errors.length).toBe(0);
72+
const tokenNames = result.tokens.map((token) => token.tokenType.name);
73+
expect(tokenNames).toBeNull;
74+
});
75+
76+
it("does not make a token from content that starts with ::", () => {
77+
const result = lexer.tokenize(`::SomeClass::state::something;`);
78+
expect(result.errors.length).toBe(0);
79+
const tokenNames = result.tokens.map((token) => token.tokenType.name);
80+
expect(tokenNames).toBeNull;
81+
});
82+
83+
it("does not make a token from content that ends with ::", () => {
84+
const result = lexer.tokenize(`SomeClass::state::something::`);
85+
expect(result.errors.length).toBe(0);
86+
const tokenNames = result.tokens.map((token) => token.tokenType.name);
87+
expect(tokenNames).toBeNull;
88+
});
89+
90+
it("does not make a token from content that starts and ends with ::", () => {
91+
const result = lexer.tokenize(`::SomeClass::state::something::`);
92+
expect(result.errors.length).toBe(0);
93+
const tokenNames = result.tokens.map((token) => token.tokenType.name);
94+
expect(tokenNames).toBeNull;
95+
});
96+
97+
it("does not make a token with a space in the state tag", () => {
98+
const result = lexer.tokenize(`
99+
// :state -start: state-identifier
100+
SomeClass::state::something;
101+
// :state-end:
102+
`);
103+
expect(result.errors.length).toBe(0);
104+
const tokenNames = result.tokens.map((token) => token.tokenType.name);
105+
expect(tokenNames).toBeNull;
106+
});
107+
108+
it("does not make a token with a space after the start colon", () => {
109+
const result = lexer.tokenize(`
110+
// : state-start: state-identifier
111+
SomeClass::state::something;
112+
// :state-end:
113+
`);
114+
expect(result.errors.length).toBe(0);
115+
const tokenNames = result.tokens.map((token) => token.tokenType.name);
116+
expect(tokenNames).toBeNull;
117+
});
118+
119+
it("Correctly tokenizes C++ syntax within a tag", () => {
120+
const result = lexer.tokenize(`
121+
// :state-start: state-identifier
122+
SomeClass::state::something;
123+
// :state-end:
124+
`);
125+
expect(result.errors.length).toBe(0);
126+
const tokenNames = result.tokens.map((token) => token.tokenType.name);
127+
expect(tokenNames).toStrictEqual([
128+
"Newline",
129+
"LineComment",
130+
"TagStart",
131+
"Identifier",
132+
"Newline",
133+
"Newline",
134+
"LineComment",
135+
"TagEnd",
136+
"Newline",
137+
]);
138+
});
68139
});
69140

70141
describe("custom comment lexer", () => {
@@ -116,9 +187,13 @@ describe("custom comment lexer", () => {
116187

117188
it("rejects comment patterns that conflict with other tokens", () => {
118189
expect(() => {
119-
makeLexer([makeLineCommentToken(TAG_PATTERN)]);
120-
}).toThrowError(`Errors detected in definition of Lexer:
121-
The same RegExp pattern ->/:([A-z0-9-]+):[^\\S\\r\\n]*/<-has been used in all of the following Token Types: Tag, LineComment <-`);
190+
try {
191+
makeLexer([makeLineCommentToken(TAG_PATTERN)]);
192+
} catch (e) {
193+
expect(e.message).toBe(`Errors detected in definition of Lexer:
194+
The same RegExp pattern ->/(?<!:):([A-z0-9-]+):(?!:)[^\\S\\r\\n]*/<-has been used in all of the following Token Types: Tag, LineComment <-`);
195+
}
196+
});
122197
});
123198
});
124199

src/bluehawk/parser/lexer/tokens.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,24 +82,30 @@ const Text = createToken({
8282
// TODO: Allow any amount of non-newline white space (/[^\S\r\n]*/) to be
8383
// included before or after the actual tag name to make stripping it out
8484
// much easier.
85-
const TAG_START_PATTERN /**/ = /:([A-z0-9-]+)-start:/;
86-
const TAG_END_PATTERN /* */ = /:([A-z0-9-]+)-end:/;
87-
const TAG_PATTERN /* */ = /:([A-z0-9-]+):[^\S\r\n]*/;
85+
const TAG_START_PATTERN /**/ = /(?<!:):([A-z0-9-]+)-start:(?!:)/;
86+
const TAG_END_PATTERN /* */ = /(?<!:):([A-z0-9-]+)-end:(?!:)/;
87+
const TAG_PATTERN /* */ = /(?<!:):([A-z0-9-]+):(?!:)[^\S\r\n]*/;
8888

8989
const TagStart = createToken({
9090
name: "TagStart",
9191
pattern: TAG_START_PATTERN,
9292
push_mode: "TagAttributesMode",
93+
line_breaks: false,
94+
start_chars_hint: [":"],
9395
});
9496

9597
const TagEnd = createToken({
9698
name: "TagEnd",
9799
pattern: TAG_END_PATTERN,
100+
line_breaks: false,
101+
start_chars_hint: [":"],
98102
});
99103

100104
const Tag = createToken({
101105
name: "Tag",
102106
pattern: TAG_PATTERN,
107+
line_breaks: false,
108+
start_chars_hint: [":"],
103109
});
104110

105111
const Identifier = createToken({

src/bluehawk/parser/visitor/jsonAttributeList.test.ts

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,12 @@ describe("JSON attribute lists", () => {
8484
const result = visitor.visit(cst, source);
8585
expect(result.tagNodes[0].attributes).toBeUndefined();
8686
expect(result.errors[0].message).toBe(
87-
"Expected double-quoted property name in JSON"
87+
"Expected double-quoted property name in JSON at position 19 (line 5 column 3)"
8888
);
8989
expect(result.errors[0].location).toStrictEqual({
90-
line: 5,
91-
column: 3,
92-
offset: 33,
90+
line: 1,
91+
column: 15,
92+
offset: 14,
9393
});
9494
});
9595

@@ -133,12 +133,12 @@ describe("JSON attribute lists", () => {
133133
const visitor = makeCstVisitor(parser);
134134
const result = visitor.visit(cst, source);
135135
expect(result.errors[0].location).toStrictEqual({
136-
line: 5,
137-
column: 1,
138-
offset: 23,
136+
line: 1,
137+
column: 15,
138+
offset: 14,
139139
});
140140
expect(result.errors[0].message).toBe(
141-
"Expected property name or '}' in JSON"
141+
"Expected property name or '}' in JSON at position 9 (line 5 column 1)"
142142
);
143143
});
144144

@@ -158,12 +158,12 @@ describe("JSON attribute lists", () => {
158158
const visitor = makeCstVisitor(parser);
159159
const result = visitor.visit(cst, source);
160160
expect(result.errors[0].location).toStrictEqual({
161-
line: 5,
162-
column: 1,
163-
offset: 21,
161+
line: 1,
162+
column: 15,
163+
offset: 14,
164164
});
165165
expect(result.errors[0].message).toBe(
166-
"Expected property name or '}' in JSON"
166+
"Expected property name or '}' in JSON at position 7 (line 5 column 1)"
167167
);
168168
});
169169

@@ -250,12 +250,12 @@ describe("JSON attribute lists", () => {
250250
const visitor = makeCstVisitor(parser);
251251
const result = visitor.visit(cst, source);
252252
expect(result.errors[0].location).toStrictEqual({
253-
line: 3,
254-
column: 4,
255-
offset: 39,
253+
line: 1,
254+
column: 18,
255+
offset: 17,
256256
});
257257
expect(result.errors[0].message).toBe(
258-
"Expected double-quoted property name in JSON"
258+
"Expected double-quoted property name in JSON at position 22 (line 3 column 4)"
259259
);
260260
});
261261
});

0 commit comments

Comments
 (0)