Skip to content

Commit 410fc81

Browse files
authored
feat: Add extractExamples to extras (#445)
Adds the ability to include inline examples in the comments of an Ohm grammar. `extractExamples` can then parse these examples out, so they can be used in unit tests, etc.
1 parent 49e69dc commit 410fc81

File tree

8 files changed

+365
-3
lines changed

8 files changed

+365
-3
lines changed
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import * as ohm from 'ohm-js';
2+
3+
export const grammarsSource = String.raw`
4+
/*
5+
Superset of the Ohm grammar that allows examples to be embedded in comments.
6+
Any valid Ohm grammar will also be matched by this grammar.
7+
*/
8+
9+
// Example:
10+
//+ "//+ \"x\"\nG {\n//- \"\"\nstart = \"x\"}"
11+
OhmWithExamples <: Ohm {
12+
// The default start rule for Ohm is 'Grammars', which is syntactic rule.
13+
// When the start rule is a syntactic rule, there's no way to get access to
14+
// leading space (including comments). So, for this grammar to be useful,
15+
// you have to explicit use this rule as the start rule.
16+
grammarsWithExamples = (exampleComments applySyntactic<Grammar>)*
17+
18+
Grammar := ident SuperGrammar? "{" (#exampleComments Rule)* "}"
19+
20+
exampleComments = (spacesNoExampleComment exampleComment)*
21+
22+
// Examples:
23+
//+ "//+ \"blah\""
24+
//+ "//- \"one\", \"two\""
25+
//- "// - \"x\", "//-\"x\"
26+
exampleComment
27+
= "//+" examples -- positive
28+
| "//-" examples -- negative
29+
30+
examples = spaceNoNl+ nonemptyListOf<jsonString, exampleSep> spaceNoNl*
31+
exampleSep = "," spaces
32+
33+
exampleCommentPrefix = "//+" | "//-"
34+
35+
spaceNoNl = ~"\n" space
36+
spacesNoExampleComment = (~exampleCommentPrefix space)*
37+
38+
jsonString = "\"" jsonChar* "\""
39+
40+
jsonChar
41+
= jsonEscape
42+
| ~"\\" ~"\"" "\u{0020}".."\u{10FFFF}"
43+
44+
//+ "\\n", "\\u1234"
45+
jsonEscape (a JSON escape sequence)
46+
= "\\\""
47+
| "\\\\"
48+
| "\\/"
49+
| "\\b"
50+
| "\\f"
51+
| "\\n"
52+
| "\\r"
53+
| "\\t"
54+
| "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
55+
}
56+
57+
/*
58+
A stricter version of the grammar that prevents many malformed example
59+
comments from being parsed as regular comments.
60+
*/
61+
OhmWithExamplesStrict <: OhmWithExamples {
62+
// Redefine 'comment' to avoid malformed example comments from silently
63+
// being parsed as regular comments.
64+
comment :=
65+
| ~exampleCommentPrefix comment_singleLine
66+
| comment_multiLine
67+
}
68+
`;
69+
70+
/**
 * The grammars compiled from `grammarsSource`, keyed by grammar name. The
 * built-in Ohm grammar is supplied under the name `Ohm` so that
 * `OhmWithExamples` can be declared as extending it.
 */
export const grammars = ohm.grammars(grammarsSource, {Ohm: ohm.ohmGrammar});

// The semantics that every operation in this file is registered on.
const semantics = grammars.OhmWithExamples.createSemantics();

// hasExamples: does this node contain at least one example comment?
semantics.addOperation('hasExamples', {
  // An iteration has examples as soon as one of its children does.
  _iter(...nodes) {
    for (const node of nodes) {
      if (node.hasExamples()) {
        return true;
      }
    }
    return false;
  },
  exampleComments(_, commentIter) {
    const commentCount = commentIter.numChildren;
    return commentCount > 0;
  },
});
80+
81+
// examples: gather the examples found beneath a node into a flat array of
// objects, visiting children in order.
semantics.addOperation('examples', {
  // Example comments that precede a grammar (rather than one of its rules) are
  // recorded with an empty rule name, followed by the examples found inside
  // that grammar.
  grammarsWithExamples(exampleCommentsIter, grammarIter) {
    return grammarIter.children.flatMap((grammarNode, idx) => {
      const collected = [];
      if (exampleCommentsIter.hasExamples()) {
        const topLevel = exampleCommentsIter.child(idx).examples();
        const grammar = grammarNode.grammarName();
        for (const ex of topLevel) {
          collected.push({...ex, grammar, rule: ''});
        }
      }
      collected.push(...grammarNode.examples());
      return collected;
    });
  },
  // The i-th group of example comments belongs to the i-th rule; tag each of
  // its examples with the grammar name and that rule's name.
  Grammar(name, _, _open, exampleCommentsIter, ruleIter, _close) {
    const grammar = this.grammarName();
    return ruleIter.children.flatMap((ruleNode, idx) => {
      const rule = ruleNode.ruleName();
      const found = exampleCommentsIter.child(idx).examples();
      return found.map(ex => ({...ex, grammar, rule}));
    });
  },
  exampleComments(_, commentIter) {
    const collected = [];
    for (const commentNode of commentIter.children) {
      collected.push(...commentNode.examples());
    }
    return collected;
  },
  // `//+` comments hold inputs that are expected to match...
  exampleComment_positive(_, examples) {
    const parsed = examples.examples();
    return parsed.map(ex => ({...ex, shouldMatch: true}));
  },
  // ...and `//-` comments hold inputs that are expected not to.
  exampleComment_negative(_, examples) {
    const parsed = examples.examples();
    return parsed.map(ex => ({...ex, shouldMatch: false}));
  },
  // Each list element is decoded from its source text with JSON.parse.
  examples(_ws, jsonStringList, _) {
    const stringNodes = jsonStringList.asIteration().children;
    return stringNodes.map(node => ({example: JSON.parse(node.sourceString)}));
  },
  // Ordinary comments never contribute any examples.
  comment_singleLine(_open, _chars, _end) {
    return [];
  },
  comment_multiLine(_open, _chars, _close) {
    return [];
  },
});
129+
130+
// grammarName: the identifier a grammar is declared with, as written in the
// source.
semantics.addOperation('grammarName', {
  Grammar: (name, _super, _open, _comments, _rules, _close) => name.sourceString,
});
135+
136+
// ruleName: the identifier on the left-hand side of a rule, whichever of the
// three forms (define, override, extend) the rule is written in.
semantics.addOperation('ruleName', {
  Rule_define: (ident, _formals, _desc, _eq, _body) => ident.sourceString,
  Rule_override: (ident, _formals, _eq, _body) => ident.sourceString,
  Rule_extend: (ident, _formals, _eq, _body) => ident.sourceString,
});
147+
148+
/** @typedef {{grammar: string, rule: string, example: string, shouldMatch: boolean}} Example */

/**
 * Extracts the examples embedded in the comments of one or more grammars.
 *
 * @param {string} grammarsDef - A string containing one or more grammar definitions.
 * @return {Example[]} The extracted examples; an empty array if there are none.
 * @throws {Error} If `grammarsDef` is not matched by the `grammarsWithExamples`
 *     rule of `OhmWithExamples`. The error message is the match failure message.
 */
export function extractExamples(grammarsDef) {
  // 'grammarsWithExamples' is passed explicitly as the start rule for the match.
  const matchResult = grammars.OhmWithExamples.match(grammarsDef, 'grammarsWithExamples');
  if (matchResult.failed()) {
    throw new Error(matchResult.message);
  }
  return semantics(matchResult).examples();
}

packages/ohm-js/extras/index.d.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,18 @@ export function getLineAndColumnMessage(
2929
offset: number,
3030
...ranges: number[][]
3131
): string;
32+
33+
/** A single example extracted from an example comment in a grammar. */
interface Example {
  /** Name of the grammar that the example belongs to. */
  grammar: string;
  /**
   * Name of the rule that the example comment precedes, or `''` when the
   * comment precedes the grammar itself.
   */
  rule: string;
  /** The example text, i.e. the decoded value of the JSON string. */
  example: string;
  /** `true` for `//+` (positive) examples, `false` for `//-` (negative) ones. */
  shouldMatch: boolean;
}

/**
 * Given a string containing one or more grammar definitions, returns an array
 * of examples extracted from the comments. The array is empty when the
 * grammars contain no examples.
 * Positive examples look like `//+ "one", "two"` and negative examples like
 * `//- "shouldn't match"`. The examples text is a JSON string.
 */
export function extractExamples(grammarsDef: string): Example[];

packages/ohm-js/extras/index.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export {getLineAndColumnMessage, getLineAndColumn} from '../src/util.js';
22
export {VisitorFamily} from './VisitorFamily.js';
33
export {semanticsForToAST, toAST} from './semantics-toAST.js';
4+
export {extractExamples} from './extractExamples.js';
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
Superset of the Ohm grammar that allows examples to be embedded in comments.
3+
Any valid Ohm grammar will also be matched by this grammar.
4+
*/
5+
6+
// Example:
7+
//+ "//+ \"x\"\nG {\n//- \"\"\nstart = \"x\"}"
8+
OhmWithExamples <: Ohm {
9+
// The default start rule for Ohm is 'Grammars', which is a syntactic rule.
10+
// When the start rule is a syntactic rule, there's no way to get access to
11+
// leading space (including comments). So, for this grammar to be useful,
12+
// you have to explicitly use this rule as the start rule.
13+
grammarsWithExamples = (exampleComments applySyntactic<Grammar>)*
14+
15+
Grammar := ident SuperGrammar? "{" (#exampleComments Rule)* "}"
16+
17+
exampleComments = (spacesNoExampleComment exampleComment)*
18+
19+
// Examples:
20+
//+ "//+ \"blah\""
21+
//+ "//- \"one\", \"two\""
22+
//- "// - \"x\", "//-\"x\"
23+
exampleComment
24+
= "//+" examples -- positive
25+
| "//-" examples -- negative
26+
27+
examples = spaceNoNl+ nonemptyListOf<terminal, exampleSep> spaceNoNl*
28+
exampleSep = "," spaces
29+
30+
exampleCommentPrefix = "//+" | "//-"
31+
32+
spaceNoNl = ~"\n" space
33+
spacesNoExampleComment = (~exampleCommentPrefix space)*
34+
}
35+
36+
/*
37+
A stricter version of the grammar that prevents many malformed example
38+
comments from being parsed as regular comments.
39+
*/
40+
OhmWithExamplesStrict <: OhmWithExamples {
41+
// Redefine 'comment' to avoid malformed example comments from silently
42+
// being parsed as regular comments.
43+
comment :=
44+
| ~exampleCommentPrefix comment_singleLine
45+
| comment_multiLine
46+
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import test from 'ava';
2+
import {extractExamples, grammars, grammarsSource} from '../../extras/extractExamples.js';
3+
4+
test('empty', t => {
5+
t.deepEqual(extractExamples(''), []);
6+
});
7+
8+
test('grammar with no examples', t => {
9+
t.deepEqual(extractExamples('G { }'), []);
10+
});
11+
12+
test('simple positive examples', t => {
13+
let examples = extractExamples(`
14+
G {
15+
//+ "x"
16+
start = "x"
17+
}
18+
`);
19+
t.deepEqual(examples, [{grammar: 'G', rule: 'start', example: 'x', shouldMatch: true}]);
20+
21+
examples = extractExamples(`
22+
G {
23+
//+ ""
24+
start = ""
25+
26+
//+ "x"
27+
other = ""
28+
}
29+
`);
30+
t.deepEqual(examples, [
31+
{grammar: 'G', rule: 'start', example: '', shouldMatch: true},
32+
{grammar: 'G', rule: 'other', example: 'x', shouldMatch: true},
33+
]);
34+
});
35+
36+
test('examples for default start rule', t => {
37+
let examples = extractExamples(`
38+
//+ "hey"
39+
G {
40+
//+ ""
41+
start = ""
42+
}
43+
`);
44+
t.deepEqual(examples, [
45+
{grammar: 'G', rule: '', example: 'hey', shouldMatch: true},
46+
{grammar: 'G', rule: 'start', example: '', shouldMatch: true},
47+
]);
48+
49+
examples = extractExamples(`
50+
//+ "hey"
51+
G {
52+
//+ ""
53+
start = ""
54+
}
55+
`);
56+
t.deepEqual(examples, [
57+
{grammar: 'G', rule: '', example: 'hey', shouldMatch: true},
58+
{grammar: 'G', rule: 'start', example: '', shouldMatch: true},
59+
]);
60+
});
61+
62+
test('top-level whitespace', t => {
63+
const expected = [{grammar: 'G', rule: '', example: '', shouldMatch: true}];
64+
t.deepEqual(extractExamples(' //+ ""\n G{}'), expected);
65+
t.deepEqual(extractExamples(' //+ "" \nG{}'), expected);
66+
t.deepEqual(extractExamples('\n\n//+ ""\n\nG{}'), expected);
67+
});
68+
69+
function getExamples(input) {
70+
return extractExamples(`G { ${input}\nstart = }`).map(({example, shouldMatch}) => {
71+
return {example, shouldMatch};
72+
});
73+
}
74+
75+
test('example comments - negative examples', t => {
76+
t.deepEqual(getExamples('//- "blah"\n'), [{example: 'blah', shouldMatch: false}]);
77+
t.deepEqual(
78+
getExamples(`
79+
//+ "blah"
80+
//- "wooo"`),
81+
[
82+
{example: 'blah', shouldMatch: true},
83+
{example: 'wooo', shouldMatch: false},
84+
],
85+
);
86+
// t.throws(() => getExamples('//-"x"'), null, 'space required after "-"');
87+
t.deepEqual(getExamples('// - "x"'), [], 'parsed as a normal comment');
88+
});
89+
90+
test('example comments - corner cases', t => {
91+
t.deepEqual(
92+
getExamples('//+ "blah"\n\n'),
93+
[{example: 'blah', shouldMatch: true}],
94+
'extra blank lines before rule',
95+
);
96+
t.deepEqual(
97+
getExamples(`
98+
//+ "blah"
99+
//+ "wooo"`),
100+
[
101+
{example: 'blah', shouldMatch: true},
102+
{example: 'wooo', shouldMatch: true},
103+
],
104+
'extra leading space',
105+
);
106+
// t.throws(() => {
107+
// t.deepEqual(getExamples('//+ '), [], 'no terminals');
108+
// });
109+
t.deepEqual(getExamples('//+ "" '), [{example: '', shouldMatch: true}], 'trailing space');
110+
t.deepEqual(
111+
getExamples('//+ ""\n//- ""'),
112+
[
113+
{example: '', shouldMatch: true},
114+
{example: '', shouldMatch: false},
115+
],
116+
'contradictory examples',
117+
);
118+
t.deepEqual(
119+
getExamples('//+ ""\n//+ ""'),
120+
[
121+
{example: '', shouldMatch: true},
122+
{example: '', shouldMatch: true},
123+
],
124+
'duplicate examples',
125+
);
126+
});
127+
128+
test('extracted examples', t => {
129+
for (const {grammar, rule, example, shouldMatch} of extractExamples(grammarsSource)) {
130+
const g = grammars[grammar];
131+
const startRule = rule === '' ? undefined : rule;
132+
t.is(g.match(example, startRule).succeeded(), shouldMatch, `${example}`);
133+
}
134+
});

packages/packaging-tests/test/test-commonjs.cjs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ function checkExports(exports) {
1616
function checkExtrasExports(extras) {
1717
assert.equal(typeof extras.VisitorFamily, 'function');
1818
assert.equal(typeof extras.toAST, 'function');
19+
assert.equal(typeof extras.extractExamples, 'function');
1920
assert.not('default' in exports, 'there should be no default export');
20-
assert.ok(Object.keys(extras).length === 5);
21+
assert.ok(Object.keys(extras).length === 6);
2122
}
2223

2324
test('Core CommonJS exports', async () => {

packages/packaging-tests/test/test-esm.mjs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ test('Main exports (ESM)', async () => {
2121
test('Extras exports (ESM)', async () => {
2222
assert.equal(typeof extras.VisitorFamily, 'function');
2323
assert.equal(typeof extras.toAST, 'function');
24-
assert.ok(Object.keys(extras).length === 5);
24+
assert.equal(typeof extras.extractExamples, 'function');
25+
assert.ok(Object.keys(extras).length === 6);
2526

2627
const exports = await import('ohm-js/extras');
2728
assert.not('default' in exports, 'there should be no default export');

0 commit comments

Comments
 (0)