Skip to content

Commit df55490

Browse files
authored
Merge pull request #18 from mkantor/externalize-parsing
Externalize parser combinator library
2 parents b4632b6 + f7b8051 commit df55490

File tree

11 files changed

+165
-373
lines changed

11 files changed

+165
-373
lines changed

package-lock.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
"dependencies": {
2222
"@matt.kantor/either": "^1.0.0",
2323
"@matt.kantor/option": "^1.0.0",
24+
"@matt.kantor/parsing": "^1.0.0",
2425
"kleur": "^4.1.5"
2526
}
2627
}

src/language/parsing/atom.ts

Lines changed: 51 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,16 @@
1-
import { parser, type Parser } from '../../parsing.js'
1+
import {
2+
type Parser,
3+
anySingleCharacter,
4+
as,
5+
butNot,
6+
lazy,
7+
literal,
8+
map,
9+
oneOf,
10+
oneOrMore,
11+
sequence,
12+
zeroOrMore,
13+
} from '@matt.kantor/parsing'
214
import { optionallySurroundedByParentheses } from './parentheses.js'
315
import { whitespace } from './trivia.js'
416

@@ -10,57 +22,57 @@ export const isAtom = (value: unknown): value is Atom =>
1022
export const unit = '' as const
1123

1224
export const atomParser: Parser<Atom> = optionallySurroundedByParentheses(
13-
parser.lazy(() => parser.oneOf([quotedAtom, unquotedAtom])),
25+
lazy(() => oneOf([quotedAtom, unquotedAtom])),
1426
)
1527

16-
const quotedAtom = parser.map(
17-
parser.sequence([
18-
parser.as(parser.literal('"'), ''),
19-
parser.map(
20-
parser.zeroOrMore(
21-
parser.oneOf([
22-
parser.butNot(
23-
parser.anySingleCharacter,
24-
parser.oneOf([parser.literal('"'), parser.literal('\\')]),
28+
const quotedAtom = map(
29+
sequence([
30+
as(literal('"'), ''),
31+
map(
32+
zeroOrMore(
33+
oneOf([
34+
butNot(
35+
anySingleCharacter,
36+
oneOf([literal('"'), literal('\\')]),
2537
'`"` or `\\`',
2638
),
27-
parser.as(parser.literal('\\"'), '"'),
28-
parser.as(parser.literal('\\\\'), '\\'),
39+
as(literal('\\"'), '"'),
40+
as(literal('\\\\'), '\\'),
2941
]),
3042
),
3143
output => output.join(''),
3244
),
33-
parser.as(parser.literal('"'), ''),
45+
as(literal('"'), ''),
3446
]),
3547
([_1, contents, _2]) => contents,
3648
)
3749

38-
const unquotedAtom = parser.map(
39-
parser.oneOrMore(
40-
parser.butNot(
41-
parser.anySingleCharacter,
42-
parser.oneOf([
50+
const unquotedAtom = map(
51+
oneOrMore(
52+
butNot(
53+
anySingleCharacter,
54+
oneOf([
4355
whitespace,
44-
parser.literal('"'),
45-
parser.literal('{'),
46-
parser.literal('}'),
47-
parser.literal('['),
48-
parser.literal(']'),
49-
parser.literal('('),
50-
parser.literal(')'),
51-
parser.literal('<'),
52-
parser.literal('>'),
53-
parser.literal('#'),
54-
parser.literal('&'),
55-
parser.literal('|'),
56-
parser.literal('\\'),
57-
parser.literal('='),
58-
parser.literal(':'),
59-
parser.literal(';'),
60-
parser.literal(','),
61-
parser.literal('//'),
62-
parser.literal('/*'),
63-
parser.literal('*/'),
56+
literal('"'),
57+
literal('{'),
58+
literal('}'),
59+
literal('['),
60+
literal(']'),
61+
literal('('),
62+
literal(')'),
63+
literal('<'),
64+
literal('>'),
65+
literal('#'),
66+
literal('&'),
67+
literal('|'),
68+
literal('\\'),
69+
literal('='),
70+
literal(':'),
71+
literal(';'),
72+
literal(','),
73+
literal('//'),
74+
literal('/*'),
75+
literal('*/'),
6476
]),
6577
'a forbidden character sequence',
6678
),

src/language/parsing/molecule.ts

Lines changed: 52 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,15 @@
1-
import { parser, type Parser } from '../../parsing.js'
1+
import {
2+
as,
3+
lazy,
4+
literal,
5+
map,
6+
nothing,
7+
oneOf,
8+
oneOrMore,
9+
sequence,
10+
zeroOrMore,
11+
type Parser,
12+
} from '@matt.kantor/parsing'
213
import { atomParser, type Atom } from './atom.js'
314
import { optionallySurroundedByParentheses } from './parentheses.js'
415
import { trivia } from './trivia.js'
@@ -7,32 +18,32 @@ export type Molecule = { readonly [key: Atom]: Molecule | Atom }
718

819
export const unit: Molecule = {}
920

10-
export const moleculeParser: Parser<Molecule> = parser.oneOf([
21+
export const moleculeParser: Parser<Molecule> = oneOf([
1122
optionallySurroundedByParentheses(
12-
parser.map(
13-
parser.lazy(() => moleculeAsEntries(makeIncrementingIndexer())),
23+
map(
24+
lazy(() => moleculeAsEntries(makeIncrementingIndexer())),
1425
Object.fromEntries,
1526
),
1627
),
17-
parser.lazy(() => sugaredApply),
18-
parser.lazy(() => sugaredFunction),
28+
lazy(() => sugaredApply),
29+
lazy(() => sugaredFunction),
1930
])
2031

2132
// During parsing molecules and properties are represented as nested arrays (of key/value pairs).
2233
// The following utilities make it easier to work with such a structure.
2334

2435
const flat = <Output>(theParser: Parser<readonly Output[]>) =>
25-
parser.map(theParser, output => output.flat())
36+
map(theParser, output => output.flat())
2637

27-
const omit = (theParser: Parser<unknown>) => parser.as(theParser, [])
38+
const omit = (theParser: Parser<unknown>) => as(theParser, [])
2839

2940
const optional = <Output>(
3041
theParser: Parser<readonly Output[]>,
31-
): Parser<readonly Output[]> => parser.oneOf([theParser, omit(parser.nothing)])
42+
): Parser<readonly Output[]> => oneOf([theParser, omit(nothing)])
3243

3344
const withoutOmittedOutputs = <Output>(
3445
theParser: Parser<readonly (readonly Output[])[]>,
35-
) => parser.map(theParser, output => output.filter(output => output.length > 0))
46+
) => map(theParser, output => output.filter(output => output.length > 0))
3647

3748
// Keyless properties are automatically assigned numeric indexes, which uses some mutable state.
3849
type Indexer = () => string
@@ -48,37 +59,30 @@ const makeIncrementingIndexer = (): Indexer => {
4859

4960
// Language-specific parsers follow.
5061

51-
const propertyDelimiter = parser.oneOf([
52-
parser.sequence([
53-
optional(omit(trivia)),
54-
parser.literal(','),
55-
optional(omit(trivia)),
56-
]),
62+
const propertyDelimiter = oneOf([
63+
sequence([optional(omit(trivia)), literal(','), optional(omit(trivia))]),
5764
trivia,
5865
])
5966

6067
const sugaredLookup: Parser<PartialMolecule> =
6168
optionallySurroundedByParentheses(
62-
parser.map(
63-
parser.sequence([
64-
parser.literal(':'),
65-
parser.oneOf([atomParser, moleculeParser]),
66-
]),
69+
map(
70+
sequence([literal(':'), oneOf([atomParser, moleculeParser])]),
6771
([_colon, query]) => ({ 0: '@lookup', query }),
6872
),
6973
)
7074

7175
const sugaredFunction: Parser<PartialMolecule> =
7276
optionallySurroundedByParentheses(
73-
parser.map(
77+
map(
7478
flat(
75-
parser.sequence([
76-
parser.map(atomParser, output => [output]),
79+
sequence([
80+
map(atomParser, output => [output]),
7781
omit(trivia),
78-
omit(parser.literal('=>')),
82+
omit(literal('=>')),
7983
omit(trivia),
80-
parser.map(
81-
parser.lazy(() => propertyValue),
84+
map(
85+
lazy(() => propertyValue),
8286
output => [output],
8387
),
8488
]),
@@ -91,16 +95,16 @@ const sugaredFunction: Parser<PartialMolecule> =
9195
),
9296
)
9397

94-
const sugaredApply: Parser<PartialMolecule> = parser.map(
95-
parser.sequence([
96-
parser.oneOf([sugaredLookup, parser.lazy(() => sugaredFunction)]),
97-
parser.oneOrMore(
98-
parser.sequence([
99-
parser.literal('('),
98+
const sugaredApply: Parser<PartialMolecule> = map(
99+
sequence([
100+
oneOf([sugaredLookup, lazy(() => sugaredFunction)]),
101+
oneOrMore(
102+
sequence([
103+
literal('('),
100104
optional(omit(trivia)),
101-
parser.lazy(() => propertyValue),
105+
lazy(() => propertyValue),
102106
optional(omit(trivia)),
103-
parser.literal(')'),
107+
literal(')'),
104108
]),
105109
),
106110
]),
@@ -116,52 +120,49 @@ const sugaredApply: Parser<PartialMolecule> = parser.map(
116120
)
117121

118122
const propertyKey = atomParser
119-
const propertyValue = parser.oneOf([
123+
const propertyValue = oneOf([
120124
sugaredApply, // must come first to avoid ambiguity
121-
parser.lazy(() => moleculeParser), // must come second to avoid ambiguity
125+
lazy(() => moleculeParser), // must come second to avoid ambiguity
122126
atomParser,
123127
sugaredLookup,
124128
])
125129

126130
const namedProperty = flat(
127-
parser.sequence([
131+
sequence([
128132
propertyKey,
129-
omit(parser.literal(':')),
133+
omit(literal(':')),
130134
optional(omit(trivia)),
131135
propertyValue,
132136
]),
133137
)
134138

135139
const numberedProperty = (index: Indexer) =>
136-
parser.map(propertyValue, value => [index(), value])
140+
map(propertyValue, value => [index(), value])
137141

138142
const property = (index: Indexer) =>
139143
optionallySurroundedByParentheses(
140-
parser.oneOf([namedProperty, numberedProperty(index)]),
144+
oneOf([namedProperty, numberedProperty(index)]),
141145
)
142146

143147
const moleculeAsEntries = (index: Indexer) =>
144148
withoutOmittedOutputs(
145149
flat(
146-
parser.sequence([
147-
omit(parser.literal('{')),
150+
sequence([
151+
omit(literal('{')),
148152
// Allow initial property not preceded by a delimiter (e.g. `{a b}`).
149-
parser.map(optional(property(index)), property => [property]),
150-
parser.zeroOrMore(
153+
map(optional(property(index)), property => [property]),
154+
zeroOrMore(
151155
flat(
152-
parser.sequence([
153-
omit(propertyDelimiter),
154-
parser.lazy(() => property(index)),
155-
]),
156+
sequence([omit(propertyDelimiter), lazy(() => property(index))]),
156157
),
157158
),
158159
optional(omit(propertyDelimiter)),
159-
omit(parser.literal('}')),
160+
omit(literal('}')),
160161
]),
161162
),
162163
)
163164

164-
// This is a lazy workaround for `parser.sequence` returning an array rather than a tuple with
165+
// This is a lazy workaround for `sequence` returning an array rather than a tuple with
165166
// definitely-present elements.
166167
type PartialMolecule = {
167168
readonly [key: Atom]: PartialMolecule | Atom | undefined
src/language/parsing/parentheses.ts

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,36 @@
1-
import { parser, type Parser } from '../../parsing.js'
1+
import {
2+
literal,
3+
map,
4+
oneOf,
5+
sequence,
6+
zeroOrMore,
7+
type Parser,
8+
} from '@matt.kantor/parsing'
29
import { trivia } from './trivia.js'
310

411
const optionallySurroundedBy = <Output>(
512
parser1: Parser<unknown>,
613
theParser: Parser<Output>,
714
parser2: Parser<unknown>,
815
): Parser<Output> =>
9-
parser.oneOf([
16+
oneOf([
1017
theParser,
11-
parser.map(
12-
parser.sequence([parser1, theParser, parser2]),
13-
([_1, output, _2]) => output,
14-
),
18+
map(sequence([parser1, theParser, parser2]), ([_1, output, _2]) => output),
1519
])
1620

1721
export const optionallySurroundedByParentheses = <Output>(
1822
theParser: Parser<Output>,
1923
): Parser<Output> =>
20-
parser.oneOf([
24+
oneOf([
2125
// This allows `theParser` to greedily consume trivia.
2226
optionallySurroundedBy(
23-
parser.literal('('),
27+
literal('('),
2428
theParser,
25-
parser.sequence([parser.zeroOrMore(trivia), parser.literal(')')]),
29+
sequence([zeroOrMore(trivia), literal(')')]),
2630
),
2731
optionallySurroundedBy(
28-
parser.sequence([parser.literal('('), parser.zeroOrMore(trivia)]),
32+
sequence([literal('('), zeroOrMore(trivia)]),
2933
theParser,
30-
parser.sequence([parser.zeroOrMore(trivia), parser.literal(')')]),
34+
sequence([zeroOrMore(trivia), literal(')')]),
3135
),
3236
])

0 commit comments

Comments
 (0)