Skip to content

Commit 9224aa3

Browse files
authored
Merge pull request #54 from mkantor/optimize-parsers
Trivially clean up parsers
2 parents 6969f52 + 3b9b05b commit 9224aa3

File tree

6 files changed

+126
-93
lines changed

6 files changed

+126
-93
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ data representation implied by the fact that a value is an atom (e.g. the atom
5151
`2` may be an integer in memory).
5252

5353
Bare words not containing any
54-
[reserved character sequences](./src/language/parsing/atom.ts#L19-L41) are
54+
[reserved character sequences](./src/language/parsing/atom.ts#L33-L57) are
5555
atoms:
5656

5757
```

src/language/parsing/atom.ts

Lines changed: 51 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,69 +3,79 @@ import {
33
anySingleCharacter,
44
as,
55
butNot,
6-
lazy,
76
literal,
87
map,
98
oneOf,
109
oneOrMore,
1110
sequence,
1211
zeroOrMore,
1312
} from '@matt.kantor/parsing'
13+
import {
14+
backslash,
15+
closingBlockCommentDelimiter,
16+
closingBrace,
17+
closingParenthesis,
18+
colon,
19+
comma,
20+
escapedBackslash,
21+
escapedQuote,
22+
openingBlockCommentDelimiter,
23+
openingBrace,
24+
openingParenthesis,
25+
quote,
26+
singleLineCommentDelimiter,
27+
} from './literals.js'
1428
import { optionallySurroundedByParentheses } from './parentheses.js'
1529
import { whitespace } from './trivia.js'
1630

1731
export type Atom = string
1832

1933
const atomComponentsRequiringQuotation = [
34+
backslash,
35+
closingBlockCommentDelimiter,
36+
closingBrace,
37+
closingParenthesis,
38+
colon,
39+
comma,
40+
openingBlockCommentDelimiter,
41+
openingBrace,
42+
openingParenthesis,
43+
quote,
44+
singleLineCommentDelimiter,
2045
whitespace,
21-
literal('"'),
22-
literal('{'),
23-
literal('}'),
46+
47+
// Reserved for future use:
2448
literal('['),
2549
literal(']'),
26-
literal('('),
27-
literal(')'),
2850
literal('<'),
2951
literal('>'),
3052
literal('#'),
3153
literal('&'),
3254
literal('|'),
33-
literal('\\'),
3455
literal('='),
35-
literal(':'),
3656
literal(';'),
37-
literal(','),
38-
literal('//'),
39-
literal('/*'),
40-
literal('*/'),
4157
] as const
4258

43-
export const atomParser: Parser<Atom> = optionallySurroundedByParentheses(
44-
lazy(() => oneOf([unquotedAtomParser, quotedAtomParser])),
45-
)
46-
4759
export const atomWithAdditionalQuotationRequirements = (
4860
additionalQuoteRequiringComponent: Parser<unknown>,
4961
) =>
5062
optionallySurroundedByParentheses(
51-
lazy(() =>
52-
oneOf([
53-
map(
54-
oneOrMore(
55-
butNot(
56-
anySingleCharacter,
57-
oneOf([
58-
...atomComponentsRequiringQuotation,
59-
additionalQuoteRequiringComponent,
60-
]),
61-
'a character sequence requiring quotation',
62-
),
63+
oneOf([
64+
map(
65+
oneOrMore(
66+
butNot(
67+
anySingleCharacter,
68+
oneOf([
69+
...atomComponentsRequiringQuotation,
70+
additionalQuoteRequiringComponent,
71+
]),
72+
'a character sequence requiring quotation',
6373
),
64-
characters => characters.join(''),
6574
),
66-
quotedAtomParser,
67-
]),
68-
),
75+
characters => characters.join(''),
76+
),
77+
quotedAtomParser,
78+
]),
6979
)
7080

7181
export const unquotedAtomParser = map(
@@ -81,23 +91,23 @@ export const unquotedAtomParser = map(
8191

8292
const quotedAtomParser = map(
8393
sequence([
84-
literal('"'),
94+
quote,
8595
map(
8696
zeroOrMore(
8797
oneOf([
8898
// `"` and `\` need to be escaped
89-
butNot(
90-
anySingleCharacter,
91-
oneOf([literal('"'), literal('\\')]),
92-
'`"` or `\\`',
93-
),
94-
as(literal('\\"'), '"'),
95-
as(literal('\\\\'), '\\'),
99+
butNot(anySingleCharacter, oneOf([quote, backslash]), '`"` or `\\`'),
100+
as(escapedQuote, '"'),
101+
as(escapedBackslash, '\\'),
96102
]),
97103
),
98104
output => output.join(''),
99105
),
100-
literal('"'),
106+
quote,
101107
]),
102108
([_1, contents, _2]) => contents,
103109
)
110+
111+
export const atomParser: Parser<Atom> = optionallySurroundedByParentheses(
112+
oneOf([unquotedAtomParser, quotedAtomParser]),
113+
)

src/language/parsing/literals.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { literal } from '@matt.kantor/parsing'
2+
3+
export const arrow = literal('=>')
4+
export const asterisk = literal('*')
5+
export const backslash = literal('\\')
6+
export const closingBlockCommentDelimiter = literal('*/')
7+
export const closingBrace = literal('}')
8+
export const closingParenthesis = literal(')')
9+
export const colon = literal(':')
10+
export const comma = literal(',')
11+
export const dot = literal('.')
12+
export const escapedBackslash = literal('\\\\')
13+
export const escapedQuote = literal('\\"')
14+
export const newline = literal('\n')
15+
export const openingBlockCommentDelimiter = literal('/*')
16+
export const openingBrace = literal('{')
17+
export const openingParenthesis = literal('(')
18+
export const quote = literal('"')
19+
export const singleLineCommentDelimiter = literal('//')
20+
export const slash = literal('/')

src/language/parsing/molecule.ts

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import {
22
lazy,
3-
literal,
43
map,
54
nothing,
65
oneOf,
@@ -15,15 +14,21 @@ import {
1514
atomWithAdditionalQuotationRequirements,
1615
type Atom,
1716
} from './atom.js'
17+
import {
18+
arrow,
19+
closingBrace,
20+
closingParenthesis,
21+
colon,
22+
comma,
23+
dot,
24+
openingBrace,
25+
openingParenthesis,
26+
} from './literals.js'
1827
import { optionallySurroundedByParentheses } from './parentheses.js'
19-
import { trivia } from './trivia.js'
28+
import { optionalTrivia, trivia } from './trivia.js'
2029

2130
export type Molecule = { readonly [key: Atom]: Molecule | Atom }
2231

23-
export const moleculeParser: Parser<Molecule> = lazy(
24-
() => potentiallySugaredMolecule,
25-
)
26-
2732
// Keyless properties are automatically assigned numeric indexes, which uses some mutable state.
2833
type Indexer = () => string
2934
const makeIncrementingIndexer = (): Indexer => {
@@ -47,7 +52,7 @@ const propertyValue = oneOf([
4752
])
4853

4954
const namedProperty = map(
50-
sequence([propertyKey, literal(':'), optional(trivia), propertyValue]),
55+
sequence([propertyKey, colon, optionalTrivia, propertyValue]),
5156
([key, _colon, _trivia, value]) => [key, value] as const,
5257
)
5358

@@ -60,28 +65,28 @@ const property = (index: Indexer) =>
6065
)
6166

6267
const propertyDelimiter = oneOf([
63-
sequence([optional(trivia), literal(','), optional(trivia)]),
68+
sequence([optionalTrivia, comma, optionalTrivia]),
6469
trivia,
6570
])
6671

6772
const argument = map(
6873
sequence([
69-
literal('('),
70-
optional(trivia),
74+
openingParenthesis,
75+
optionalTrivia,
7176
propertyValue,
72-
optional(trivia),
73-
literal(')'),
77+
optionalTrivia,
78+
closingParenthesis,
7479
]),
7580
([_openingParenthesis, _trivia1, argument, _trivia2, _closingParenthesis]) =>
7681
argument,
7782
)
7883

7984
const dottedKeyPathComponent = map(
8085
sequence([
81-
optional(trivia),
82-
literal('.'),
83-
optional(trivia),
84-
atomWithAdditionalQuotationRequirements(literal('.')),
86+
optionalTrivia,
87+
dot,
88+
optionalTrivia,
89+
atomWithAdditionalQuotationRequirements(dot),
8590
]),
8691
([_trivia1, _dot, _trivia2, key]) => key,
8792
)
@@ -91,7 +96,7 @@ const moleculeAsEntries = (
9196
): Parser<readonly (readonly [string, string | Molecule])[]> =>
9297
map(
9398
sequence([
94-
literal('{'),
99+
openingBrace,
95100
// Allow initial property not preceded by a delimiter (e.g. `{a b}`).
96101
optional(property(index)),
97102
zeroOrMore(
@@ -101,7 +106,7 @@ const moleculeAsEntries = (
101106
),
102107
),
103108
optional(propertyDelimiter),
104-
literal('}'),
109+
closingBrace,
105110
]),
106111
([
107112
_openingBrace,
@@ -125,17 +130,17 @@ const sugarFreeMolecule: Parser<Molecule> = optionallySurroundedByParentheses(
125130
const sugaredLookup: Parser<Molecule> = optionallySurroundedByParentheses(
126131
map(
127132
sequence([
128-
literal(':'),
133+
colon,
129134
// Reserve `.` so that `:a.b` is parsed as a lookup followed by an index.
130-
atomWithAdditionalQuotationRequirements(literal('.')),
135+
atomWithAdditionalQuotationRequirements(dot),
131136
]),
132137
([_colon, key]) => ({ 0: '@lookup', key }),
133138
),
134139
)
135140

136141
const sugaredFunction: Parser<Molecule> = optionallySurroundedByParentheses(
137142
map(
138-
sequence([atomParser, trivia, literal('=>'), trivia, propertyValue]),
143+
sequence([atomParser, trivia, arrow, trivia, propertyValue]),
139144
([parameter, _trivia1, _arrow, _trivia2, body]) => ({
140145
0: '@function',
141146
parameter,
@@ -210,3 +215,5 @@ const potentiallySugaredMolecule: Parser<Molecule> = (() => {
210215
potentiallySugaredNonApply,
211216
])
212217
})()
218+
219+
export const moleculeParser: Parser<Molecule> = potentiallySugaredMolecule
src/language/parsing/parentheses.ts

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
1-
import {
2-
literal,
3-
map,
4-
oneOf,
5-
sequence,
6-
zeroOrMore,
7-
type Parser,
8-
} from '@matt.kantor/parsing'
9-
import { trivia } from './trivia.js'
1+
import { map, oneOf, sequence, type Parser } from '@matt.kantor/parsing'
2+
import { closingParenthesis, openingParenthesis } from './literals.js'
3+
import { optionalTrivia } from './trivia.js'
104

115
const optionallySurroundedBy = <Output>(
126
parser1: Parser<unknown>,
@@ -21,16 +15,8 @@ const optionallySurroundedBy = <Output>(
2115
export const optionallySurroundedByParentheses = <Output>(
2216
theParser: Parser<Output>,
2317
): Parser<Output> =>
24-
oneOf([
25-
// This allows `theParser` to greedily consume trivia.
26-
optionallySurroundedBy(
27-
literal('('),
28-
theParser,
29-
sequence([zeroOrMore(trivia), literal(')')]),
30-
),
31-
optionallySurroundedBy(
32-
sequence([literal('('), zeroOrMore(trivia)]),
33-
theParser,
34-
sequence([zeroOrMore(trivia), literal(')')]),
35-
),
36-
])
18+
optionallySurroundedBy(
19+
sequence([openingParenthesis, optionalTrivia]),
20+
theParser,
21+
sequence([optionalTrivia, closingParenthesis]),
22+
)

src/language/parsing/trivia.ts

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,43 @@
11
import {
22
anySingleCharacter,
33
butNot,
4-
literal,
54
lookaheadNot,
5+
nothing,
66
oneOf,
77
oneOrMore,
88
regularExpression,
99
sequence,
1010
zeroOrMore,
1111
} from '@matt.kantor/parsing'
12+
import {
13+
asterisk,
14+
closingBlockCommentDelimiter,
15+
newline,
16+
openingBlockCommentDelimiter,
17+
singleLineCommentDelimiter,
18+
slash,
19+
} from './literals.js'
1220

1321
const blockComment = sequence([
14-
literal('/*'),
22+
openingBlockCommentDelimiter,
1523
zeroOrMore(
1624
oneOf([
17-
butNot(anySingleCharacter, literal('*'), '*'),
18-
lookaheadNot(literal('*'), literal('/'), '/'),
25+
butNot(anySingleCharacter, asterisk, '*'),
26+
lookaheadNot(asterisk, slash, '/'),
1927
]),
2028
),
21-
literal('*/'),
29+
closingBlockCommentDelimiter,
2230
])
2331

2432
const singleLineComment = sequence([
25-
literal('//'),
26-
zeroOrMore(butNot(anySingleCharacter, literal('\n'), 'newline')),
33+
singleLineCommentDelimiter,
34+
zeroOrMore(butNot(anySingleCharacter, newline, 'newline')),
2735
])
2836

29-
export const whitespace = regularExpression(/\s+/)
37+
export const whitespace = regularExpression(/^\s+/)
3038

3139
export const trivia = oneOrMore(
3240
oneOf([whitespace, singleLineComment, blockComment]),
3341
)
42+
43+
export const optionalTrivia = oneOf([trivia, nothing])

0 commit comments

Comments
 (0)