Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"dependencies": {
"@matt.kantor/either": "^1.0.0",
"@matt.kantor/option": "^1.0.0",
"@matt.kantor/parsing": "^1.0.0",
"kleur": "^4.1.5"
}
}
90 changes: 51 additions & 39 deletions src/language/parsing/atom.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
import { parser, type Parser } from '../../parsing.js'
import {
type Parser,
anySingleCharacter,
as,
butNot,
lazy,
literal,
map,
oneOf,
oneOrMore,
sequence,
zeroOrMore,
} from '@matt.kantor/parsing'
import { optionallySurroundedByParentheses } from './parentheses.js'
import { whitespace } from './trivia.js'

Expand All @@ -10,57 +22,57 @@ export const isAtom = (value: unknown): value is Atom =>
export const unit = '' as const

export const atomParser: Parser<Atom> = optionallySurroundedByParentheses(
parser.lazy(() => parser.oneOf([quotedAtom, unquotedAtom])),
lazy(() => oneOf([quotedAtom, unquotedAtom])),
)

const quotedAtom = parser.map(
parser.sequence([
parser.as(parser.literal('"'), ''),
parser.map(
parser.zeroOrMore(
parser.oneOf([
parser.butNot(
parser.anySingleCharacter,
parser.oneOf([parser.literal('"'), parser.literal('\\')]),
const quotedAtom = map(
sequence([
as(literal('"'), ''),
map(
zeroOrMore(
oneOf([
butNot(
anySingleCharacter,
oneOf([literal('"'), literal('\\')]),
'`"` or `\\`',
),
parser.as(parser.literal('\\"'), '"'),
parser.as(parser.literal('\\\\'), '\\'),
as(literal('\\"'), '"'),
as(literal('\\\\'), '\\'),
]),
),
output => output.join(''),
),
parser.as(parser.literal('"'), ''),
as(literal('"'), ''),
]),
([_1, contents, _2]) => contents,
)

const unquotedAtom = parser.map(
parser.oneOrMore(
parser.butNot(
parser.anySingleCharacter,
parser.oneOf([
const unquotedAtom = map(
oneOrMore(
butNot(
anySingleCharacter,
oneOf([
whitespace,
parser.literal('"'),
parser.literal('{'),
parser.literal('}'),
parser.literal('['),
parser.literal(']'),
parser.literal('('),
parser.literal(')'),
parser.literal('<'),
parser.literal('>'),
parser.literal('#'),
parser.literal('&'),
parser.literal('|'),
parser.literal('\\'),
parser.literal('='),
parser.literal(':'),
parser.literal(';'),
parser.literal(','),
parser.literal('//'),
parser.literal('/*'),
parser.literal('*/'),
literal('"'),
literal('{'),
literal('}'),
literal('['),
literal(']'),
literal('('),
literal(')'),
literal('<'),
literal('>'),
literal('#'),
literal('&'),
literal('|'),
literal('\\'),
literal('='),
literal(':'),
literal(';'),
literal(','),
literal('//'),
literal('/*'),
literal('*/'),
]),
'a forbidden character sequence',
),
Expand Down
103 changes: 52 additions & 51 deletions src/language/parsing/molecule.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
import { parser, type Parser } from '../../parsing.js'
import {
as,
lazy,
literal,
map,
nothing,
oneOf,
oneOrMore,
sequence,
zeroOrMore,
type Parser,
} from '@matt.kantor/parsing'
import { atomParser, type Atom } from './atom.js'
import { optionallySurroundedByParentheses } from './parentheses.js'
import { trivia } from './trivia.js'
Expand All @@ -7,32 +18,32 @@ export type Molecule = { readonly [key: Atom]: Molecule | Atom }

export const unit: Molecule = {}

export const moleculeParser: Parser<Molecule> = parser.oneOf([
export const moleculeParser: Parser<Molecule> = oneOf([
optionallySurroundedByParentheses(
parser.map(
parser.lazy(() => moleculeAsEntries(makeIncrementingIndexer())),
map(
lazy(() => moleculeAsEntries(makeIncrementingIndexer())),
Object.fromEntries,
),
),
parser.lazy(() => sugaredApply),
parser.lazy(() => sugaredFunction),
lazy(() => sugaredApply),
lazy(() => sugaredFunction),
])

// During parsing molecules and properties are represented as nested arrays (of key/value pairs).
// The following utilities make it easier to work with such a structure.

const flat = <Output>(theParser: Parser<readonly Output[]>) =>
parser.map(theParser, output => output.flat())
map(theParser, output => output.flat())

const omit = (theParser: Parser<unknown>) => parser.as(theParser, [])
const omit = (theParser: Parser<unknown>) => as(theParser, [])

const optional = <Output>(
theParser: Parser<readonly Output[]>,
): Parser<readonly Output[]> => parser.oneOf([theParser, omit(parser.nothing)])
): Parser<readonly Output[]> => oneOf([theParser, omit(nothing)])

const withoutOmittedOutputs = <Output>(
theParser: Parser<readonly (readonly Output[])[]>,
) => parser.map(theParser, output => output.filter(output => output.length > 0))
) => map(theParser, output => output.filter(output => output.length > 0))

// Keyless properties are automatically assigned numeric indexes, which uses some mutable state.
type Indexer = () => string
Expand All @@ -48,37 +59,30 @@ const makeIncrementingIndexer = (): Indexer => {

// Language-specific parsers follow.

const propertyDelimiter = parser.oneOf([
parser.sequence([
optional(omit(trivia)),
parser.literal(','),
optional(omit(trivia)),
]),
const propertyDelimiter = oneOf([
sequence([optional(omit(trivia)), literal(','), optional(omit(trivia))]),
trivia,
])

const sugaredLookup: Parser<PartialMolecule> =
optionallySurroundedByParentheses(
parser.map(
parser.sequence([
parser.literal(':'),
parser.oneOf([atomParser, moleculeParser]),
]),
map(
sequence([literal(':'), oneOf([atomParser, moleculeParser])]),
([_colon, query]) => ({ 0: '@lookup', query }),
),
)

const sugaredFunction: Parser<PartialMolecule> =
optionallySurroundedByParentheses(
parser.map(
map(
flat(
parser.sequence([
parser.map(atomParser, output => [output]),
sequence([
map(atomParser, output => [output]),
omit(trivia),
omit(parser.literal('=>')),
omit(literal('=>')),
omit(trivia),
parser.map(
parser.lazy(() => propertyValue),
map(
lazy(() => propertyValue),
output => [output],
),
]),
Expand All @@ -91,16 +95,16 @@ const sugaredFunction: Parser<PartialMolecule> =
),
)

const sugaredApply: Parser<PartialMolecule> = parser.map(
parser.sequence([
parser.oneOf([sugaredLookup, parser.lazy(() => sugaredFunction)]),
parser.oneOrMore(
parser.sequence([
parser.literal('('),
const sugaredApply: Parser<PartialMolecule> = map(
sequence([
oneOf([sugaredLookup, lazy(() => sugaredFunction)]),
oneOrMore(
sequence([
literal('('),
optional(omit(trivia)),
parser.lazy(() => propertyValue),
lazy(() => propertyValue),
optional(omit(trivia)),
parser.literal(')'),
literal(')'),
]),
),
]),
Expand All @@ -116,52 +120,49 @@ const sugaredApply: Parser<PartialMolecule> = parser.map(
)

const propertyKey = atomParser
const propertyValue = parser.oneOf([
const propertyValue = oneOf([
sugaredApply, // must come first to avoid ambiguity
parser.lazy(() => moleculeParser), // must come second to avoid ambiguity
lazy(() => moleculeParser), // must come second to avoid ambiguity
atomParser,
sugaredLookup,
])

const namedProperty = flat(
parser.sequence([
sequence([
propertyKey,
omit(parser.literal(':')),
omit(literal(':')),
optional(omit(trivia)),
propertyValue,
]),
)

const numberedProperty = (index: Indexer) =>
parser.map(propertyValue, value => [index(), value])
map(propertyValue, value => [index(), value])

const property = (index: Indexer) =>
optionallySurroundedByParentheses(
parser.oneOf([namedProperty, numberedProperty(index)]),
oneOf([namedProperty, numberedProperty(index)]),
)

const moleculeAsEntries = (index: Indexer) =>
withoutOmittedOutputs(
flat(
parser.sequence([
omit(parser.literal('{')),
sequence([
omit(literal('{')),
// Allow initial property not preceded by a delimiter (e.g. `{a b}`).
parser.map(optional(property(index)), property => [property]),
parser.zeroOrMore(
map(optional(property(index)), property => [property]),
zeroOrMore(
flat(
parser.sequence([
omit(propertyDelimiter),
parser.lazy(() => property(index)),
]),
sequence([omit(propertyDelimiter), lazy(() => property(index))]),
),
),
optional(omit(propertyDelimiter)),
omit(parser.literal('}')),
omit(literal('}')),
]),
),
)

// This is a lazy workaround for `parser.sequence` returning an array rather than a tuple with
// This is a lazy workaround for `sequence` returning an array rather than a tuple with
// definitely-present elements.
type PartialMolecule = {
readonly [key: Atom]: PartialMolecule | Atom | undefined
Expand Down
26 changes: 15 additions & 11 deletions src/language/parsing/parentheses.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,36 @@
import { parser, type Parser } from '../../parsing.js'
import {
literal,
map,
oneOf,
sequence,
zeroOrMore,
type Parser,
} from '@matt.kantor/parsing'
import { trivia } from './trivia.js'

const optionallySurroundedBy = <Output>(
parser1: Parser<unknown>,
theParser: Parser<Output>,
parser2: Parser<unknown>,
): Parser<Output> =>
parser.oneOf([
oneOf([
theParser,
parser.map(
parser.sequence([parser1, theParser, parser2]),
([_1, output, _2]) => output,
),
map(sequence([parser1, theParser, parser2]), ([_1, output, _2]) => output),
])

export const optionallySurroundedByParentheses = <Output>(
theParser: Parser<Output>,
): Parser<Output> =>
parser.oneOf([
oneOf([
// This allows `theParser` to greedily consume trivia.
optionallySurroundedBy(
parser.literal('('),
literal('('),
theParser,
parser.sequence([parser.zeroOrMore(trivia), parser.literal(')')]),
sequence([zeroOrMore(trivia), literal(')')]),
),
optionallySurroundedBy(
parser.sequence([parser.literal('('), parser.zeroOrMore(trivia)]),
sequence([literal('('), zeroOrMore(trivia)]),
theParser,
parser.sequence([parser.zeroOrMore(trivia), parser.literal(')')]),
sequence([zeroOrMore(trivia), literal(')')]),
),
])
Loading
Loading