From f7b80519123312820487e0d0aba1606b2e56a72c Mon Sep 17 00:00:00 2001 From: Matt Kantor Date: Sun, 26 Jan 2025 08:29:16 -0500 Subject: [PATCH] Adopt @matt.kantor/parsing package Also import individual symbols rather than a namespace. --- package-lock.json | 10 ++ package.json | 1 + src/language/parsing/atom.ts | 90 +++++++------- src/language/parsing/molecule.ts | 103 ++++++++-------- src/language/parsing/parentheses.ts | 26 +++-- src/language/parsing/syntax-tree.ts | 18 ++- src/language/parsing/trivia.ts | 40 ++++--- src/parsing.ts | 5 - src/parsing/combinators.ts | 174 ---------------------------- src/parsing/constructors.ts | 52 --------- src/parsing/parser.ts | 19 --- 11 files changed, 165 insertions(+), 373 deletions(-) delete mode 100644 src/parsing.ts delete mode 100644 src/parsing/combinators.ts delete mode 100644 src/parsing/constructors.ts delete mode 100644 src/parsing/parser.ts diff --git a/package-lock.json b/package-lock.json index 6923b6f..5ee7dcd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@matt.kantor/either": "^1.0.0", "@matt.kantor/option": "^1.0.0", + "@matt.kantor/parsing": "^1.0.0", "kleur": "^4.1.5" }, "bin": { @@ -50,6 +51,15 @@ "integrity": "sha512-SfSHY47RmrQn7Dm6L9Tx4bmUzef4xDmOcQeFtgK5e/3NG/oWzNB6vZkDyBy7rDiBgk5o2Z99HutcEvDX9xTrfg==", "license": "MIT" }, + "node_modules/@matt.kantor/parsing": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@matt.kantor/parsing/-/parsing-1.0.0.tgz", + "integrity": "sha512-JRv4fjHGkDJJpimlsetUhJv6p2DQra0H/HrzqtN/zkY42fSOkA31nCQZSi1hs7ihBKDQl6VmnLN4ZWsyRdE5nA==", + "license": "MIT", + "dependencies": { + "@matt.kantor/either": "^1.0.0" + } + }, "node_modules/@types/node": { "version": "22.7.8", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.8.tgz", diff --git a/package.json b/package.json index 1bde3e2..2dfee27 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "dependencies": { "@matt.kantor/either": "^1.0.0", "@matt.kantor/option": "^1.0.0", + "@matt.kantor/parsing": "^1.0.0", "kleur": "^4.1.5" } } diff --git a/src/language/parsing/atom.ts b/src/language/parsing/atom.ts index 6a71837..aa8dc02 100644 --- a/src/language/parsing/atom.ts +++ b/src/language/parsing/atom.ts @@ -1,4 +1,16 @@ -import { parser, type Parser } from '../../parsing.js' +import { + type Parser, + anySingleCharacter, + as, + butNot, + lazy, + literal, + map, + oneOf, + oneOrMore, + sequence, + zeroOrMore, +} from '@matt.kantor/parsing' import { optionallySurroundedByParentheses } from './parentheses.js' import { whitespace } from './trivia.js' @@ -10,57 +22,57 @@ export const isAtom = (value: unknown): value is Atom => export const unit = '' as const export const atomParser: Parser = optionallySurroundedByParentheses( - parser.lazy(() => parser.oneOf([quotedAtom, unquotedAtom])), + lazy(() => oneOf([quotedAtom, unquotedAtom])), ) -const quotedAtom = parser.map( - parser.sequence([ - parser.as(parser.literal('"'), ''), - parser.map( - parser.zeroOrMore( - parser.oneOf([ - parser.butNot( - parser.anySingleCharacter, - parser.oneOf([parser.literal('"'), parser.literal('\\')]), +const quotedAtom = map( + sequence([ + as(literal('"'), ''), + map( + zeroOrMore( + oneOf([ + butNot( + anySingleCharacter, + oneOf([literal('"'), literal('\\')]), '`"` or `\\`', ), - parser.as(parser.literal('\\"'), '"'), - parser.as(parser.literal('\\\\'), '\\'), + as(literal('\\"'), '"'), + as(literal('\\\\'), '\\'), ]), ), output => output.join(''), ), - parser.as(parser.literal('"'), ''), + as(literal('"'), ''), ]), ([_1, contents, _2]) => contents, ) -const unquotedAtom = parser.map( - parser.oneOrMore( - parser.butNot( - parser.anySingleCharacter, - parser.oneOf([ +const unquotedAtom = map( + oneOrMore( + butNot( + anySingleCharacter, + oneOf([ whitespace, - parser.literal('"'), - parser.literal('{'), - parser.literal('}'), - parser.literal('['), - parser.literal(']'), - parser.literal('('), - parser.literal(')'), - parser.literal('<'), - parser.literal('>'), - parser.literal('#'), - parser.literal('&'), - parser.literal('|'), - parser.literal('\\'), - parser.literal('='), - parser.literal(':'), - parser.literal(';'), - parser.literal(','), - parser.literal('//'), - parser.literal('/*'), - parser.literal('*/'), + literal('"'), + literal('{'), + literal('}'), + literal('['), + literal(']'), + literal('('), + literal(')'), + literal('<'), + literal('>'), + literal('#'), + literal('&'), + literal('|'), + literal('\\'), + literal('='), + literal(':'), + literal(';'), + literal(','), + literal('//'), + literal('/*'), + literal('*/'), ]), 'a forbidden character sequence', ), diff --git a/src/language/parsing/molecule.ts b/src/language/parsing/molecule.ts index b970714..b361180 100644 --- a/src/language/parsing/molecule.ts +++ b/src/language/parsing/molecule.ts @@ -1,4 +1,15 @@ -import { parser, type Parser } from '../../parsing.js' +import { + as, + lazy, + literal, + map, + nothing, + oneOf, + oneOrMore, + sequence, + zeroOrMore, + type Parser, +} from '@matt.kantor/parsing' import { atomParser, type Atom } from './atom.js' import { optionallySurroundedByParentheses } from './parentheses.js' import { trivia } from './trivia.js' @@ -7,32 +18,32 @@ export type Molecule = { readonly [key: Atom]: Molecule | Atom } export const unit: Molecule = {} -export const moleculeParser: Parser = parser.oneOf([ +export const moleculeParser: Parser = oneOf([ optionallySurroundedByParentheses( - parser.map( - parser.lazy(() => moleculeAsEntries(makeIncrementingIndexer())), + map( + lazy(() => moleculeAsEntries(makeIncrementingIndexer())), Object.fromEntries, ), ), - parser.lazy(() => sugaredApply), - parser.lazy(() => sugaredFunction), + lazy(() => sugaredApply), + lazy(() => sugaredFunction), ]) // During parsing molecules and properties are represented as nested arrays (of key/value pairs). // The following utilities make it easier to work with such a structure. const flat = (theParser: Parser) => - parser.map(theParser, output => output.flat()) + map(theParser, output => output.flat()) -const omit = (theParser: Parser) => parser.as(theParser, []) +const omit = (theParser: Parser) => as(theParser, []) const optional = ( theParser: Parser, -): Parser => parser.oneOf([theParser, omit(parser.nothing)]) +): Parser => oneOf([theParser, omit(nothing)]) const withoutOmittedOutputs = ( theParser: Parser, -) => parser.map(theParser, output => output.filter(output => output.length > 0)) +) => map(theParser, output => output.filter(output => output.length > 0)) // Keyless properties are automatically assigned numeric indexes, which uses some mutable state. type Indexer = () => string @@ -48,37 +59,30 @@ const makeIncrementingIndexer = (): Indexer => { // Language-specific parsers follow. -const propertyDelimiter = parser.oneOf([ - parser.sequence([ - optional(omit(trivia)), - parser.literal(','), - optional(omit(trivia)), - ]), +const propertyDelimiter = oneOf([ + sequence([optional(omit(trivia)), literal(','), optional(omit(trivia))]), trivia, ]) const sugaredLookup: Parser = optionallySurroundedByParentheses( - parser.map( - parser.sequence([ - parser.literal(':'), - parser.oneOf([atomParser, moleculeParser]), - ]), + map( + sequence([literal(':'), oneOf([atomParser, moleculeParser])]), ([_colon, query]) => ({ 0: '@lookup', query }), ), ) const sugaredFunction: Parser = optionallySurroundedByParentheses( - parser.map( + map( flat( - parser.sequence([ - parser.map(atomParser, output => [output]), + sequence([ + map(atomParser, output => [output]), omit(trivia), - omit(parser.literal('=>')), + omit(literal('=>')), omit(trivia), - parser.map( - parser.lazy(() => propertyValue), + map( + lazy(() => propertyValue), output => [output], ), ]), @@ -91,16 +95,16 @@ const sugaredFunction: Parser = ), ) -const sugaredApply: Parser = parser.map( - parser.sequence([ - parser.oneOf([sugaredLookup, parser.lazy(() => sugaredFunction)]), - parser.oneOrMore( - parser.sequence([ - parser.literal('('), +const sugaredApply: Parser = map( + sequence([ + oneOf([sugaredLookup, lazy(() => sugaredFunction)]), + oneOrMore( + sequence([ + literal('('), optional(omit(trivia)), - parser.lazy(() => propertyValue), + lazy(() => propertyValue), optional(omit(trivia)), - parser.literal(')'), + literal(')'), ]), ), ]), @@ -116,52 +120,49 @@ const sugaredApply: Parser = parser.map( ) const propertyKey = atomParser -const propertyValue = parser.oneOf([ +const propertyValue = oneOf([ sugaredApply, // must come first to avoid ambiguity - parser.lazy(() => moleculeParser), // must come second to avoid ambiguity + lazy(() => moleculeParser), // must come second to avoid ambiguity atomParser, sugaredLookup, ]) const namedProperty = flat( - parser.sequence([ + sequence([ propertyKey, - omit(parser.literal(':')), + omit(literal(':')), optional(omit(trivia)), propertyValue, ]), ) const numberedProperty = (index: Indexer) => - parser.map(propertyValue, value => [index(), value]) + map(propertyValue, value => [index(), value]) const property = (index: Indexer) => optionallySurroundedByParentheses( - parser.oneOf([namedProperty, numberedProperty(index)]), + oneOf([namedProperty, numberedProperty(index)]), ) const moleculeAsEntries = (index: Indexer) => withoutOmittedOutputs( flat( - parser.sequence([ - omit(parser.literal('{')), + sequence([ + omit(literal('{')), // Allow initial property not preceded by a delimiter (e.g. `{a b}`). - parser.map(optional(property(index)), property => [property]), - parser.zeroOrMore( + map(optional(property(index)), property => [property]), + zeroOrMore( flat( - parser.sequence([ - omit(propertyDelimiter), - parser.lazy(() => property(index)), - ]), + sequence([omit(propertyDelimiter), lazy(() => property(index))]), ), ), optional(omit(propertyDelimiter)), - omit(parser.literal('}')), + omit(literal('}')), ]), ), ) -// This is a lazy workaround for `parser.sequence` returning an array rather than a tuple with +// This is a lazy workaround for `sequence` returning an array rather than a tuple with // definitely-present elements. type PartialMolecule = { readonly [key: Atom]: PartialMolecule | Atom | undefined diff --git a/src/language/parsing/parentheses.ts b/src/language/parsing/parentheses.ts index abbab53..cd80c17 100644 --- a/src/language/parsing/parentheses.ts +++ b/src/language/parsing/parentheses.ts @@ -1,4 +1,11 @@ -import { parser, type Parser } from '../../parsing.js' +import { + literal, + map, + oneOf, + sequence, + zeroOrMore, + type Parser, +} from '@matt.kantor/parsing' import { trivia } from './trivia.js' const optionallySurroundedBy = ( @@ -6,27 +13,24 @@ const optionallySurroundedBy = ( theParser: Parser, parser2: Parser, ): Parser => - parser.oneOf([ + oneOf([ theParser, - parser.map( - parser.sequence([parser1, theParser, parser2]), - ([_1, output, _2]) => output, - ), + map(sequence([parser1, theParser, parser2]), ([_1, output, _2]) => output), ]) export const optionallySurroundedByParentheses = ( theParser: Parser, ): Parser => - parser.oneOf([ + oneOf([ // This allows `theParser` to greedily consume trivia. optionallySurroundedBy( - parser.literal('('), + literal('('), theParser, - parser.sequence([parser.zeroOrMore(trivia), parser.literal(')')]), + sequence([zeroOrMore(trivia), literal(')')]), ), optionallySurroundedBy( - parser.sequence([parser.literal('('), parser.zeroOrMore(trivia)]), + sequence([literal('('), zeroOrMore(trivia)]), theParser, - parser.sequence([parser.zeroOrMore(trivia), parser.literal(')')]), + sequence([zeroOrMore(trivia), literal(')')]), ), ]) diff --git a/src/language/parsing/syntax-tree.ts b/src/language/parsing/syntax-tree.ts index eca9d37..a3440da 100644 --- a/src/language/parsing/syntax-tree.ts +++ b/src/language/parsing/syntax-tree.ts @@ -1,5 +1,11 @@ import option, { type Option } from '@matt.kantor/option' -import { parser, type Parser } from '../../parsing.js' +import { + map, + oneOf, + sequence, + zeroOrMore, + type Parser, +} from '@matt.kantor/parsing' import { withPhantomData, type WithPhantomData } from '../../phantom-data.js' import type { JsonArray, @@ -76,11 +82,11 @@ type JsonRecordForbiddingSymbolicKeys = { readonly [key: symbol]: undefined }> -export const syntaxTreeParser: Parser = parser.map( - parser.sequence([ - parser.zeroOrMore(trivia), - parser.oneOf([atomParser, moleculeParser]), - parser.zeroOrMore(trivia), +export const syntaxTreeParser: Parser = map( + sequence([ + zeroOrMore(trivia), + oneOf([atomParser, moleculeParser]), + zeroOrMore(trivia), ]), ([_leadingTrivia, syntaxTree, _trailingTrivia]) => canonicalize(syntaxTree), ) diff --git a/src/language/parsing/trivia.ts b/src/language/parsing/trivia.ts index e8f534e..89ffa3c 100644 --- a/src/language/parsing/trivia.ts +++ b/src/language/parsing/trivia.ts @@ -1,25 +1,33 @@ -import { parser } from '../../parsing.js' +import { + anySingleCharacter, + butNot, + literal, + lookaheadNot, + oneOf, + oneOrMore, + regularExpression, + sequence, + zeroOrMore, +} from '@matt.kantor/parsing' -const blockComment = parser.sequence([ - parser.literal('/*'), - parser.zeroOrMore( - parser.oneOf([ - parser.butNot(parser.anySingleCharacter, parser.literal('*'), '*'), - parser.lookaheadNot(parser.literal('*'), parser.literal('/'), '/'), +const blockComment = sequence([ + literal('/*'), + zeroOrMore( + oneOf([ + butNot(anySingleCharacter, literal('*'), '*'), + lookaheadNot(literal('*'), literal('/'), '/'), ]), ), - parser.literal('*/'), + literal('*/'), ]) -const singleLineComment = parser.sequence([ - parser.literal('//'), - parser.zeroOrMore( - parser.butNot(parser.anySingleCharacter, parser.literal('\n'), 'newline'), - ), +const singleLineComment = sequence([ + literal('//'), + zeroOrMore(butNot(anySingleCharacter, literal('\n'), 'newline')), ]) -export const whitespace = parser.regularExpression(/\s+/) +export const whitespace = regularExpression(/\s+/) -export const trivia = parser.oneOrMore( - parser.oneOf([whitespace, singleLineComment, blockComment]), +export const trivia = oneOrMore( + oneOf([whitespace, singleLineComment, blockComment]), ) diff --git a/src/parsing.ts b/src/parsing.ts deleted file mode 100644 index 6dc87ba..0000000 --- a/src/parsing.ts +++ /dev/null @@ -1,5 +0,0 @@ -import * as combinators from './parsing/combinators.js' -import * as constructors from './parsing/constructors.js' - -export const parser = { ...combinators, ...constructors } -export type { Parser } from './parsing/parser.js' diff --git a/src/parsing/combinators.ts b/src/parsing/combinators.ts deleted file mode 100644 index 3e40916..0000000 --- a/src/parsing/combinators.ts +++ /dev/null @@ -1,174 +0,0 @@ -import either from '@matt.kantor/either' -import { nothing } from './constructors.js' -import type { AlwaysSucceedingParser, Parser, Success } from './parser.js' - -export const as = - ( - parser: Parser, - newOutput: NewOutput, - ): Parser => - input => - either.map(parser(input), success => ({ - output: newOutput, - remainingInput: success.remainingInput, - })) - -export const butNot = - ( - parser: Parser, - not: Parser, - notName: string, - ): Parser => - input => - either.flatMap(parser(input), success => { - const notResult = not(input) - if (!either.isLeft(notResult)) { - return either.makeLeft({ - input, - message: `input was unexpectedly ${notName}`, - }) - } else { - return either.makeRight(success) - } - }) - -export const lazy = - (parser: () => Parser): Parser => - input => - parser()(input) - -export const lookaheadNot = - ( - parser: Parser, - notFollowedBy: Parser, - followedByName: string, - ): Parser => - input => - either.flatMap(parser(input), success => - either.match(notFollowedBy(success.remainingInput), { - left: _ => either.makeRight(success), - right: _ => - either.makeLeft({ - input, - message: `input was unexpectedly followed by ${followedByName}`, - }), - }), - ) - -export const map = - ( - parser: Parser, - f: (output: Output) => NewOutput, - ): Parser => - input => - either.map(parser(input), success => ({ - output: f(success.output), - remainingInput: success.remainingInput, - })) - -/** - * Apply the given `parsers` to the same input until one succeeds or all fail. - */ -export const oneOf = - < - Parsers extends readonly [ - Parser, - Parser, - ...(readonly Parser[]), - ], - >( - parsers: Parsers, - ): Parser> => - input => - parsers.reduce( - (result: ReturnType>>, parser) => - either.match(result, { - right: either.makeRight, - left: _ => parser(input), - }), - either.makeLeft({ input, message: '' }), // `parsers` is non-empty so this is never returned - ) -type OneOfOutput[]> = { - [Index in keyof Parsers]: OutputOf -}[number] - -export const oneOrMore = ( - parser: Parser, -): Parser => - map(sequence([map(parser, output => [output]), zeroOrMore(parser)]), output => - output.flat(), - ) - -/** - * Apply the given `parsers` in order to the input, requiring all to succeed. - */ -export const sequence = - < - const Parsers extends readonly [ - Parser, - Parser, - ...(readonly Parser[]), - ], - >( - parsers: Parsers, - ): Parser> => - input => - either.map( - parsers.reduce( - ( - results: ReturnType< - Parser[number][]> - >, - parser, - ) => - either.match(results, { - right: successes => - either.map(parser(successes.remainingInput), newSuccess => ({ - remainingInput: newSuccess.remainingInput, - output: [...successes.output, newSuccess.output], - })), - left: either.makeLeft, - }), - either.makeRight({ remainingInput: input, output: [] }), // `parsers` is non-empty so this is never returned - ), - // The above `reduce` constructs the output to be concordant with this type, but TypeScript - // doesn't know that. - // TODO: Consider tightening `reduce`'s signature instead. - output => output as Success>, - ) - -type SequenceOutput[]> = { - [Index in keyof Parsers]: OutputOf -} - -export const zeroOrMore = - (parser: Parser): AlwaysSucceedingParser => - input => { - const result = oneOf([parser, nothing])(input) - const success = either.match(result, { - left: _ => ({ - output: [], - remainingInput: input, - }), - right: lastSuccess => { - if (lastSuccess.output === undefined) { - return { - output: [], - remainingInput: lastSuccess.remainingInput, - } - } else { - const nextResult = zeroOrMore(parser)(lastSuccess.remainingInput) - return { - output: [lastSuccess.output, ...nextResult.value.output], - remainingInput: nextResult.value.remainingInput, - } - } - }, - }) - return either.makeRight(success) - } - -type OutputOf> = Extract< - ReturnType['value'], - Success ->['output'] diff --git a/src/parsing/constructors.ts b/src/parsing/constructors.ts deleted file mode 100644 index b761cdc..0000000 --- a/src/parsing/constructors.ts +++ /dev/null @@ -1,52 +0,0 @@ -import either from '@matt.kantor/either' -import type { AlwaysSucceedingParser, Parser } from './parser.js' - -export const anySingleCharacter: Parser = input => { - const firstCodePoint = input.codePointAt(0) - if (firstCodePoint === undefined) { - return either.makeLeft({ - input, - message: 'input was empty', - }) - } else { - const firstCharacter = String.fromCodePoint(firstCodePoint) - return either.makeRight({ - output: firstCharacter, - remainingInput: input.slice(firstCharacter.length), - }) - } -} - -export const literal = - (text: string): Parser => - input => - input.startsWith(text) - ? either.makeRight({ - remainingInput: input.slice(text.length), - output: text, - }) - : either.makeLeft({ - input, - message: `input did not begin with \`${text}\``, - }) - -export const nothing: AlwaysSucceedingParser = input => - either.makeRight({ - remainingInput: input, - output: undefined, - }) - -export const regularExpression = - (pattern: RegExp): Parser => - input => { - const match = pattern.exec(input) - return match === null || match.index !== 0 - ? either.makeLeft({ - input, - message: 'input did not match regular expression', - }) - : either.makeRight({ - remainingInput: input.slice(match[0].length), - output: match[0], - }) - } diff --git a/src/parsing/parser.ts b/src/parsing/parser.ts deleted file mode 100644 index 240e776..0000000 --- a/src/parsing/parser.ts +++ /dev/null @@ -1,19 +0,0 @@ -import type { Either, Right } from '@matt.kantor/either' - -export type Success = { - readonly remainingInput: string - readonly output: Output -} - -export type InvalidInputError = { - readonly input: string - readonly message: string -} - -export type Parser = ( - input: string, -) => Either> - -export type AlwaysSucceedingParser = ( - input: string, -) => Right>