diff --git a/README.md b/README.md index 0bfb3df..5a688ab 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ data representation implied by the fact that a value is an atom (e.g. the atom `2` may be an integer in memory). Bare words not containing any -[reserved character sequences](./src/language/parsing/atom.ts#L32-L54) are +[reserved character sequences](./src/language/parsing/atom.ts#L24-L46) are atoms: ``` diff --git a/src/end-to-end.test.ts b/src/end-to-end.test.ts index 16c7541..0516888 100644 --- a/src/end-to-end.test.ts +++ b/src/end-to-end.test.ts @@ -53,6 +53,47 @@ testCases(endToEnd, code => code)('end-to-end tests', [ body: { 0: '@lookup', query: { 0: 'a' } }, }), ], + ['{ success }.0', either.makeRight('success')], + ['{ f: :identity }.f(success)', either.makeRight('success')], + ['{ f: :identity }.f({ a: success }).a', either.makeRight('success')], + [ + '{ f: :identity }.f({ g: :identity }).g({ a: success }).a', + either.makeRight('success'), + ], + ['{ a: { b: success } }.a.b', either.makeRight('success')], + [ + '{ a: { "b.c(d) e \\" {}": success } }.a."b.c(d) e \\" {}"', + either.makeRight('success'), + ], + ['(a => { b: :a }.b)(success)', either.makeRight('success')], + ['(a => { b: :a })(success).b', either.makeRight('success')], + ['{ success }/**/./**/0', either.makeRight('success')], + [ + ` + { a: { b: success } } // blah + // blah + .a // blah + // blah + .b // blah + `, + either.makeRight('success'), + ], + [ + `{ + a: { + b: { + c: z => { + d: y => x => { + e: { + f: w => { g: { :z :y :x :w } } + } + } + } + } + } + }.a.b.c(a).d(b)(c).e.f(d).g`, + either.makeRight({ 0: 'a', 1: 'b', 2: 'c', 3: 'd' }), + ], ['{ a: ({ A }) }', either.makeRight({ a: { 0: 'A' } })], ['{ a: ( A ) }', either.makeRight({ a: 'A' })], ['{ a: ("A A A") }', either.makeRight({ a: 'A A A' })], diff --git a/src/language/parsing/atom.ts b/src/language/parsing/atom.ts index c2c195e..99d094b 100644 --- a/src/language/parsing/atom.ts +++ b/src/language/parsing/atom.ts @@ -21,38 +21,64 @@ export const isAtom = (value: unknown): value is Atom => export const unit = '' as const +const atomComponentsRequiringQuotation = [ + whitespace, + literal('"'), + literal('{'), + literal('}'), + literal('['), + literal(']'), + literal('('), + literal(')'), + literal('<'), + literal('>'), + literal('#'), + literal('&'), + literal('|'), + literal('\\'), + literal('='), + literal(':'), + literal(';'), + literal(','), + literal('//'), + literal('/*'), + literal('*/'), +] as const + export const atomParser: Parser = optionallySurroundedByParentheses( lazy(() => oneOf([unquotedAtomParser, quotedAtomParser])), ) +export const atomWithAdditionalQuotationRequirements = ( + additionalQuoteRequiringComponent: Parser, +) => + optionallySurroundedByParentheses( + lazy(() => + oneOf([ + map( + oneOrMore( + butNot( + anySingleCharacter, + oneOf([ + ...atomComponentsRequiringQuotation, + additionalQuoteRequiringComponent, + ]), + 'a character sequence requiring quotation', + ), + ), + characters => characters.join(''), + ), + quotedAtomParser, + ]), + ), + ) + export const unquotedAtomParser = map( oneOrMore( butNot( anySingleCharacter, - oneOf([ - whitespace, - literal('"'), - literal('{'), - literal('}'), - literal('['), - literal(']'), - literal('('), - literal(')'), - literal('<'), - literal('>'), - literal('#'), - literal('&'), - literal('|'), - literal('\\'), - literal('='), - literal(':'), - literal(';'), - literal(','), - literal('//'), - literal('/*'), - literal('*/'), - ]), - 'a forbidden character sequence', + oneOf(atomComponentsRequiringQuotation), + 'a character sequence requiring quotation', ), ), characters => characters.join(''), diff --git a/src/language/parsing/molecule.ts b/src/language/parsing/molecule.ts index c8863fc..a592ccf 100644 --- a/src/language/parsing/molecule.ts +++ b/src/language/parsing/molecule.ts @@ -9,7 +9,12 @@ import { zeroOrMore, type Parser, } from '@matt.kantor/parsing' -import { atomParser, type Atom } from './atom.js' +import { keyPathToMolecule } from '../semantics.js' +import { + atomParser, + atomWithAdditionalQuotationRequirements, + type Atom, +} from './atom.js' import { optionallySurroundedByParentheses } from './parentheses.js' import { trivia } from './trivia.js' @@ -17,20 +22,9 @@ export type Molecule = { readonly [key: Atom]: Molecule | Atom } export const unit: Molecule = {} -export const moleculeParser: Parser = oneOf([ - optionallySurroundedByParentheses( - map( - lazy(() => moleculeAsEntries(makeIncrementingIndexer())), - Object.fromEntries, - ), - ), - lazy(() => sugaredApply), - lazy(() => sugaredFunction), -]) - -const optional = ( - parser: Parser>, -): Parser => oneOf([parser, nothing]) +export const moleculeParser: Parser = lazy( + () => potentiallySugaredMolecule, +) // Keyless properties are automatically assigned numeric indexes, which uses some mutable state. type Indexer = () => string @@ -44,65 +38,14 @@ const makeIncrementingIndexer = (): Indexer => { } } -const propertyDelimiter = oneOf([ - sequence([optional(trivia), literal(','), optional(trivia)]), - trivia, -]) - -const sugaredLookup: Parser = optionallySurroundedByParentheses( - map( - sequence([literal(':'), oneOf([atomParser, moleculeParser])]), - ([_colon, query]) => ({ 0: '@lookup', query }), - ), -) - -const sugaredFunction: Parser = optionallySurroundedByParentheses( - map( - sequence([ - atomParser, - trivia, - literal('=>'), - trivia, - lazy(() => propertyValue), - ]), - ([parameter, _trivia1, _arrow, _trivia2, body]) => ({ - 0: '@function', - parameter, - body, - }), - ), -) - -const sugaredApply: Parser = map( - sequence([ - oneOf([sugaredLookup, lazy(() => sugaredFunction)]), - oneOrMore( - sequence([ - literal('('), - optional(trivia), - lazy(() => propertyValue), - optional(trivia), - literal(')'), - ]), - ), - ]), - ([f, multipleArguments]) => - multipleArguments.reduce( - (expression, [_1, _2, argument, _3, _4]) => ({ - 0: '@apply', - function: expression, - argument, - }), - f, - ), -) +const optional = ( + parser: Parser>, +): Parser => oneOf([parser, nothing]) const propertyKey = atomParser const propertyValue = oneOf([ - sugaredApply, // must come first to avoid ambiguity - lazy(() => moleculeParser), // must come second to avoid ambiguity + lazy(() => potentiallySugaredMolecule), atomParser, - sugaredLookup, ]) const namedProperty = map( @@ -118,6 +61,33 @@ const property = (index: Indexer) => oneOf([namedProperty, numberedProperty(index)]), ) +const propertyDelimiter = oneOf([ + sequence([optional(trivia), literal(','), optional(trivia)]), + trivia, +]) + +const argument = map( + sequence([ + literal('('), + optional(trivia), + propertyValue, + optional(trivia), + literal(')'), + ]), + ([_openingParenthesis, _trivia1, argument, _trivia2, _closingParenthesis]) => + argument, +) + +const dottedKeyPathComponent = map( + sequence([ + optional(trivia), + literal('.'), + optional(trivia), + atomWithAdditionalQuotationRequirements(literal('.')), + ]), + ([_trivia1, _dot, _trivia2, key]) => key, +) + const moleculeAsEntries = ( index: Indexer, ): Parser => @@ -146,3 +116,95 @@ const moleculeAsEntries = ( ? remainingProperties : [optionalInitialProperty, ...remainingProperties], ) + +const sugarFreeMolecule: Parser = optionallySurroundedByParentheses( + map( + lazy(() => moleculeAsEntries(makeIncrementingIndexer())), + Object.fromEntries, + ), +) + +const sugaredLookup: Parser = optionallySurroundedByParentheses( + map( + sequence([literal(':'), oneOf([atomParser, sugarFreeMolecule])]), + ([_colon, query]) => ({ 0: '@lookup', query }), + ), +) + +const sugaredFunction: Parser = optionallySurroundedByParentheses( + map( + sequence([atomParser, trivia, literal('=>'), trivia, propertyValue]), + ([parameter, _trivia1, _arrow, _trivia2, body]) => ({ + 0: '@function', + parameter, + body, + }), + ), +) + +const potentiallySugaredMolecule: Parser = (() => { + // The awkward setup in here avoids infinite recursion when applying the mutually-dependent + // parsers for index and apply sugars. Indexes/applications can be chained to form + // arbitrarily-long expressions (e.g. `:a.b.c(d).e(f)(g).h.i(j).k`). + + const potentiallySugaredNonApply = map( + sequence([ + oneOf([sugaredLookup, sugaredFunction, sugarFreeMolecule]), + zeroOrMore(dottedKeyPathComponent), + ]), + ([object, keyPath]) => + keyPath.length === 0 + ? object + : { + 0: '@index', + object, + query: keyPathToMolecule(keyPath), + }, + ) + + const sugaredApplyWithOptionalTrailingIndexesAndApplies = map( + sequence([ + potentiallySugaredNonApply, + oneOrMore(argument), + zeroOrMore( + sequence([oneOrMore(dottedKeyPathComponent), zeroOrMore(argument)]), + ), + ]), + ([ + functionToApply, + multipleArguments, + trailingIndexQueriesAndApplyArguments, + ]) => { + const initialApply = multipleArguments.reduce( + (expression, argument) => ({ + 0: '@apply', + function: expression, + argument, + }), + functionToApply, + ) + + return trailingIndexQueriesAndApplyArguments.reduce( + (expression, [keyPath, possibleArguments]) => + possibleArguments.reduce( + (functionToApply, argument) => ({ + 0: '@apply', + function: functionToApply, + argument, + }), + { + 0: '@index', + object: expression, + query: keyPathToMolecule(keyPath), + }, + ), + initialApply, + ) + }, + ) + + return oneOf([ + sugaredApplyWithOptionalTrailingIndexesAndApplies, + potentiallySugaredNonApply, + ]) +})()