diff --git a/README.md b/README.md index da65701..02fe61d 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,34 @@ Functions can be applied: } ``` +Infix notation can be used to apply binary functions (those which look like +`b => a => …`). For example, the expression `x f y` desugars to `:f(y)(x)`. +Here's another example: + +``` +{ + cons: b => a => { :a, :b } + list: 1 cons (2 cons 3) // evaluates to `{ 1, { 2, 3 } }` +} +``` + +The standard library contains symbolically-named functions for arithmetic and +other familiar binary operations. For example, `1 + 2 - 3` is `0`. Also included +in the standard library are the functions `|>` (pipe) and `>>` (flow): + +``` +{ + // `>>` composes functions + append_bc: :atom.append(b) >> :atom.append(c) + + // `|>` pipes an argument into a function + abc: a |> :append_bc +} +``` + +All binary operations are currently left-associative and there is no operator +precedence. Use of parentheses is encouraged. + #### Keywords The functions and lookups shown above are syntax sugar for _keyword @@ -197,7 +225,7 @@ possible. 
For example, this program compiles to the literal value `2` (no computation will occur at runtime): ``` -:integer.add(1)(1) +1 + 1 ``` There's currently no module system and all Please programs are single files, but diff --git a/examples/fibonacci.plz b/examples/fibonacci.plz index 496ac01..6c90536 100644 --- a/examples/fibonacci.plz +++ b/examples/fibonacci.plz @@ -1,24 +1,23 @@ { fibonacci: n => { - @if, :integer.less_than(2)(:n) + @if + condition: :n < 2 then: :n - else: :integer.add( - :fibonacci(:integer.subtract(2)(:n)) - )( - :fibonacci(:integer.subtract(1)(:n)) - ) + else: :fibonacci(:n - 1) + :fibonacci(:n - 2) } - input: { @runtime, context => :context.arguments.lookup(input) } + input: { + @runtime + context => :context.arguments.lookup(input) + } - output: :apply(:input)( - :match({ - none: _ => "missing input argument" - some: input => { - @if, :natural_number.is(:input) - then: :fibonacci(:input) - else: "input must be a natural number" - } - }) - ) + output: :input match { + none: _ => "missing input argument" + some: input => { + @if + condition: :natural_number.is(:input) + then: :fibonacci(:input) + else: "input must be a natural number" + } + } }.output diff --git a/examples/kitchen-sink.plz b/examples/kitchen-sink.plz index f3b08fa..7548407 100644 --- a/examples/kitchen-sink.plz +++ b/examples/kitchen-sink.plz @@ -4,7 +4,7 @@ bar: :foo sky_is_blue: :boolean.not(false) colors: { red, green, blue } - two: :integer.add(1)(1) + two: 1 + 1 add_one: :integer.add(1) three: :add_one(:two) function: x => { value: :x } diff --git a/examples/lookup-environment-variable.plz b/examples/lookup-environment-variable.plz index 3670d55..0db7fa2 100644 --- a/examples/lookup-environment-variable.plz +++ b/examples/lookup-environment-variable.plz @@ -1,17 +1,15 @@ -{@runtime, context => - :flow( - :match({ +/** + * Given CLI arguments like `--variable=FOO`, looks up the environment + * variable named `FOO`. 
+ */ +{ + @runtime + context => + :context.arguments.lookup(variable) match { none: {} - some: :flow( - :match({ - none: {} - some: :identity - }) - )( - :context.environment.lookup - ) - }) - )( - :context.arguments.lookup - )(variable) + some: :context.environment.lookup >> :match({ + none: {} + some: :identity + }) + } } diff --git a/src/end-to-end.test.ts b/src/end-to-end.test.ts index cf6b886..1fc9f68 100644 --- a/src/end-to-end.test.ts +++ b/src/end-to-end.test.ts @@ -19,6 +19,7 @@ testCases(endToEnd, code => code)('end-to-end tests', [ ['""', either.makeRight('')], ['{}', either.makeRight({})], ['hi', either.makeRight('hi')], + ['1.1', either.makeRight('1.1')], ['{{{}}}', either.makeRight({ 0: { 0: {} } })], ['"hello world"', either.makeRight('hello world')], ['{foo:bar}', either.makeRight({ foo: 'bar' })], @@ -36,6 +37,10 @@ testCases(endToEnd, code => code)('end-to-end tests', [ assert.deepEqual(result.value.kind, 'panic') }, ], + ['{a:A, b:{@lookup, a}}', either.makeRight({ a: 'A', b: 'A' })], + ['{a:A, {@lookup, a}}', either.makeRight({ a: 'A', 0: 'A' })], + ['{a:A, b: :a}', either.makeRight({ a: 'A', b: 'A' })], + ['{a:A, :a}', either.makeRight({ a: 'A', 0: 'A' })], [ '{@runtime, _ => {@panic}}', result => { @@ -44,25 +49,15 @@ testCases(endToEnd, code => code)('end-to-end tests', [ assert.deepEqual(result.value.kind, 'panic') }, ], - ['{a:A, b:{@lookup, a}}', either.makeRight({ a: 'A', b: 'A' })], - ['{a:A, b: :a}', either.makeRight({ a: 'A', b: 'A' })], - ['{a:A, {@lookup, a}}', either.makeRight({ a: 'A', 0: 'A' })], - ['{a:A, :a}', either.makeRight({ a: 'A', 0: 'A' })], - ['{ a: (a => :a)(A) }', either.makeRight({ a: 'A' })], - ['{ a: ( a => :a )( A ) }', either.makeRight({ a: 'A' })], - ['(a => :a)(A)', either.makeRight('A')], [ - '{ a: a => :a, b: :a(A) }', - result => { - if (either.isLeft(result)) { - assert.fail(result.value.message) - } - assert(typeof result.value === 'object') - assert.deepEqual(result.value['b'], 'A') - }, + 'a => :a', + 
either.makeRight({ + 0: '@function', + parameter: 'a', + body: { 0: '@lookup', key: 'a' }, + }), ], [ - // TODO: Should functions be implicitly serialized? Or should this be an error? '(a => :a)', either.makeRight({ 0: '@function', @@ -70,47 +65,6 @@ testCases(endToEnd, code => code)('end-to-end tests', [ body: { 0: '@lookup', key: 'a' }, }), ], - ['{ success }.0', either.makeRight('success')], - ['{ f: :identity }.f(success)', either.makeRight('success')], - ['{ f: :identity }.f({ a: success }).a', either.makeRight('success')], - [ - '{ f: :identity }.f({ g: :identity }).g({ a: success }).a', - either.makeRight('success'), - ], - ['{ a: { b: success } }.a.b', either.makeRight('success')], - [ - '{ a: { "b.c(d) e \\" {}": success } }.a."b.c(d) e \\" {}"', - either.makeRight('success'), - ], - ['(a => { b: :a }.b)(success)', either.makeRight('success')], - ['(a => { b: :a })(success).b', either.makeRight('success')], - ['{ success }/**/./**/0', either.makeRight('success')], - [ - ` - { a: { b: success } } // blah - // blah - .a // blah - // blah - .b // blah - `, - either.makeRight('success'), - ], - [ - `{ - a: { - b: { - c: z => { - d: y => x => { - e: { - f: w => { g: { :z, :y, :x, :w, } } - } - } - } - } - } - }.a.b.c(a).d(b)(c).e.f(d).g`, - either.makeRight({ 0: 'a', 1: 'b', 2: 'c', 3: 'd' }), - ], ['{ a: ({ A }) }', either.makeRight({ a: { 0: 'A' } })], ['{ a: ( A ) }', either.makeRight({ a: 'A' })], ['{ a: ("A A A") }', either.makeRight({ a: 'A A A' })], @@ -127,13 +81,6 @@ testCases(endToEnd, code => code)('end-to-end tests', [ `/**/{/**/a:/**/b/**/,/**/c:/**/d/**/}/**/`, either.makeRight({ a: 'b', c: 'd' }), ], - [ - `/**/(/**/a/**/=>/**/:a/**/)(/**/output/**/)/**/`, - either.makeRight('output'), - ], - [':match({ a: A })({ tag: a, value: {} })', either.makeRight('A')], - [':atom.prepend(a)(b)', either.makeRight('ab')], - [':flow(:atom.append(b))(:atom.append(a))(z)', either.makeRight('zab')], [ `{ // foo: bar @@ -152,61 +99,63 @@ testCases(endToEnd, code 
=> code)('end-to-end tests', [ 'evaluated data': { tag: 'none', value: {} }, }), ], + ['(a => :a)(A)', either.makeRight('A')], + ['{ a: (a => :a)(A) }', either.makeRight({ a: 'A' })], + ['{ a: ( a => :a )( A ) }', either.makeRight({ a: 'A' })], + ['(_ => B)(A)', either.makeRight('B')], + ['{ success }.0', either.makeRight('success')], + ['{ f: :identity }.f(success)', either.makeRight('success')], + ['{ f: :identity }.f({ a: success }).a', either.makeRight('success')], [ - `{@runtime - :object.lookup("key which does not exist in runtime context") - }`, - either.makeRight({ tag: 'none', value: {} }), + '{ f: :identity }.f({ g: :identity }).g({ a: success }).a', + either.makeRight('success'), ], + ['{ a: { b: success } }.a.b', either.makeRight('success')], [ - `{@runtime, :flow( - :match({ - none: "environment does not exist" - some: :flow( - :match({ - none: "environment.lookup does not exist" - some: :apply(PATH) - }) - )( - :object.lookup(lookup) - ) - }) - )( - :object.lookup(environment) - )}`, - output => { - if (either.isLeft(output)) { - assert.fail(output.value.message) - } - assert(typeof output.value === 'object') - assert.deepEqual(output.value['tag'], 'some') - assert.deepEqual(typeof output.value['value'], 'string') - }, + '{ a: { "b.c(d) e \\" {}": success } }.a."b.c(d) e \\" {}"', + either.makeRight('success'), ], + ['(a => { b: :a }.b)(success)', either.makeRight('success')], + ['(a => { b: :a })(success).b', either.makeRight('success')], + ['{ success }/**/./**/0', either.makeRight('success')], [ - `{@runtime, context => - :identity(:context).program.start_time - }`, - output => { - if (either.isLeft(output)) { - assert.fail(output.value.message) + ` + { a: { b: success } } // blah + // blah + .a // blah + // blah + .b // blah + `, + either.makeRight('success'), + ], + [ + `/**/(/**/a/**/=>/**/:a/**/)(/**/output/**/)/**/`, + either.makeRight('output'), + ], + [':identity(output)', either.makeRight('output')], + [ + '{ a: a => :a, b: :a(A) }', + 
result => { + if (either.isLeft(result)) { + assert.fail(result.value.message) } - assert(typeof output.value === 'string') + assert(typeof result.value === 'object') + assert.deepEqual(result.value['b'], 'A') }, ], + [':match({ a: A })({ tag: a, value: {} })', either.makeRight('A')], [ `{@runtime, context => - :context.environment.lookup(PATH) + :identity(:context).program.start_time }`, output => { if (either.isLeft(output)) { assert.fail(output.value.message) } - assert(typeof output.value === 'object') - assert.deepEqual(output.value['tag'], 'some') - assert.deepEqual(typeof output.value['value'], 'string') + assert(typeof output.value === 'string') }, ], + [':atom.prepend(a)(b)', either.makeRight('ab')], [`:natural_number.add(1)(1)`, either.makeRight('2')], [ `:natural_number.add(one)(juan)`, @@ -215,8 +164,20 @@ testCases(endToEnd, code => code)('end-to-end tests', [ }, ], [`:integer.add(42)(-1)`, either.makeRight('41')], + [`42 + -1`, either.makeRight('41')], [`:integer.subtract(-1)(-1)`, either.makeRight('0')], - [`:integer.subtract(1)(2)`, either.makeRight('1')], + [`-1 - -1`, either.makeRight('0')], + [`2 - 1`, either.makeRight('1')], + [`1 - 2 - 3`, either.makeRight('-4')], + [`1 - (2 - 3)`, either.makeRight('2')], + [`(1 - 2) - 3`, either.makeRight('-4')], + [':flow(:atom.append(b))(:atom.append(a))(z)', either.makeRight('zab')], + [ + `{@runtime + :object.lookup("key which does not exist in runtime context") + }`, + either.makeRight({ tag: 'none', value: {} }), + ], [ `:object.lookup(output)({ add_one: :integer.add(1) @@ -243,6 +204,79 @@ testCases(endToEnd, code => code)('end-to-end tests', [ }`, either.makeRight({ true: 'true', false: 'false' }), ], + [ + `{@runtime, :flow( + :match({ + none: "environment does not exist" + some: :flow( + :match({ + none: "environment.lookup does not exist" + some: :apply(PATH) + }) + )( + :object.lookup(lookup) + ) + }) + )( + :object.lookup(environment) + )}`, + output => { + if (either.isLeft(output)) { + 
assert.fail(output.value.message) + } + assert(typeof output.value === 'object') + assert.deepEqual(output.value['tag'], 'some') + assert.deepEqual(typeof output.value['value'], 'string') + }, + ], + [ + `(a => b => c => { :a, :b, :c })(0)(1)(2)`, + either.makeRight({ 0: 0, 1: 1, 2: 2 }), + ], + [ + `{ + a: { + b: { + c: z => { + d: y => x => { + e: { + f: w => { g: { :z, :y, :x, :w, } } + } + } + } + } + } + }.a.b.c(a).d(b)(c).e.f(d).g`, + either.makeRight({ 0: 'a', 1: 'b', 2: 'c', 3: 'd' }), + ], + [ + `{@runtime, context => + :context.environment.lookup(PATH) + }`, + output => { + if (either.isLeft(output)) { + assert.fail(output.value.message) + } + assert(typeof output.value === 'object') + assert.deepEqual(output.value['tag'], 'some') + assert.deepEqual(typeof output.value['value'], 'string') + }, + ], + [ + `{@if, true + "it works!" + {@panic} + }`, + either.makeRight('it works!'), + ], + [ + `{ + a + b + c + }`, + either.makeRight({ 0: 'a', 1: 'b', 2: 'c' }), + ], [ `{@runtime, context => {@if, :boolean.not(:boolean.is(:context)) @@ -257,14 +291,153 @@ testCases(endToEnd, code => code)('end-to-end tests', [ fibonacci: n => { @if, :integer.less_than(2)(:n) then: :n - else: :integer.add( - :fibonacci(:integer.subtract(2)(:n)) - )( - :fibonacci(:integer.subtract(1)(:n)) - ) + else: :fibonacci(:n - 1) + :fibonacci(:n - 2) } result: :fibonacci(10) }.result`, either.makeRight('55'), ], + [ + `{ + +: a => b => :integer.add(:a)(:b) + result: 1 + 1 + }.result`, + either.makeRight('2'), + ], + [`1 + 1`, either.makeRight('2')], + [`1 integer.add 1`, either.makeRight('2')], + [`(1 + 1)`, either.makeRight('2')], + [`(2 - 1) + (4 - 2)`, either.makeRight('3')], + [`0 < 1`, either.makeRight('true')], + [`1 > 0`, either.makeRight('true')], + [`0 < 0`, either.makeRight('false')], + [`0 > 0`, either.makeRight('false')], + [`1 < 0`, either.makeRight('false')], + [`0 > 1`, either.makeRight('false')], + [`(a => (1 + :a))(1)`, either.makeRight('2')], + [`2 |> (a => :a)`, 
either.makeRight('2')], + [`a atom.append b atom.append c`, either.makeRight('abc')], + [`b atom.append c atom.prepend a`, either.makeRight('abc')], + [`(b atom.append c) atom.prepend a`, either.makeRight('abc')], + [`a atom.append (c atom.prepend b)`, either.makeRight('abc')], + [ + `1 + + 2 + + 3 + + 4`, + either.makeRight('10'), + ], + [ + `1 + + 2 + + 3 + + 4`, + either.makeRight('10'), + ], + [`{ f: _ => 1 + 1 }.f(whatever)`, either.makeRight('2')], + [ + `{ + one: 1 + two: :one + :one + }.two`, + either.makeRight('2'), + ], + [ + `{@runtime, context => + ( + PATH + |> :context.environment.lookup + |> :match({ + none: _ => "$PATH not set" + some: :atom.prepend("PATH=") + }) + ) + }`, + result => { + if (either.isLeft(result)) { + assert.fail(result.value.message) + } + const output = result.value + assert(typeof output === 'string') + assert(output.startsWith('PATH=')) + }, + ], + [ + `{ + one: 1 + two: 2 + three: 3 + four: 4 + ten: :one + :two + :three + :four + }.ten`, + either.makeRight('10'), + ], + [ + `{ + add_ten: :integer.add(1) >> :integer.add(9) + }.add_ten(0)`, + either.makeRight('10'), + ], + [ + `( + :+(1) + >> :+(2) + >> :+(3) + >> :+(4) + )(0)`, + either.makeRight('10'), + ], + + [ + `( + :+(1) >> + :+(2) >> + :+(3) >> + :+(4) + )(0)`, + either.makeRight('10'), + ], + [`a |> :atom.append(b) |> :atom.append(c)`, either.makeRight('abc')], + [`a |> (:atom.append(b) >> :atom.append(c))`, either.makeRight('abc')], + [`:|>(:>>(:atom.append(c))(:atom.append(b)))(a)`, either.makeRight('abc')], + [ + `{ + |>: f => a => :f(:a) + ab: a |> :atom.append(b) + abc: :ab |> :atom.append(c) + }.abc`, + either.makeRight('abc'), + ], + [ + `{ + append_bc: :atom.append(b) >> :atom.append(c) + abc: a |> :append_bc + }.abc`, + either.makeRight('abc'), + ], + [ + `{ + nested_option: { + tag: some, + value: { + tag: some, + value: { + tag: some, + value: "it works!" 
+ } + } + } + output: :nested_option match { + none: unreachable + some: :identity + } match { + none: unreachable + some: :identity + } match { + none: unreachable + some: :identity + } + }.output`, + either.makeRight('it works!'), + ], ]) diff --git a/src/language/parsing.ts b/src/language/parsing.ts index ec2f1af..f9ccdcb 100644 --- a/src/language/parsing.ts +++ b/src/language/parsing.ts @@ -1,5 +1,5 @@ export type { Atom } from './parsing/atom.js' -export type { Molecule } from './parsing/molecule.js' +export type { Molecule } from './parsing/expression.js' export { canonicalize, type JsonValueForbiddingSymbolicKeys, diff --git a/src/language/parsing/atom.ts b/src/language/parsing/atom.ts index eb0e72e..802e749 100644 --- a/src/language/parsing/atom.ts +++ b/src/language/parsing/atom.ts @@ -106,6 +106,6 @@ const quotedAtomParser = map( ([_1, contents, _2]) => contents, ) -export const atomParser: Parser = optionallySurroundedByParentheses( +export const atom: Parser = optionallySurroundedByParentheses( oneOf([unquotedAtomParser, quotedAtomParser]), ) diff --git a/src/language/parsing/expression.ts b/src/language/parsing/expression.ts new file mode 100644 index 0000000..0b7ae49 --- /dev/null +++ b/src/language/parsing/expression.ts @@ -0,0 +1,527 @@ +import { + lazy, + map, + nothing, + oneOf, + oneOrMore, + sequence, + zeroOrMore, + type Parser, +} from '@matt.kantor/parsing' +import type { Writable } from '../../utility-types.js' +import { keyPathToMolecule, type KeyPath } from '../semantics.js' +import { + atom, + atomWithAdditionalQuotationRequirements, + type Atom, +} from './atom.js' +import { + arrow, + closingBrace, + colon, + comma, + dot, + newline, + openingBrace, +} from './literals.js' +import { + optionallySurroundedByParentheses, + surroundedByParentheses, +} from './parentheses.js' +import { optionalTrivia, trivia, triviaExceptNewlines } from './trivia.js' + +export type Molecule = { readonly [key: Atom]: Molecule | Atom } + +// Keyless 
properties are automatically assigned numeric indexes, which uses some mutable state. +type Indexer = () => string +const makeIncrementingIndexer = (): Indexer => { + const state = { currentIndex: 0n } + return () => { + const index = state.currentIndex + // TODO: Consider using a `State` monad or something instead of mutation. + state.currentIndex += 1n + return String(index) + } +} + +const optional = ( + parser: Parser>, +): Parser => oneOf([parser, nothing]) + +const trailingIndexesAndArgumentsToExpression = ( + root: Atom | Molecule, + trailingIndexesAndArguments: readonly TrailingIndexOrArgument[], +) => + trailingIndexesAndArguments.reduce((expression, indexOrArgument) => { + switch (indexOrArgument.kind) { + case 'argument': + return { + 0: '@apply', + function: expression, + argument: indexOrArgument.argument, + } + case 'index': + return { + 0: '@index', + object: expression, + query: keyPathToMolecule(indexOrArgument.query), + } + } + }, root) + +type InfixOperator = readonly [Atom, readonly TrailingIndexOrArgument[]] +type InfixOperand = Atom | Molecule +type InfixToken = InfixOperator | InfixOperand + +/** + * Infix operations should be of the following form: + * ``` + * [InfixOperand, InfixOperator, InfixOperand, InfixOperator, …, InfixOperand] + * ``` + * However this can't be directly modeled in TypeScript. + */ +type InfixOperation = readonly [InfixToken, ...InfixToken[]] + +const isOperand = (value: InfixToken | undefined): value is InfixOperand => + !Array.isArray(value) +const isOperator = (value: InfixToken | undefined): value is InfixOperator => + Array.isArray(value) + +const infixTokensToExpression = ( + operation: InfixOperation, +): Molecule | Atom => { + const firstToken = operation[0] + if (operation.length === 1 && isOperand(firstToken)) { + return firstToken + } else { + const leftmostOperationLHS = operation[0] + if (leftmostOperationLHS === undefined) { + throw new Error('Infix operation was empty. 
This is a bug!') + } + if (!isOperand(leftmostOperationLHS)) { + throw new Error( + 'Leftmost token in infix operation was not an operand. This is a bug!', + ) + } + + const leftmostOperator = operation[1] + if (!isOperator(leftmostOperator)) { + throw new Error( + 'Could not find leftmost operator in infix operation. This is a bug!', + ) + } + + const leftmostOperationRHS = operation[2] + if (!isOperand(leftmostOperationRHS)) { + throw new Error( + 'Missing right-hand side of infix operation. This is a bug!', + ) + } + + const leftmostFunction = trailingIndexesAndArgumentsToExpression( + { 0: '@lookup', key: leftmostOperator[0] }, + leftmostOperator[1], + ) + + const reducedLeftmostOperation: Molecule = { + 0: '@apply', + function: { + 0: '@apply', + function: leftmostFunction, + argument: leftmostOperationRHS, + }, + argument: leftmostOperationLHS, + } + + return infixTokensToExpression([ + reducedLeftmostOperation, + ...operation.slice(3), + ]) + } +} + +const atomRequiringDotQuotation = atomWithAdditionalQuotationRequirements(dot) + +const namedProperty = map( + sequence([atom, colon, optionalTrivia, lazy(() => expression)]), + ([key, _colon, _trivia, value]) => [key, value] as const, +) + +const propertyWithOptionalKey = optionallySurroundedByParentheses( + oneOf([ + namedProperty, + map( + lazy(() => expression), + value => [undefined, value] as const, + ), + ]), +) + +const propertyDelimiter = oneOf([ + sequence([optionalTrivia, comma, optionalTrivia]), + sequence([optional(triviaExceptNewlines), newline, optionalTrivia]), +]) + +const argument = surroundedByParentheses(lazy(() => expression)) + +const compactDottedKeyPathComponent = map( + sequence([dot, atomRequiringDotQuotation]), + ([_dot, key]) => key, +) + +const dottedKeyPathComponent = map( + sequence([optionalTrivia, dot, optionalTrivia, atomRequiringDotQuotation]), + ([_trivia1, _dot, _trivia2, key]) => key, +) + +const sugarFreeMolecule: Parser = map( + sequence([ + openingBrace, + optionalTrivia, 
+ sequence([ + // Allow initial property not preceded by a delimiter (e.g. `{a, b}`). + optional(propertyWithOptionalKey), + zeroOrMore( + map( + sequence([propertyDelimiter, propertyWithOptionalKey]), + ([_delimiter, property]) => property, + ), + ), + ]), + optional(propertyDelimiter), + optionalTrivia, + closingBrace, + ]), + ([ + _openingBrace, + _trivia1, + [optionalInitialProperty, remainingProperties], + _trailingDelimiter, + _trivia2, + _closingBrace, + ]) => { + const properties = + optionalInitialProperty === undefined + ? remainingProperties + : [optionalInitialProperty, ...remainingProperties] + const enumerate = makeIncrementingIndexer() + return properties.reduce((molecule: Writable, [key, value]) => { + if (key === undefined) { + // Note that `enumerate()` increments its internal counter as a side effect. + molecule[enumerate()] = value + } else { + molecule[key] = value + } + return molecule + }, {}) + }, +) + +type TrailingIndexOrArgument = + | { + readonly kind: 'argument' + readonly argument: Molecule | Atom + } + | { + readonly kind: 'index' + readonly query: KeyPath + } + +const dottedKeyPath = oneOrMore(dottedKeyPathComponent) +const compactDottedKeyPath = oneOrMore(compactDottedKeyPathComponent) + +const trailingIndexesAndArguments: Parser = + zeroOrMore( + oneOf([ + map(dottedKeyPath, query => ({ kind: 'index', query } as const)), + map(argument, argument => ({ kind: 'argument', argument } as const)), + ]), + ) + +const compactTrailingIndexesAndArguments: Parser< + readonly TrailingIndexOrArgument[] +> = zeroOrMore( + oneOf([ + map(compactDottedKeyPath, query => ({ kind: 'index', query } as const)), + map(argument, argument => ({ kind: 'argument', argument } as const)), + ]), +) + +const infixOperator = sequence([ + atomRequiringDotQuotation, + compactTrailingIndexesAndArguments, +]) + +const compactExpression: Parser = oneOf([ + // (a => :b).c(d) + // (1 + 1) + map( + sequence([ + surroundedByParentheses( + oneOf([ + lazy(() => 
precededByAtomThenTrivia), + lazy(() => precededByColonThenAtom), + ]), + ), + compactTrailingIndexesAndArguments, + ]), + ([expression, trailingIndexesAndArguments]) => + trailingIndexesAndArgumentsToExpression( + expression, + trailingIndexesAndArguments, + ), + ), + // :a.b + // :a.b(1).c + // :f(x) + // :a.b(1)(2) + map( + sequence([ + colon, + atomRequiringDotQuotation, + compactTrailingIndexesAndArguments, + ]), + ([_colon, key, trailingIndexesAndArguments]) => + trailingIndexesAndArgumentsToExpression( + { 0: '@lookup', key }, + trailingIndexesAndArguments, + ), + ), + // {} + lazy(() => precededByOpeningBrace), + // 1 + atom, +]) + +const trailingInfixTokens = oneOrMore( + map( + oneOf([ + // Allowing newlines both before and after operators could lead to + // ambiguity between three enumerated object properties, or a single + // enumerated property whose value is the result of an infix expression: + // ``` + // { + // 1 + // + + // 1 + // } + // ``` + // TODO: This could be made context-dependent, only forbidding newlines + // when between curly braces. Currently this forbids the above formatting + // even within parentheses, where there would be no ambiguity. 
+ sequence([ + trivia, + infixOperator, + triviaExceptNewlines, + compactExpression, + ]), + sequence([ + triviaExceptNewlines, + infixOperator, + trivia, + compactExpression, + ]), + ]), + ([_trivia1, operator, _trivia2, operand]) => [operator, operand] as const, + ), +) + +type TrailingInfixToken = readonly [ + operator: readonly [Atom, readonly TrailingIndexOrArgument[]], + operand: Molecule | Atom, +] +type TrailingFunctionBodyOrInfixTokens = + | { + readonly kind: 'functionBody' + readonly additionalParameters: readonly Atom[] + readonly body: Molecule | Atom + } + | { + readonly kind: 'infixTokens' + readonly tokens: readonly [ + TrailingInfixToken, + ...(readonly TrailingInfixToken[]), + ] + } + +const precededByAtomThenTrivia = map( + sequence([ + atom, + oneOf([ + // a => :b + // a => {} + // a => (b => c => :d) + // a => b => c => d + // a => 1 + 1 + map( + sequence([ + trivia, + arrow, + trivia, + zeroOrMore( + map( + sequence([atom, trivia, arrow, trivia]), + ([parameter, _trivia1, _arrow, _trivia2]) => parameter, + ), + ), + lazy(() => expression), + ]), + ([ + _trivia1, + _arrow, + _trivia2, + additionalParameters, + body, + ]): TrailingFunctionBodyOrInfixTokens => ({ + kind: 'functionBody', + additionalParameters, + body, + }), + ), + // 1 + 2 + 3 + 4 + // 1 + (2 + 3 + 4) + map( + trailingInfixTokens, + (tokens): TrailingFunctionBodyOrInfixTokens => ({ + kind: 'infixTokens', + tokens, + }), + ), + ]), + ]), + ([initialAtom, trailingFunctionBodyOrInfixTokens]) => { + switch (trailingFunctionBodyOrInfixTokens.kind) { + case 'functionBody': + const [lastParameter, ...additionalParameters] = [ + ...trailingFunctionBodyOrInfixTokens.additionalParameters.toReversed(), + initialAtom, + ] + const initialFunction = { + 0: '@function', + parameter: lastParameter, + body: trailingFunctionBodyOrInfixTokens.body, + } + return additionalParameters.reduce( + (expression, additionalParameter) => ({ + 0: '@function', + parameter: additionalParameter, + body: 
expression, + }), + initialFunction, + ) + case 'infixTokens': + return infixTokensToExpression([ + initialAtom, + ...trailingFunctionBodyOrInfixTokens.tokens.flat(), + ]) + } + }, +) + +// :a +// :a.b +// :a.b(1).c +// :f(x) +// :a.b(1)(2) +// :a b.c :z +// :a b.c z +// :f(g) + b +// :a + :b + :c + :d +const precededByColonThenAtom = map( + sequence([ + colon, + atomRequiringDotQuotation, + trailingIndexesAndArguments, + zeroOrMore( + map( + // See note in `trailingInfixTokens` about newlines. + oneOf([ + sequence([ + trivia, + infixOperator, + triviaExceptNewlines, + compactExpression, + ]), + sequence([ + triviaExceptNewlines, + infixOperator, + trivia, + compactExpression, + ]), + ]), + ([_trivia1, operator, _trivia2, operand]) => + [operator, operand] as const, + ), + ), + ]), + ([_colon, key, trailingIndexesAndArguments, infixOperationTokens]) => { + const initialExpression = trailingIndexesAndArgumentsToExpression( + { 0: '@lookup', key }, + trailingIndexesAndArguments, + ) + const [firstToken, ...additionalTokens] = infixOperationTokens + if (firstToken === undefined) { + return initialExpression + } else { + return infixTokensToExpression([ + initialExpression, + ...firstToken, + ...additionalTokens.flat(), + ]) + } + }, +) + +// (1 + 1) +// (1 + 2 + 3 + 4) +// (x => :x) +// (x => :x)(x).b +// (1 + 1).b +// (:x => x)(1) +// (:f >> :g)(1) +// (1 + 1) - (1 + 1) +const precededByOpeningParenthesis = oneOf([ + map( + sequence([ + surroundedByParentheses(lazy(() => expression)), + trailingInfixTokens, + ]), + ([initialExpression, trailingInfixTokens]) => + infixTokensToExpression([ + initialExpression, + ...trailingInfixTokens.flat(), + ]), + ), + map( + sequence([ + surroundedByParentheses(lazy(() => expression)), + trailingIndexesAndArguments, + ]), + ([expression, trailingIndexesAndArguments]) => + trailingIndexesAndArgumentsToExpression( + expression, + trailingIndexesAndArguments, + ), + ), +]) + +// {} +// { a: b } +// { 1, 2, 3 } +// {a::f}.a(1) + 1 
+const precededByOpeningBrace = map( + sequence([sugarFreeMolecule, trailingIndexesAndArguments]), + ([expression, trailingIndexesAndArguments]) => + trailingIndexesAndArgumentsToExpression( + expression, + trailingIndexesAndArguments, + ), +) + +export const expression: Parser = oneOf([ + precededByOpeningParenthesis, + precededByOpeningBrace, + precededByColonThenAtom, + precededByAtomThenTrivia, + atom, +]) diff --git a/src/language/parsing/molecule.ts b/src/language/parsing/molecule.ts deleted file mode 100644 index ac6dea1..0000000 --- a/src/language/parsing/molecule.ts +++ /dev/null @@ -1,229 +0,0 @@ -import { - lazy, - map, - nothing, - oneOf, - oneOrMore, - sequence, - zeroOrMore, - type Parser, -} from '@matt.kantor/parsing' -import type { Writable } from '../../utility-types.js' -import { keyPathToMolecule } from '../semantics.js' -import { - atomParser, - atomWithAdditionalQuotationRequirements, - type Atom, -} from './atom.js' -import { - arrow, - closingBrace, - closingParenthesis, - colon, - comma, - dot, - newline, - openingBrace, - openingParenthesis, -} from './literals.js' -import { optionallySurroundedByParentheses } from './parentheses.js' -import { optionalTrivia, trivia, triviaExceptNewlines } from './trivia.js' - -export type Molecule = { readonly [key: Atom]: Molecule | Atom } - -// Keyless properties are automatically assigned numeric indexes, which uses some mutable state. -type Indexer = () => string -const makeIncrementingIndexer = (): Indexer => { - const state = { currentIndex: 0n } - return () => { - const index = state.currentIndex - // TODO: Consider using a `State` monad or something instead of mutation. 
- state.currentIndex += 1n - return String(index) - } -} - -const optional = ( - parser: Parser>, -): Parser => oneOf([parser, nothing]) - -const propertyKey = atomParser -const propertyValue = oneOf([ - lazy(() => potentiallySugaredMolecule), - atomParser, -]) - -const namedProperty = map( - sequence([propertyKey, colon, optionalTrivia, propertyValue]), - ([key, _colon, _trivia, value]) => [key, value] as const, -) - -const propertyWithOptionalKey = optionallySurroundedByParentheses( - oneOf([ - namedProperty, - map(propertyValue, value => [undefined, value] as const), - ]), -) - -const propertyDelimiter = oneOf([ - sequence([optionalTrivia, comma, optionalTrivia]), - sequence([optional(triviaExceptNewlines), newline, optionalTrivia]), -]) - -const argument = map( - sequence([ - openingParenthesis, - optionalTrivia, - propertyValue, - optionalTrivia, - closingParenthesis, - ]), - ([_openingParenthesis, _trivia1, argument, _trivia2, _closingParenthesis]) => - argument, -) - -const dottedKeyPathComponent = map( - sequence([ - optionalTrivia, - dot, - optionalTrivia, - atomWithAdditionalQuotationRequirements(dot), - ]), - ([_trivia1, _dot, _trivia2, key]) => key, -) - -const sugarFreeMolecule: Parser = optionallySurroundedByParentheses( - map( - sequence([ - openingBrace, - optionalTrivia, - sequence([ - // Allow initial property not preceded by a delimiter (e.g. `{a, b}`). - optional(propertyWithOptionalKey), - zeroOrMore( - map( - sequence([propertyDelimiter, propertyWithOptionalKey]), - ([_delimiter, property]) => property, - ), - ), - ]), - optional(propertyDelimiter), - optionalTrivia, - closingBrace, - ]), - ([ - _openingBrace, - _trivia1, - [optionalInitialProperty, remainingProperties], - _trailingDelimiter, - _trivia2, - _closingBrace, - ]) => { - const properties = - optionalInitialProperty === undefined - ? 
remainingProperties - : [optionalInitialProperty, ...remainingProperties] - const enumerate = makeIncrementingIndexer() - return properties.reduce((molecule: Writable, [key, value]) => { - if (key === undefined) { - // Note that `enumerate()` increments its internal counter as a side effect. - molecule[enumerate()] = value - } else { - molecule[key] = value - } - return molecule - }, {}) - }, - ), -) - -const sugaredLookup: Parser = optionallySurroundedByParentheses( - map( - sequence([ - colon, - // Reserve `.` so that `:a.b` is parsed as a lookup followed by an index. - atomWithAdditionalQuotationRequirements(dot), - ]), - ([_colon, key]) => ({ 0: '@lookup', key }), - ), -) - -const sugaredFunction: Parser = optionallySurroundedByParentheses( - map( - sequence([atomParser, trivia, arrow, trivia, propertyValue]), - ([parameter, _trivia1, _arrow, _trivia2, body]) => ({ - 0: '@function', - parameter, - body, - }), - ), -) - -const potentiallySugaredMolecule: Parser = (() => { - // The awkward setup in here avoids infinite recursion when applying the mutually-dependent - // parsers for index and apply sugars. Indexes/applications can be chained to form - // arbitrarily-long expressions (e.g. `:a.b.c(d).e(f)(g).h.i(j).k`). - - const potentiallySugaredNonApply = map( - sequence([ - oneOf([sugaredLookup, sugaredFunction, sugarFreeMolecule]), - zeroOrMore(dottedKeyPathComponent), - ]), - ([object, keyPath]) => - keyPath.length === 0 - ? 
object - : { - 0: '@index', - object, - query: keyPathToMolecule(keyPath), - }, - ) - - const sugaredApplyWithOptionalTrailingIndexesAndApplies = map( - sequence([ - potentiallySugaredNonApply, - oneOrMore(argument), - zeroOrMore( - sequence([oneOrMore(dottedKeyPathComponent), zeroOrMore(argument)]), - ), - ]), - ([ - functionToApply, - multipleArguments, - trailingIndexQueriesAndApplyArguments, - ]) => { - const initialApply = multipleArguments.reduce( - (expression, argument) => ({ - 0: '@apply', - function: expression, - argument, - }), - functionToApply, - ) - - return trailingIndexQueriesAndApplyArguments.reduce( - (expression, [keyPath, possibleArguments]) => - possibleArguments.reduce( - (functionToApply, argument) => ({ - 0: '@apply', - function: functionToApply, - argument, - }), - { - 0: '@index', - object: expression, - query: keyPathToMolecule(keyPath), - }, - ), - initialApply, - ) - }, - ) - - return oneOf([ - sugaredApplyWithOptionalTrailingIndexesAndApplies, - potentiallySugaredNonApply, - ]) -})() - -export const moleculeParser: Parser = potentiallySugaredMolecule diff --git a/src/language/parsing/parentheses.ts b/src/language/parsing/parentheses.ts index 8e83dee..c628f21 100644 --- a/src/language/parsing/parentheses.ts +++ b/src/language/parsing/parentheses.ts @@ -20,3 +20,18 @@ export const optionallySurroundedByParentheses = ( theParser, sequence([optionalTrivia, closingParenthesis]), ) + +export const surroundedByParentheses = ( + theParser: Parser, +): Parser => + map( + sequence([ + openingParenthesis, + optionalTrivia, + theParser, + optionalTrivia, + closingParenthesis, + ]), + ([_openParenthesis, _trivia1, output, _trivia2, _closeParenthesis]) => + output, + ) diff --git a/src/language/parsing/syntax-tree.ts b/src/language/parsing/syntax-tree.ts index a3440da..0bf9173 100644 --- a/src/language/parsing/syntax-tree.ts +++ b/src/language/parsing/syntax-tree.ts @@ -1,11 +1,5 @@ import option, { type Option } from '@matt.kantor/option' -import 
{ - map, - oneOf, - sequence, - zeroOrMore, - type Parser, -} from '@matt.kantor/parsing' +import { map, sequence, type Parser } from '@matt.kantor/parsing' import { withPhantomData, type WithPhantomData } from '../../phantom-data.js' import type { JsonArray, @@ -14,9 +8,9 @@ import type { Writable, } from '../../utility-types.js' import type { KeyPath } from '../semantics.js' -import { atomParser, type Atom } from './atom.js' -import { moleculeParser, type Molecule } from './molecule.js' -import { trivia } from './trivia.js' +import { type Atom } from './atom.js' +import { expression, type Molecule } from './expression.js' +import { optionalTrivia } from './trivia.js' declare const _canonicalized: unique symbol export type Canonicalized = { readonly [_canonicalized]: true } @@ -83,10 +77,6 @@ type JsonRecordForbiddingSymbolicKeys = { }> export const syntaxTreeParser: Parser = map( - sequence([ - zeroOrMore(trivia), - oneOf([atomParser, moleculeParser]), - zeroOrMore(trivia), - ]), + sequence([optionalTrivia, expression, optionalTrivia]), ([_leadingTrivia, syntaxTree, _trailingTrivia]) => canonicalize(syntaxTree), ) diff --git a/src/language/parsing/trivia.ts b/src/language/parsing/trivia.ts index 6b25d11..3a473c1 100644 --- a/src/language/parsing/trivia.ts +++ b/src/language/parsing/trivia.ts @@ -35,6 +35,7 @@ const singleLineComment = sequence([ ]) export const whitespace = regularExpression(/^\s+/) +export const whitespaceExceptNewlines = regularExpression(/[^\S\n]+/) export const trivia = oneOrMore( oneOf([whitespace, singleLineComment, blockComment]), @@ -42,8 +43,6 @@ export const trivia = oneOrMore( export const optionalTrivia = oneOf([trivia, nothing]) -export const whitespaceExceptNewlines = regularExpression(/[^\S\n]+/) - export const triviaExceptNewlines = oneOrMore( oneOf([whitespaceExceptNewlines, singleLineComment, blockComment]), )