mkantor · mkantor · Feb 11, 2025 · Feb 11, 2025
diff --git a/README.md b/README.md
@@ -49,7 +49,7 @@ data representation implied by the fact that a value is an atom (e.g. the atom
 `2` may be an integer in memory).
 
 Bare words not containing any
-[reserved character sequences](./src/language/parsing/atom.ts#L32-L54) are
+[reserved character sequences](./src/language/parsing/atom.ts#L24-L46) are
 atoms:
 
 ```

diff --git a/src/end-to-end.test.ts b/src/end-to-end.test.ts
@@ -53,6 +53,47 @@ testCases(endToEnd, code => code)('end-to-end tests', [
       body: { 0: '@lookup', query: { 0: 'a' } },
     }),
   ],
+  ['{ success }.0', either.makeRight('success')],
+  ['{ f: :identity }.f(success)', either.makeRight('success')],
+  ['{ f: :identity }.f({ a: success }).a', either.makeRight('success')],
+  [
+    '{ f: :identity }.f({ g: :identity }).g({ a: success }).a',
+    either.makeRight('success'),
+  ],
+  ['{ a: { b: success } }.a.b', either.makeRight('success')],
+  [
+    '{ a: { "b.c(d) e \\" {}": success } }.a."b.c(d) e \\" {}"',
+    either.makeRight('success'),
+  ],
+  ['(a => { b: :a }.b)(success)', either.makeRight('success')],
+  ['(a => { b: :a })(success).b', either.makeRight('success')],
+  ['{ success }/**/./**/0', either.makeRight('success')],
+  [
+    `
+      { a: { b: success } } // blah
+        // blah
+        .a // blah
+        // blah
+        .b // blah
+    `,
+    either.makeRight('success'),
+  ],
+  [
+    `{
+      a: {
+        b: {
+          c: z => {
+            d: y => x => {
+              e: {
+                f: w => { g: { :z :y :x :w } }
+              }
+            }
+          }
+        }
+      }
+    }.a.b.c(a).d(b)(c).e.f(d).g`,
+    either.makeRight({ 0: 'a', 1: 'b', 2: 'c', 3: 'd' }),
+  ],
   ['{ a: ({ A }) }', either.makeRight({ a: { 0: 'A' } })],
   ['{ a: ( A ) }', either.makeRight({ a: 'A' })],
   ['{ a: ("A A A") }', either.makeRight({ a: 'A A A' })],

diff --git a/src/language/parsing/atom.ts b/src/language/parsing/atom.ts
@@ -21,38 +21,64 @@ export const isAtom = (value: unknown): value is Atom =>
 
 export const unit = '' as const
 
+const atomComponentsRequiringQuotation = [
+  whitespace,
+  literal('"'),
+  literal('{'),
+  literal('}'),
+  literal('['),
+  literal(']'),
+  literal('('),
+  literal(')'),
+  literal('<'),
+  literal('>'),
+  literal('#'),
+  literal('&'),
+  literal('|'),
+  literal('\\'),
+  literal('='),
+  literal(':'),
+  literal(';'),
+  literal(','),
+  literal('//'),
+  literal('/*'),
+  literal('*/'),
+] as const
+
 export const atomParser: Parser<Atom> = optionallySurroundedByParentheses(
   lazy(() => oneOf([unquotedAtomParser, quotedAtomParser])),
 )
 
+export const atomWithAdditionalQuotationRequirements = (
+  additionalQuoteRequiringComponent: Parser<unknown>,
+) =>
+  optionallySurroundedByParentheses(
+    lazy(() =>
+      oneOf([
+        map(
+          oneOrMore(
+            butNot(
+              anySingleCharacter,
+              oneOf([
+                ...atomComponentsRequiringQuotation,
+                additionalQuoteRequiringComponent,
+              ]),
+              'a character sequence requiring quotation',
+            ),
+          ),
+          characters => characters.join(''),
+        ),
+        quotedAtomParser,
+      ]),
+    ),
+  )
+
 export const unquotedAtomParser = map(
   oneOrMore(
     butNot(
       anySingleCharacter,
-      oneOf([
-        whitespace,
-        literal('"'),
-        literal('{'),
-        literal('}'),
-        literal('['),
-        literal(']'),
-        literal('('),
-        literal(')'),
-        literal('<'),
-        literal('>'),
-        literal('#'),
-        literal('&'),
-        literal('|'),
-        literal('\\'),
-        literal('='),
-        literal(':'),
-        literal(';'),
-        literal(','),
-        literal('//'),
-        literal('/*'),
-        literal('*/'),
-      ]),
-      'a forbidden character sequence',
+      oneOf(atomComponentsRequiringQuotation),
+      'a character sequence requiring quotation',
     ),
   ),
   characters => characters.join(''),

diff --git a/src/language/parsing/molecule.ts b/src/language/parsing/molecule.ts
@@ -9,28 +9,22 @@ import {
   zeroOrMore,
   type Parser,
 } from '@matt.kantor/parsing'
-import { atomParser, type Atom } from './atom.js'
+import { keyPathToMolecule } from '../semantics.js'
+import {
+  atomParser,
+  atomWithAdditionalQuotationRequirements,
+  type Atom,
+} from './atom.js'
 import { optionallySurroundedByParentheses } from './parentheses.js'
 import { trivia } from './trivia.js'
 
 export type Molecule = { readonly [key: Atom]: Molecule | Atom }
 
 export const unit: Molecule = {}
 
-export const moleculeParser: Parser<Molecule> = oneOf([
-  optionallySurroundedByParentheses(
-    map(
-      lazy(() => moleculeAsEntries(makeIncrementingIndexer())),
-      Object.fromEntries,
-    ),
-  ),
-  lazy(() => sugaredApply),
-  lazy(() => sugaredFunction),
-])
-
-const optional = <Output>(
-  parser: Parser<NonNullable<Output>>,
-): Parser<Output | undefined> => oneOf([parser, nothing])
+export const moleculeParser: Parser<Molecule> = lazy(
+  () => potentiallySugaredMolecule,
+)
 
 // Keyless properties are automatically assigned numeric indexes, which uses some mutable state.
 type Indexer = () => string
@@ -44,65 +38,14 @@ const makeIncrementingIndexer = (): Indexer => {
   }
 }
 
-const propertyDelimiter = oneOf([
-  sequence([optional(trivia), literal(','), optional(trivia)]),
-  trivia,
-])
-
-const sugaredLookup: Parser<Molecule> = optionallySurroundedByParentheses(
-  map(
-    sequence([literal(':'), oneOf([atomParser, moleculeParser])]),
-    ([_colon, query]) => ({ 0: '@lookup', query }),
-  ),
-)
-
-const sugaredFunction: Parser<Molecule> = optionallySurroundedByParentheses(
-  map(
-    sequence([
-      atomParser,
-      trivia,
-      literal('=>'),
-      trivia,
-      lazy(() => propertyValue),
-    ]),
-    ([parameter, _trivia1, _arrow, _trivia2, body]) => ({
-      0: '@function',
-      parameter,
-      body,
-    }),
-  ),
-)
-
-const sugaredApply: Parser<Molecule> = map(
-  sequence([
-    oneOf([sugaredLookup, lazy(() => sugaredFunction)]),
-    oneOrMore(
-      sequence([
-        literal('('),
-        optional(trivia),
-        lazy(() => propertyValue),
-        optional(trivia),
-        literal(')'),
-      ]),
-    ),
-  ]),
-  ([f, multipleArguments]) =>
-    multipleArguments.reduce<Molecule>(
-      (expression, [_1, _2, argument, _3, _4]) => ({
-        0: '@apply',
-        function: expression,
-        argument,
-      }),
-      f,
-    ),
-)
+const optional = <Output>(
+  parser: Parser<NonNullable<Output>>,
+): Parser<Output | undefined> => oneOf([parser, nothing])
 
 const propertyKey = atomParser
 const propertyValue = oneOf([
-  sugaredApply, // must come first to avoid ambiguity
-  lazy(() => moleculeParser), // must come second to avoid ambiguity
+  lazy(() => potentiallySugaredMolecule),
   atomParser,
-  sugaredLookup,
 ])
 
 const namedProperty = map(
@@ -118,6 +61,33 @@ const property = (index: Indexer) =>
     oneOf([namedProperty, numberedProperty(index)]),
   )
 
+const propertyDelimiter = oneOf([
+  sequence([optional(trivia), literal(','), optional(trivia)]),
+  trivia,
+])
+
+const argument = map(
+  sequence([
+    literal('('),
+    optional(trivia),
+    propertyValue,
+    optional(trivia),
+    literal(')'),
+  ]),
+  ([_openingParenthesis, _trivia1, argument, _trivia2, _closingParenthesis]) =>
+    argument,
+)
+
+const dottedKeyPathComponent = map(
+  sequence([
+    optional(trivia),
+    literal('.'),
+    optional(trivia),
+    atomWithAdditionalQuotationRequirements(literal('.')),
+  ]),
+  ([_trivia1, _dot, _trivia2, key]) => key,
+)
+
 const moleculeAsEntries = (
   index: Indexer,
 ): Parser<readonly (readonly [string, string | Molecule])[]> =>
@@ -146,3 +116,95 @@ const moleculeAsEntries = (
         ? remainingProperties
         : [optionalInitialProperty, ...remainingProperties],
   )
+
+const sugarFreeMolecule: Parser<Molecule> = optionallySurroundedByParentheses(
+  map(
+    lazy(() => moleculeAsEntries(makeIncrementingIndexer())),
+    Object.fromEntries,
+  ),
+)
+
+const sugaredLookup: Parser<Molecule> = optionallySurroundedByParentheses(
+  map(
+    sequence([literal(':'), oneOf([atomParser, sugarFreeMolecule])]),
+    ([_colon, query]) => ({ 0: '@lookup', query }),
+  ),
+)
+
+const sugaredFunction: Parser<Molecule> = optionallySurroundedByParentheses(
+  map(
+    sequence([atomParser, trivia, literal('=>'), trivia, propertyValue]),
+    ([parameter, _trivia1, _arrow, _trivia2, body]) => ({
+      0: '@function',
+      parameter,
+      body,
+    }),
+  ),
+)
+
+const potentiallySugaredMolecule: Parser<Molecule> = (() => {
+  // Index and apply sugars can each start with the other, so a direct mutually-recursive
+  // definition would never terminate. Instead a non-apply head is parsed first, then chained
+  // indexes/applications of any length are folded in (e.g. `:a.b.c(d).e(f)(g).h.i(j).k`).
+
+  const potentiallySugaredNonApply = map(
+    sequence([
+      oneOf([sugaredLookup, sugaredFunction, sugarFreeMolecule]),
+      zeroOrMore(dottedKeyPathComponent),
+    ]),
+    ([object, keyPath]) =>
+      keyPath.length === 0
+        ? object
+        : {
+            0: '@index',
+            object,
+            query: keyPathToMolecule(keyPath),
+          },
+  )
+
+  const sugaredApplyWithOptionalTrailingIndexesAndApplies = map(
+    sequence([
+      potentiallySugaredNonApply,
+      oneOrMore(argument),
+      zeroOrMore(
+        sequence([oneOrMore(dottedKeyPathComponent), zeroOrMore(argument)]),
+      ),
+    ]),
+    ([
+      functionToApply,
+      multipleArguments,
+      trailingIndexQueriesAndApplyArguments,
+    ]) => {
+      const initialApply = multipleArguments.reduce<Molecule>(
+        (expression, argument) => ({
+          0: '@apply',
+          function: expression,
+          argument,
+        }),
+        functionToApply,
+      )
+
+      return trailingIndexQueriesAndApplyArguments.reduce(
+        (expression, [keyPath, possibleArguments]) =>
+          possibleArguments.reduce<Molecule>(
+            (functionToApply, argument) => ({
+              0: '@apply',
+              function: functionToApply,
+              argument,
+            }),
+            {
+              0: '@index',
+              object: expression,
+              query: keyPathToMolecule(keyPath),
+            },
+          ),
+        initialApply,
+      )
+    },
+  )
+
+  return oneOf([
+    sugaredApplyWithOptionalTrailingIndexesAndApplies,
+    potentiallySugaredNonApply,
+  ])
+})()