From cf932be01061c2f92a0841eb057b78dc11e8e130 Mon Sep 17 00:00:00 2001 From: Mark Larah Date: Fri, 25 Jul 2025 01:00:01 -1000 Subject: [PATCH 1/5] Revert "No `{Ignored}` tokens when parsing schema coordinates (#4450)" This reverts commit 04dd13e28c9c992d65355e309d8233f6abf36e43. --- src/index.ts | 2 -- src/language/__tests__/lexer-test.ts | 33 +----------------------- src/language/__tests__/parser-test.ts | 4 +-- src/language/__tests__/printer-test.ts | 24 ++++++------------ src/language/index.ts | 4 +-- src/language/lexer.ts | 25 ------------------ src/language/parser.ts | 35 +++----------------------- 7 files changed, 17 insertions(+), 110 deletions(-) diff --git a/src/index.ts b/src/index.ts index ddc799e2ba..1f80cf51f3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -230,7 +230,6 @@ export { printSourceLocation, // Lex Lexer, - SchemaCoordinateLexer, TokenKind, // Parse parse, @@ -262,7 +261,6 @@ export { export type { ParseOptions, - ParseSchemaCoordinateOptions, SourceLocation, // Visitor utilities ASTVisitor, diff --git a/src/language/__tests__/lexer-test.ts b/src/language/__tests__/lexer-test.ts index 433d3c4181..f324a20a24 100644 --- a/src/language/__tests__/lexer-test.ts +++ b/src/language/__tests__/lexer-test.ts @@ -9,11 +9,7 @@ import { inspect } from '../../jsutils/inspect.js'; import { GraphQLError } from '../../error/GraphQLError.js'; import type { Token } from '../ast.js'; -import { - isPunctuatorTokenKind, - Lexer, - SchemaCoordinateLexer, -} from '../lexer.js'; +import { isPunctuatorTokenKind, Lexer } from '../lexer.js'; import { Source } from '../source.js'; import { TokenKind } from '../tokenKind.js'; @@ -1193,33 +1189,6 @@ describe('Lexer', () => { }); }); -describe('SchemaCoordinateLexer', () => { - it('can be stringified', () => { - const lexer = new SchemaCoordinateLexer(new Source('Name.field')); - expect(Object.prototype.toString.call(lexer)).to.equal( - '[object SchemaCoordinateLexer]', - ); - }); - - it('tracks a schema coordinate', () 
=> { - const lexer = new SchemaCoordinateLexer(new Source('Name.field')); - expect(lexer.advance()).to.contain({ - kind: TokenKind.NAME, - start: 0, - end: 4, - value: 'Name', - }); - }); - - it('forbids ignored tokens', () => { - const lexer = new SchemaCoordinateLexer(new Source('\nName.field')); - expectToThrowJSON(() => lexer.advance()).to.deep.equal({ - message: 'Syntax Error: Invalid character: U+000A.', - locations: [{ line: 1, column: 1 }], - }); - }); -}); - describe('isPunctuatorTokenKind', () => { function isPunctuatorToken(text: string) { return isPunctuatorTokenKind(lexOne(text).kind); diff --git a/src/language/__tests__/parser-test.ts b/src/language/__tests__/parser-test.ts index e8dd914f71..c0d247ddf5 100644 --- a/src/language/__tests__/parser-test.ts +++ b/src/language/__tests__/parser-test.ts @@ -751,11 +751,11 @@ describe('Parser', () => { }); it('rejects Name . Name ( Name : Name )', () => { - expect(() => parseSchemaCoordinate('MyType.field(arg:value)')) + expect(() => parseSchemaCoordinate('MyType.field(arg: value)')) .to.throw() .to.deep.include({ message: 'Syntax Error: Expected ")", found Name "value".', - locations: [{ line: 1, column: 18 }], + locations: [{ line: 1, column: 19 }], }); }); diff --git a/src/language/__tests__/printer-test.ts b/src/language/__tests__/printer-test.ts index a7a604bcba..589d9bfc8d 100644 --- a/src/language/__tests__/printer-test.ts +++ b/src/language/__tests__/printer-test.ts @@ -301,24 +301,16 @@ describe('Printer: Query document', () => { }); it('prints schema coordinates', () => { - expect(print(parseSchemaCoordinate('Name'))).to.equal('Name'); - expect(print(parseSchemaCoordinate('Name.field'))).to.equal('Name.field'); - expect(print(parseSchemaCoordinate('Name.field(arg:)'))).to.equal( - 'Name.field(arg:)', + expect(print(parseSchemaCoordinate(' Name '))).to.equal('Name'); + expect(print(parseSchemaCoordinate(' Name . 
field '))).to.equal( + 'Name.field', ); - expect(print(parseSchemaCoordinate('@name'))).to.equal('@name'); - expect(print(parseSchemaCoordinate('@name(arg:)'))).to.equal('@name(arg:)'); - }); - - it('throws syntax error for ignored tokens in schema coordinates', () => { - expect(() => print(parseSchemaCoordinate('# foo\nName'))).to.throw( - 'Syntax Error: Invalid character: "#"', - ); - expect(() => print(parseSchemaCoordinate('\nName'))).to.throw( - 'Syntax Error: Invalid character: U+000A.', + expect(print(parseSchemaCoordinate(' Name . field ( arg: )'))).to.equal( + 'Name.field(arg:)', ); - expect(() => print(parseSchemaCoordinate('Name .field'))).to.throw( - 'Syntax Error: Invalid character: " "', + expect(print(parseSchemaCoordinate(' @ name '))).to.equal('@name'); + expect(print(parseSchemaCoordinate(' @ name (arg:) '))).to.equal( + '@name(arg:)', ); }); }); diff --git a/src/language/index.ts b/src/language/index.ts index 1f2eff6bb7..c5620b4948 100644 --- a/src/language/index.ts +++ b/src/language/index.ts @@ -11,7 +11,7 @@ export { Kind } from './kinds.js'; export { TokenKind } from './tokenKind.js'; -export { Lexer, SchemaCoordinateLexer } from './lexer.js'; +export { Lexer } from './lexer.js'; export { parse, @@ -20,7 +20,7 @@ export { parseType, parseSchemaCoordinate, } from './parser.js'; -export type { ParseOptions, ParseSchemaCoordinateOptions } from './parser.js'; +export type { ParseOptions } from './parser.js'; export { print } from './printer.js'; diff --git a/src/language/lexer.ts b/src/language/lexer.ts index 4a2228e285..44abc05197 100644 --- a/src/language/lexer.ts +++ b/src/language/lexer.ts @@ -83,27 +83,6 @@ export class Lexer { } return token; } - - validateIgnoredToken(_position: number): void { - /* noop - ignored tokens are ignored */ - } -} - -/** - * As `Lexer`, but forbids ignored tokens as required of schema coordinates. 
- */ -export class SchemaCoordinateLexer extends Lexer { - override get [Symbol.toStringTag]() { - return 'SchemaCoordinateLexer'; - } - - override validateIgnoredToken(position: number): void { - throw syntaxError( - this.source, - position, - `Invalid character: ${printCodePointAt(this, position)}.`, - ); - } } /** @@ -238,7 +217,6 @@ function readNextToken(lexer: Lexer, start: number): Token { case 0x0009: // \t case 0x0020: // case 0x002c: // , - lexer.validateIgnoredToken(position); ++position; continue; // LineTerminator :: @@ -246,13 +224,11 @@ function readNextToken(lexer: Lexer, start: number): Token { // - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"] // - "Carriage Return (U+000D)" "New Line (U+000A)" case 0x000a: // \n - lexer.validateIgnoredToken(position); ++position; ++lexer.line; lexer.lineStart = position; continue; case 0x000d: // \r - lexer.validateIgnoredToken(position); if (body.charCodeAt(position + 1) === 0x000a) { position += 2; } else { @@ -263,7 +239,6 @@ function readNextToken(lexer: Lexer, start: number): Token { continue; // Comment case 0x0023: // # - lexer.validateIgnoredToken(position); return readComment(lexer, position); // Token :: // - Punctuator diff --git a/src/language/parser.ts b/src/language/parser.ts index 5cf3e14d21..5acfb4e85d 100644 --- a/src/language/parser.ts +++ b/src/language/parser.ts @@ -70,11 +70,7 @@ import type { import { Location, OperationTypeNode } from './ast.js'; import { DirectiveLocation } from './directiveLocation.js'; import { Kind } from './kinds.js'; -import { - isPunctuatorTokenKind, - Lexer, - SchemaCoordinateLexer, -} from './lexer.js'; +import { isPunctuatorTokenKind, Lexer } from './lexer.js'; import { isSource, Source } from './source.js'; import { TokenKind } from './tokenKind.js'; @@ -118,24 +114,6 @@ export interface ParseOptions { * ``` */ experimentalFragmentArguments?: boolean | undefined; - - /** - * You may override the Lexer class used to lex the source; this is used by 
- * schema coordinates to introduce a lexer that forbids ignored tokens. - */ - Lexer?: typeof Lexer | undefined; -} - -/** - * Configuration options to control schema coordinate parser behavior - */ -export interface ParseSchemaCoordinateOptions { - /** - * By default, the parser creates AST nodes that know the location - * in the source that they correspond to. This configuration flag - * disables that behavior for performance or testing. - */ - noLocation?: boolean | undefined; } /** @@ -221,13 +199,9 @@ export function parseType( */ export function parseSchemaCoordinate( source: string | Source, - options?: ParseSchemaCoordinateOptions, + options?: ParseOptions, ): SchemaCoordinateNode { - // Ignored tokens are excluded syntax for a Schema Coordinate. - const parser = new Parser(source, { - ...options, - Lexer: SchemaCoordinateLexer, - }); + const parser = new Parser(source, options); parser.expectToken(TokenKind.SOF); const coordinate = parser.parseSchemaCoordinate(); parser.expectToken(TokenKind.EOF); @@ -253,8 +227,7 @@ export class Parser { constructor(source: string | Source, options: ParseOptions = {}) { const sourceObj = isSource(source) ? source : new Source(source); - const LexerClass = options.Lexer ?? 
Lexer; - this._lexer = new LexerClass(sourceObj); + this._lexer = new Lexer(sourceObj); this._options = options; this._tokenCounter = 0; } From ddc7e0e07af841836c851d67dbf7315f8c3d7fce Mon Sep 17 00:00:00 2001 From: Mark Larah Date: Sat, 26 Jul 2025 23:55:18 -0500 Subject: [PATCH 2/5] SchemaCoordinateLexer --- src/language/__tests__/lexer-test.ts | 11 +- src/language/__tests__/parser-test.ts | 14 +- src/language/__tests__/printer-test.ts | 24 ++-- .../__tests__/schemaCoordinateLexer-test.ts | 48 +++++++ src/language/lexer.ts | 89 ++++++------ src/language/parser.ts | 29 +++- src/language/schemaCoordinateLexer.ts | 128 ++++++++++++++++++ 7 files changed, 274 insertions(+), 69 deletions(-) create mode 100644 src/language/__tests__/schemaCoordinateLexer-test.ts create mode 100644 src/language/schemaCoordinateLexer.ts diff --git a/src/language/__tests__/lexer-test.ts b/src/language/__tests__/lexer-test.ts index f324a20a24..85603dfaaa 100644 --- a/src/language/__tests__/lexer-test.ts +++ b/src/language/__tests__/lexer-test.ts @@ -166,8 +166,8 @@ describe('Lexer', () => { }); it('reports unexpected characters', () => { - expectSyntaxError('^').to.deep.equal({ - message: 'Syntax Error: Unexpected character: "^".', + expectSyntaxError('.').to.deep.equal({ + message: 'Syntax Error: Unexpected character: ".".', locations: [{ line: 1, column: 1 }], }); }); @@ -965,13 +965,6 @@ describe('Lexer', () => { value: undefined, }); - expect(lexOne('.')).to.contain({ - kind: TokenKind.DOT, - start: 0, - end: 1, - value: undefined, - }); - expect(lexOne('...')).to.contain({ kind: TokenKind.SPREAD, start: 0, diff --git a/src/language/__tests__/parser-test.ts b/src/language/__tests__/parser-test.ts index c0d247ddf5..2ca4c86216 100644 --- a/src/language/__tests__/parser-test.ts +++ b/src/language/__tests__/parser-test.ts @@ -722,7 +722,7 @@ describe('Parser', () => { expect(() => parseSchemaCoordinate('MyType.field.deep')) .to.throw() .to.deep.include({ - message: 'Syntax Error: Expected 
<EOF>, found ".".', + message: 'Syntax Error: Expected <EOF>, found ..', locations: [{ line: 1, column: 13 }], }); }); @@ -754,8 +754,8 @@ describe('Parser', () => { expect(() => parseSchemaCoordinate('MyType.field(arg: value)')) .to.throw() .to.deep.include({ - message: 'Syntax Error: Expected ")", found Name "value".', - locations: [{ line: 1, column: 19 }], + message: 'Syntax Error: Invalid character: " ".', + locations: [{ line: 1, column: 18 }], }); }); @@ -794,9 +794,15 @@ describe('Parser', () => { expect(() => parseSchemaCoordinate('@myDirective.field')) .to.throw() .to.deep.include({ - message: 'Syntax Error: Expected <EOF>, found ".".', + message: 'Syntax Error: Expected <EOF>, found ..', locations: [{ line: 1, column: 13 }], }); }); + + it('accepts a Source object', () => { + expect(parseSchemaCoordinate('MyType')).to.deep.equal( + parseSchemaCoordinate(new Source('MyType')), + ); + }); }); }); diff --git a/src/language/__tests__/printer-test.ts b/src/language/__tests__/printer-test.ts index 589d9bfc8d..a7a604bcba 100644 --- a/src/language/__tests__/printer-test.ts +++ b/src/language/__tests__/printer-test.ts @@ -301,16 +301,24 @@ describe('Printer: Query document', () => { }); it('prints schema coordinates', () => { - expect(print(parseSchemaCoordinate(' Name '))).to.equal('Name'); - expect(print(parseSchemaCoordinate(' Name . field '))).to.equal( - 'Name.field', - ); - expect(print(parseSchemaCoordinate(' Name . 
field ( arg: )'))).to.equal( + expect(print(parseSchemaCoordinate('Name'))).to.equal('Name'); + expect(print(parseSchemaCoordinate('Name.field'))).to.equal('Name.field'); + expect(print(parseSchemaCoordinate('Name.field(arg:)'))).to.equal( 'Name.field(arg:)', ); - expect(print(parseSchemaCoordinate(' @ name '))).to.equal('@name'); - expect(print(parseSchemaCoordinate(' @ name (arg:) '))).to.equal( - '@name(arg:)', + expect(print(parseSchemaCoordinate('@name'))).to.equal('@name'); + expect(print(parseSchemaCoordinate('@name(arg:)'))).to.equal('@name(arg:)'); + }); + + it('throws syntax error for ignored tokens in schema coordinates', () => { + expect(() => print(parseSchemaCoordinate('# foo\nName'))).to.throw( + 'Syntax Error: Invalid character: "#"', + ); + expect(() => print(parseSchemaCoordinate('\nName'))).to.throw( + 'Syntax Error: Invalid character: U+000A.', + ); + expect(() => print(parseSchemaCoordinate('Name .field'))).to.throw( + 'Syntax Error: Invalid character: " "', ); }); }); diff --git a/src/language/__tests__/schemaCoordinateLexer-test.ts b/src/language/__tests__/schemaCoordinateLexer-test.ts new file mode 100644 index 0000000000..ed380dee72 --- /dev/null +++ b/src/language/__tests__/schemaCoordinateLexer-test.ts @@ -0,0 +1,48 @@ +import { expect } from 'chai'; +import { describe, it } from 'mocha'; + +import { expectToThrowJSON } from '../../__testUtils__/expectJSON.js'; + +import { SchemaCoordinateLexer } from '../schemaCoordinateLexer.js'; +import { Source } from '../source.js'; +import { TokenKind } from '../tokenKind.js'; + +function lexOne(str: string) { + const lexer = new SchemaCoordinateLexer(new Source(str)); + return lexer.advance(); +} + +function lexSecond(str: string) { + const lexer = new SchemaCoordinateLexer(new Source(str)); + lexer.advance(); + return lexer.advance(); +} + +function expectSyntaxError(text: string) { + return expectToThrowJSON(() => lexSecond(text)); +} + +describe('SchemaCoordinateLexer', () => { + it('can be 
stringified', () => { + const lexer = new SchemaCoordinateLexer(new Source('Name.field')); + expect(Object.prototype.toString.call(lexer)).to.equal( + '[object SchemaCoordinateLexer]', + ); + }); + + it('ignores BOM header', () => { + expect(lexOne('\uFEFFfoo')).to.contain({ + kind: TokenKind.NAME, + start: 1, + end: 4, + value: 'foo', + }); + }); + + it('lex reports a useful syntax errors', () => { + expectSyntaxError('Foo .bar').to.deep.equal({ + message: 'Syntax Error: Invalid character: " ".', + locations: [{ line: 1, column: 4 }], + }); + }); +}); diff --git a/src/language/lexer.ts b/src/language/lexer.ts index 44abc05197..3709636e58 100644 --- a/src/language/lexer.ts +++ b/src/language/lexer.ts @@ -6,6 +6,21 @@ import { isDigit, isNameContinue, isNameStart } from './characterClasses.js'; import type { Source } from './source.js'; import { TokenKind } from './tokenKind.js'; +/** + * Parser supports parsing multiple Source types, which may have differing + * Lexer classes. This is used for schema coordinates which has its own distinct + * SchemaCoordinateLexer class. + */ +export interface LexerInterface { + source: Source; + lastToken: Token; + token: Token; + line: number; + lineStart: number; + advance: () => Token; + lookahead: () => Token; +} + /** * Given a Source object, creates a Lexer for that source. * A Lexer is a stateful stream generator in that every time @@ -14,7 +29,7 @@ import { TokenKind } from './tokenKind.js'; * EOF, after which the lexer will repeatedly return the same EOF token * whenever called. 
*/ -export class Lexer { +export class Lexer implements LexerInterface { source: Source; /** @@ -95,7 +110,6 @@ export function isPunctuatorTokenKind(kind: TokenKind): boolean { kind === TokenKind.AMP || kind === TokenKind.PAREN_L || kind === TokenKind.PAREN_R || - kind === TokenKind.DOT || kind === TokenKind.SPREAD || kind === TokenKind.COLON || kind === TokenKind.EQUALS || @@ -151,8 +165,13 @@ function isTrailingSurrogate(code: number): boolean { * * Printable ASCII is printed quoted, while other points are printed in Unicode * code point form (ie. U+1234). + * + * @internal */ -function printCodePointAt(lexer: Lexer, location: number): string { +export function printCodePointAt( + lexer: LexerInterface, + location: number, +): string { const code = lexer.source.body.codePointAt(location); if (code === undefined) { @@ -169,9 +188,11 @@ function printCodePointAt(lexer: Lexer, location: number): string { /** * Create a token with line and column location information. + * + * @internal */ -function createToken( - lexer: Lexer, +export function createToken( + lexer: LexerInterface, kind: TokenKind, start: number, end: number, @@ -247,11 +268,7 @@ function readNextToken(lexer: Lexer, start: number): Token { // - FloatValue // - StringValue // - // Punctuator :: - // - DotPunctuator - // - OtherPunctuator - // - // OtherPunctuator :: one of ! $ & ( ) ... : = @ [ ] { | } + // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | } case 0x0021: // ! 
return createToken(lexer, TokenKind.BANG, position, position + 1); case 0x0024: // $ @@ -268,7 +285,24 @@ function readNextToken(lexer: Lexer, start: number): Token { if (nextCode === 0x002e && body.charCodeAt(position + 2) === 0x002e) { return createToken(lexer, TokenKind.SPREAD, position, position + 3); } - return readDot(lexer, position); + if (nextCode === 0x002e) { + throw syntaxError( + lexer.source, + position, + 'Unexpected "..", did you mean "..."?', + ); + } else if (isDigit(nextCode)) { + const digits = lexer.source.body.slice( + position + 1, + readDigits(lexer, position + 1, nextCode), + ); + throw syntaxError( + lexer.source, + position, + `Invalid number, expected digit before ".", did you mean "0.${digits}"?`, + ); + } + break; } case 0x003a: // : return createToken(lexer, TokenKind.COLON, position, position + 1); @@ -321,35 +355,6 @@ function readNextToken(lexer: Lexer, start: number): Token { return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength); } -/** - * Reads a dot token with helpful messages for negative lookahead. - * - * DotPunctuator :: `.` [lookahead != {`.`, Digit}] - */ -function readDot(lexer: Lexer, start: number): Token { - const nextCode = lexer.source.body.charCodeAt(start + 1); - // Full Stop (.) - if (nextCode === 0x002e) { - throw syntaxError( - lexer.source, - start, - 'Unexpected "..", did you mean "..."?', - ); - } - if (isDigit(nextCode)) { - const digits = lexer.source.body.slice( - start + 1, - readDigits(lexer, start + 1, nextCode), - ); - throw syntaxError( - lexer.source, - start, - `Invalid number, expected digit before ".", did you mean "0.${digits}"?`, - ); - } - return createToken(lexer, TokenKind.DOT, start, start + 1); -} - /** * Reads a comment token from the source file. 
* @@ -863,8 +868,10 @@ function readBlockString(lexer: Lexer, start: number): Token { * Name :: * - NameStart NameContinue* [lookahead != NameContinue] * ``` + * + * @internal */ -function readName(lexer: Lexer, start: number): Token { +export function readName(lexer: LexerInterface, start: number): Token { const body = lexer.source.body; const bodyLength = body.length; let position = start + 1; diff --git a/src/language/parser.ts b/src/language/parser.ts index 5acfb4e85d..618fa9753a 100644 --- a/src/language/parser.ts +++ b/src/language/parser.ts @@ -70,7 +70,9 @@ import type { import { Location, OperationTypeNode } from './ast.js'; import { DirectiveLocation } from './directiveLocation.js'; import { Kind } from './kinds.js'; +import type { LexerInterface } from './lexer.js'; import { isPunctuatorTokenKind, Lexer } from './lexer.js'; +import { SchemaCoordinateLexer } from './schemaCoordinateLexer.js'; import { isSource, Source } from './source.js'; import { TokenKind } from './tokenKind.js'; @@ -114,6 +116,12 @@ export interface ParseOptions { * ``` */ experimentalFragmentArguments?: boolean | undefined; + + /** + * You may override the Lexer class used to lex the source; this is used by + * schema coordinates to introduce a lexer with a resticted syntax. + */ + lexer?: LexerInterface | undefined; } /** @@ -199,9 +207,10 @@ export function parseType( */ export function parseSchemaCoordinate( source: string | Source, - options?: ParseOptions, ): SchemaCoordinateNode { - const parser = new Parser(source, options); + const sourceObj = isSource(source) ? 
source : new Source(source); + const lexer = new SchemaCoordinateLexer(sourceObj); + const parser = new Parser(source, { lexer }); parser.expectToken(TokenKind.SOF); const coordinate = parser.parseSchemaCoordinate(); parser.expectToken(TokenKind.EOF); @@ -220,15 +229,21 @@ export function parseSchemaCoordinate( * @internal */ export class Parser { - protected _options: ParseOptions; - protected _lexer: Lexer; + protected _options: Omit<ParseOptions, 'lexer'>; + protected _lexer: LexerInterface; protected _tokenCounter: number; constructor(source: string | Source, options: ParseOptions = {}) { - const sourceObj = isSource(source) ? source : new Source(source); + const { lexer, ..._options } = options; + + if (lexer) { + this._lexer = lexer; + } else { + const sourceObj = isSource(source) ? source : new Source(source); + this._lexer = new Lexer(sourceObj); + } - this._lexer = new Lexer(sourceObj); - this._options = options; + this._options = _options; this._tokenCounter = 0; } diff --git a/src/language/schemaCoordinateLexer.ts b/src/language/schemaCoordinateLexer.ts new file mode 100644 index 0000000000..d23a537cf3 --- /dev/null +++ b/src/language/schemaCoordinateLexer.ts @@ -0,0 +1,128 @@ +import { syntaxError } from '../error/syntaxError.js'; + +import { Token } from './ast.js'; +import { isNameStart } from './characterClasses.js'; +import type { LexerInterface } from './lexer.js'; +import { createToken, printCodePointAt, readName } from './lexer.js'; +import type { Source } from './source.js'; +import { TokenKind } from './tokenKind.js'; + +/** + * Given a Source schema coordinate, creates a Lexer for that source. + * A SchemaCoordinateLexer is a stateful stream generator in that every time + * it is advanced, it returns the next token in the Source. Assuming the + * source lexes, the final Token emitted by the lexer will be of kind + * EOF, after which the lexer will repeatedly return the same EOF token + * whenever called. 
+ */ +export class SchemaCoordinateLexer implements LexerInterface { + source: Source; + + /** + * The previously focused non-ignored token. + */ + lastToken: Token; + + /** + * The currently focused non-ignored token. + */ + token: Token; + + /** + * The (1-indexed) line containing the current token. + * Since a schema coordinate may not contain newline, this value is always 1. + */ + line: 1 = 1 as const; + + /** + * The character offset at which the current line begins. + * Since a schema coordinate may not contain newline, this value is always 0. + */ + lineStart: 0 = 0 as const; + + constructor(source: Source) { + const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0); + + this.source = source; + this.lastToken = startOfFileToken; + this.token = startOfFileToken; + } + + get [Symbol.toStringTag]() { + return 'SchemaCoordinateLexer'; + } + + /** + * Advances the token stream to the next non-ignored token. + */ + advance(): Token { + this.lastToken = this.token; + const token = (this.token = this.lookahead()); + return token; + } + + /** + * Looks ahead and returns the next non-ignored token, but does not change + * the current Lexer token. + */ + lookahead(): Token { + let token = this.token; + if (token.kind !== TokenKind.EOF) { + // Read the next token and form a link in the token linked-list. + const nextToken = readNextToken(this, token.end); + // @ts-expect-error next is only mutable during parsing. + token.next = nextToken; + // @ts-expect-error prev is only mutable during parsing. + nextToken.prev = token; + token = nextToken; + } + return token; + } +} + +/** + * Gets the next token from the source starting at the given position. + * + * This skips over whitespace until it finds the next lexable token, then lexes + * punctuators immediately or calls the appropriate helper function for more + * complicated tokens. 
+ */ +function readNextToken(lexer: SchemaCoordinateLexer, start: number): Token { + const body = lexer.source.body; + const bodyLength = body.length; + let position = start; + + while (position < bodyLength) { + const code = body.charCodeAt(position); + + // SourceCharacter + switch (code) { + case 0xfeff: // + ++position; + continue; + case 0x002e: // . + return createToken(lexer, TokenKind.DOT, position, position + 1); + case 0x0028: // ( + return createToken(lexer, TokenKind.PAREN_L, position, position + 1); + case 0x0029: // ) + return createToken(lexer, TokenKind.PAREN_R, position, position + 1); + case 0x003a: // : + return createToken(lexer, TokenKind.COLON, position, position + 1); + case 0x0040: // @ + return createToken(lexer, TokenKind.AT, position, position + 1); + } + + // Name + if (isNameStart(code)) { + return readName(lexer, position); + } + + throw syntaxError( + lexer.source, + position, + `Invalid character: ${printCodePointAt(lexer, position)}.`, + ); + } + + return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength); +} From cccea34a6343e37efc789a0bf96c037a18f7a5c4 Mon Sep 17 00:00:00 2001 From: Mark Larah Date: Sun, 27 Jul 2025 00:17:13 -0500 Subject: [PATCH 3/5] typo --- src/language/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/language/parser.ts b/src/language/parser.ts index 618fa9753a..369ec2bb02 100644 --- a/src/language/parser.ts +++ b/src/language/parser.ts @@ -119,7 +119,7 @@ export interface ParseOptions { /** * You may override the Lexer class used to lex the source; this is used by - * schema coordinates to introduce a lexer with a resticted syntax. + * schema coordinates to introduce a lexer with a restricted syntax. 
*/ lexer?: LexerInterface | undefined; } From cd3e94fa412cf992e01138ace3542d3430e7fdec Mon Sep 17 00:00:00 2001 From: Mark Larah Date: Sun, 27 Jul 2025 01:42:32 -0500 Subject: [PATCH 4/5] add back old SchemaCoordinateLexer tests --- .../__tests__/schemaCoordinateLexer-test.ts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/language/__tests__/schemaCoordinateLexer-test.ts b/src/language/__tests__/schemaCoordinateLexer-test.ts index ed380dee72..33f4111f9b 100644 --- a/src/language/__tests__/schemaCoordinateLexer-test.ts +++ b/src/language/__tests__/schemaCoordinateLexer-test.ts @@ -30,6 +30,24 @@ describe('SchemaCoordinateLexer', () => { ); }); + it('tracks a schema coordinate', () => { + const lexer = new SchemaCoordinateLexer(new Source('Name.field')); + expect(lexer.advance()).to.contain({ + kind: TokenKind.NAME, + start: 0, + end: 4, + value: 'Name', + }); + }); + + it('forbids ignored tokens', () => { + const lexer = new SchemaCoordinateLexer(new Source('\nName.field')); + expectToThrowJSON(() => lexer.advance()).to.deep.equal({ + message: 'Syntax Error: Invalid character: U+000A.', + locations: [{ line: 1, column: 1 }], + }); + }); + it('ignores BOM header', () => { expect(lexOne('\uFEFFfoo')).to.contain({ kind: TokenKind.NAME, From ac2d11894026d79138939979ab65a649a4677c32 Mon Sep 17 00:00:00 2001 From: Mark Larah Date: Thu, 7 Aug 2025 12:09:51 -0500 Subject: [PATCH 5/5] don't parse BOM --- .../__tests__/schemaCoordinateLexer-test.ts | 14 -------------- src/language/schemaCoordinateLexer.ts | 8 ++------ 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/src/language/__tests__/schemaCoordinateLexer-test.ts b/src/language/__tests__/schemaCoordinateLexer-test.ts index 33f4111f9b..1851e227f1 100644 --- a/src/language/__tests__/schemaCoordinateLexer-test.ts +++ b/src/language/__tests__/schemaCoordinateLexer-test.ts @@ -7,11 +7,6 @@ import { SchemaCoordinateLexer } from '../schemaCoordinateLexer.js'; import { Source } from 
'../source.js'; import { TokenKind } from '../tokenKind.js'; -function lexOne(str: string) { - const lexer = new SchemaCoordinateLexer(new Source(str)); - return lexer.advance(); -} - function lexSecond(str: string) { const lexer = new SchemaCoordinateLexer(new Source(str)); lexer.advance(); @@ -48,15 +43,6 @@ describe('SchemaCoordinateLexer', () => { }); }); - it('ignores BOM header', () => { - expect(lexOne('\uFEFFfoo')).to.contain({ - kind: TokenKind.NAME, - start: 1, - end: 4, - value: 'foo', - }); - }); - it('lex reports a useful syntax errors', () => { expectSyntaxError('Foo .bar').to.deep.equal({ message: 'Syntax Error: Invalid character: " ".', diff --git a/src/language/schemaCoordinateLexer.ts b/src/language/schemaCoordinateLexer.ts index d23a537cf3..06be90ec0f 100644 --- a/src/language/schemaCoordinateLexer.ts +++ b/src/language/schemaCoordinateLexer.ts @@ -90,16 +90,12 @@ export class SchemaCoordinateLexer implements LexerInterface { function readNextToken(lexer: SchemaCoordinateLexer, start: number): Token { const body = lexer.source.body; const bodyLength = body.length; - let position = start; + const position = start; - while (position < bodyLength) { + if (position < bodyLength) { const code = body.charCodeAt(position); - // SourceCharacter switch (code) { - case 0xfeff: // - ++position; - continue; case 0x002e: // . return createToken(lexer, TokenKind.DOT, position, position + 1); case 0x0028: // (