Skip to content

Commit 4381b77

Browse files
authored
fix tokenizer (#18)
* fix tokenizer * update yarn lock * update node.js
1 parent e79a122 commit 4381b77

File tree

7 files changed

+4604
-443
lines changed

7 files changed

+4604
-443
lines changed

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
strategy:
1616
matrix:
1717
os: [ubuntu-latest, windows-latest, macos-latest]
18-
node: [10, 12, 13]
18+
node: [10, 12, 14]
1919
steps:
2020
- name: Checkout
2121
uses: actions/checkout@v2

spec/syntax.ebnf

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@ Message ::= (Text? (Placeholder | Linked)? Text?)+;
1313
(* primitives *)
1414
Text ::= TextChar+;
1515
Placeholder ::= Named | List;
16-
Named ::= "%"? "{" Space? (Identifier) Space? "}";
16+
Modulo ::= "%";
17+
Named ::= Modulo? "{" Space? (Identifier) Space? "}";
1718
List ::= "{" Space? (Digits) Space? "}";
18-
Linked ::= "@" (LinkedDot LinkedModifier)? ":" LinkedRefer;
19+
Linked ::= "@" (LinkedModifier)? LinkedDelimiter LinkedRefer;
1920
LinkedRefer ::= "("? (LinkedKey | Placeholder) ")"?;
2021
LinkedKey ::= TextChar+;
22+
LinkedModifier ::= LinkedDot Identifier;
23+
LinkedDelimiter ::= ":";
2124
LinkedDot ::= ".";
22-
LinkedModifier ::= Identifier;
2325
2426
(* characters *)
2527
AnyChar ::= [#x0-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]; (* Unicode character *)

src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export { Path, PathValue } from './path'
2+
export { createCompiler, Compiler, CompileOptions } from './message/compiler'
23
export { PluralizationRule, LinkedModifiers } from './message/context'
34
export {
45
Locale,

src/message/tokenizer.ts

Lines changed: 143 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ export type Token = {
5454

5555
export type TokenizeContext = {
5656
currentType: TokenTypes
57-
currentValue: string | undefined | null // TODO: if dont' use, should be removed
57+
currentValue: string | undefined | null // TODO: if don't use, should be removed
5858
currentToken: Token | null
5959
offset: number
6060
startLoc: Position
@@ -65,6 +65,8 @@ export type TokenizeContext = {
6565
lastStartLoc: Position
6666
lastEndLoc: Position
6767
braceNest: number
68+
parenNest: number
69+
inLinked: boolean
6870
}
6971

7072
export type Tokenizer = Readonly<{
@@ -95,7 +97,9 @@ export function createTokenizer(source: string): Tokenizer {
9597
lastOffset: _initOffset,
9698
lastStartLoc: _initLoc,
9799
lastEndLoc: _initLoc,
98-
braceNest: 0
100+
braceNest: 0,
101+
parenNest: 0,
102+
inLinked: false
99103
}
100104

101105
const context = (): TokenizeContext => _context
@@ -119,6 +123,17 @@ export function createTokenizer(source: string): Tokenizer {
119123
return token
120124
}
121125

126+
const peekNewLines = (scnr: Scanner): void => {
127+
while (scnr.currentPeek() === NEW_LINE) {
128+
scnr.peek()
129+
}
130+
}
131+
132+
const skipNewLines = (scnr: Scanner): void => {
133+
peekNewLines(scnr)
134+
scnr.skipToPeek()
135+
}
136+
122137
const peekSpaces = (scnr: Scanner): string => {
123138
let buf = ''
124139
while (scnr.currentPeek() === SPACE || scnr.currentPeek() === NEW_LINE) {
@@ -182,7 +197,7 @@ export function createTokenizer(source: string): Tokenizer {
182197
return ret
183198
}
184199

185-
const isLinkedModifier = (
200+
const isLinkedModifierStart = (
186201
scnr: Scanner,
187202
context: TokenizeContext
188203
): boolean => {
@@ -195,7 +210,7 @@ export function createTokenizer(source: string): Tokenizer {
195210
return ret
196211
}
197212

198-
const isLinkedIdentifier = (
213+
const isLinkedReferStart = (
199214
scnr: Scanner,
200215
context: TokenizeContext
201216
): boolean => {
@@ -223,6 +238,7 @@ export function createTokenizer(source: string): Tokenizer {
223238
) {
224239
return false
225240
} else if (ch === NEW_LINE) {
241+
scnr.peek()
226242
return fn()
227243
} else {
228244
// other characters
@@ -245,9 +261,7 @@ export function createTokenizer(source: string): Tokenizer {
245261
const { currentType } = context
246262
if (
247263
currentType === TokenTypes.BraceLeft ||
248-
currentType === TokenTypes.ParenLeft ||
249-
currentType === TokenTypes.LinkedDot ||
250-
currentType === TokenTypes.LinkedDelimiter
264+
currentType === TokenTypes.ParenLeft
251265
) {
252266
return false
253267
}
@@ -378,14 +392,18 @@ export function createTokenizer(source: string): Tokenizer {
378392
skipSpaces(scnr)
379393
let ch: string | undefined | null = ''
380394
let identifiers = ''
381-
const closure = (ch: string) => (ch !== TokenChars.BraceLeft && ch !== TokenChars.BraceRight)
395+
const closure = (ch: string) =>
396+
ch !== TokenChars.BraceLeft &&
397+
ch !== TokenChars.BraceRight &&
398+
ch !== SPACE &&
399+
ch !== NEW_LINE
382400
while ((ch = takeChar(scnr, closure))) {
383401
identifiers += ch
384402
}
385403
return identifiers
386404
}
387405

388-
const readLinkedModifierArg = (scnr: Scanner): string => {
406+
const readLinkedModifier = (scnr: Scanner): string => {
389407
let ch: string | undefined | null = ''
390408
let name = ''
391409
while ((ch = takeIdentifierChar(scnr))) {
@@ -394,10 +412,7 @@ export function createTokenizer(source: string): Tokenizer {
394412
return name
395413
}
396414

397-
const readLinkedIdentifier = (
398-
scnr: Scanner,
399-
context: TokenizeContext
400-
): string => {
415+
const readLinkedRefer = (scnr: Scanner, context: TokenizeContext): string => {
401416
const fn = (detect = false, useParentLeft = false, buf: string): string => {
402417
const ch = scnr.currentChar()
403418
if (
@@ -438,8 +453,11 @@ export function createTokenizer(source: string): Tokenizer {
438453
return plural
439454
}
440455

441-
const readToken = (scnr: Scanner, context: TokenizeContext): Token => {
442-
let token = { type: TokenTypes.EOF }
456+
const readTokenInPlaceholder = (
457+
scnr: Scanner,
458+
context: TokenizeContext
459+
): Token | null => {
460+
let token = null
443461
const ch = scnr.currentChar()
444462
switch (ch) {
445463
case TokenChars.BraceLeft:
@@ -453,18 +471,63 @@ export function createTokenizer(source: string): Tokenizer {
453471
token = getToken(context, TokenTypes.BraceRight, TokenChars.BraceRight)
454472
context.braceNest--
455473
context.braceNest > 0 && skipSpaces(scnr)
474+
if (context.inLinked && context.braceNest === 0) {
475+
context.inLinked = false
476+
}
456477
break
478+
default:
479+
let validNamedIdentifier = true
480+
let validListIdentifier = true
481+
if (isPluralStart(scnr)) {
482+
token = getToken(context, TokenTypes.Pipe, readPlural(scnr))
483+
// reset
484+
context.braceNest = 0
485+
context.parenNest = 0
486+
context.inLinked = false
487+
} else if (
488+
(validNamedIdentifier = isNamedIdentifierStart(scnr, context))
489+
) {
490+
token = getToken(context, TokenTypes.Named, readNamedIdentifier(scnr))
491+
skipSpaces(scnr)
492+
} else if (
493+
(validListIdentifier = isListIdentifierStart(scnr, context))
494+
) {
495+
token = getToken(context, TokenTypes.List, readListIdentifier(scnr))
496+
skipSpaces(scnr)
497+
} else if (!validNamedIdentifier && !validListIdentifier) {
498+
token = getToken(
499+
context,
500+
TokenTypes.InvalidPlace,
501+
readInvalidIdentifier(scnr)
502+
)
503+
skipSpaces(scnr)
504+
}
505+
break
506+
}
507+
return token
508+
}
509+
510+
const readTokenInLinked = (
511+
scnr: Scanner,
512+
context: TokenizeContext
513+
): Token | null => {
514+
let token = null
515+
const ch = scnr.currentChar()
516+
switch (ch) {
457517
case TokenChars.LinkedAlias:
458518
scnr.next()
459519
token = getToken(
460520
context,
461521
TokenTypes.LinkedAlias,
462522
TokenChars.LinkedAlias
463523
)
524+
context.inLinked = true
525+
skipNewLines(scnr)
464526
break
465527
case TokenChars.LinkedDot:
466528
scnr.next()
467529
token = getToken(context, TokenTypes.LinkedDot, TokenChars.LinkedDot)
530+
skipNewLines(scnr)
468531
break
469532
case TokenChars.LinkedDelimiter:
470533
scnr.next()
@@ -473,65 +536,94 @@ export function createTokenizer(source: string): Tokenizer {
473536
TokenTypes.LinkedDelimiter,
474537
TokenChars.LinkedDelimiter
475538
)
539+
skipNewLines(scnr)
476540
break
477541
case TokenChars.ParenLeft:
478542
scnr.next()
479543
token = getToken(context, TokenTypes.ParenLeft, TokenChars.ParenLeft)
544+
skipSpaces(scnr)
545+
context.parenNest++
480546
break
481547
case TokenChars.ParenRight:
482548
scnr.next()
483549
token = getToken(context, TokenTypes.ParenRight, TokenChars.ParenRight)
484-
break
485-
case TokenChars.Modulo:
486-
scnr.next()
487-
token = getToken(context, TokenTypes.Modulo, TokenChars.Modulo)
550+
context.parenNest--
551+
context.parenNest > 0 && skipSpaces(scnr)
552+
context.inLinked = false
488553
break
489554
default:
490-
let validNamedIdentifier = true
491-
let validListIdentifier = true
492555
if (isPluralStart(scnr)) {
493556
token = getToken(context, TokenTypes.Pipe, readPlural(scnr))
494-
context.braceNest = 0 // reset
495-
} else if (isTextStart(scnr, context)) {
496-
token = getToken(context, TokenTypes.Text, readText(scnr))
497-
} else if (
498-
(validNamedIdentifier = isNamedIdentifierStart(scnr, context))
499-
) {
500-
token = getToken(context, TokenTypes.Named, readNamedIdentifier(scnr))
501-
skipSpaces(scnr)
502-
} else if (
503-
(validListIdentifier = isListIdentifierStart(scnr, context))
504-
) {
505-
token = getToken(context, TokenTypes.List, readListIdentifier(scnr))
506-
skipSpaces(scnr)
507-
// } else if (!validNamedIdentifier && !validListIdentifier) {
508-
// token = getToken(
509-
// context,
510-
// TokenTypes.InvalidPlace,
511-
// readInvalidIdentifier(scnr)
512-
// )
513-
// skipSpaces(scnr)
514-
} else if (isLinkedModifier(scnr, context)) {
557+
// reset
558+
context.braceNest = 0
559+
context.parenNest = 0
560+
context.inLinked = false
561+
} else if (isLinkedModifierStart(scnr, context)) {
515562
token = getToken(
516563
context,
517564
TokenTypes.LinkedModifier,
518-
readLinkedModifierArg(scnr)
565+
readLinkedModifier(scnr)
519566
)
520-
} else if (isLinkedIdentifier(scnr, context)) {
567+
skipNewLines(scnr)
568+
} else if (isLinkedReferStart(scnr, context)) {
521569
if (ch === TokenChars.BraceLeft) {
522-
scnr.next()
523-
token = getToken(
524-
context,
525-
TokenTypes.BraceLeft,
526-
TokenChars.BraceLeft
527-
)
570+
// scan the placeholder
571+
token = readTokenInPlaceholder(scnr, context) || token
528572
} else {
529573
token = getToken(
530574
context,
531575
TokenTypes.LinkedKey,
532-
readLinkedIdentifier(scnr, context)
576+
readLinkedRefer(scnr, context)
533577
)
578+
if (context.parenNest === 0) {
579+
context.inLinked = false
580+
}
534581
}
582+
} else {
583+
context.braceNest = 0
584+
context.parenNest = 0
585+
context.inLinked = false
586+
token = readToken(scnr, context)
587+
}
588+
break
589+
}
590+
return token
591+
}
592+
593+
const readToken = (scnr: Scanner, context: TokenizeContext): Token => {
594+
let token = { type: TokenTypes.EOF }
595+
const ch = scnr.currentChar()
596+
597+
if (context.braceNest > 0) {
598+
return readTokenInPlaceholder(scnr, context) || token
599+
}
600+
601+
switch (ch) {
602+
case TokenChars.BraceLeft:
603+
token = readTokenInPlaceholder(scnr, context) || token
604+
break
605+
case TokenChars.LinkedAlias:
606+
token = readTokenInLinked(scnr, context) || token
607+
break
608+
case TokenChars.Modulo:
609+
scnr.next()
610+
token = getToken(context, TokenTypes.Modulo, TokenChars.Modulo)
611+
break
612+
default:
613+
if (isPluralStart(scnr)) {
614+
token = getToken(context, TokenTypes.Pipe, readPlural(scnr))
615+
// reset
616+
context.braceNest = 0
617+
context.parenNest = 0
618+
context.inLinked = false
619+
} else if (context.braceNest > 0) {
620+
// scan the placeholder
621+
token = readTokenInPlaceholder(scnr, context) || token
622+
} else if (context.inLinked) {
623+
// scan the linked
624+
token = readTokenInLinked(scnr, context) || token
625+
} else if (isTextStart(scnr, context)) {
626+
token = getToken(context, TokenTypes.Text, readText(scnr))
535627
}
536628
break
537629
}

0 commit comments

Comments
 (0)