Skip to content

Commit 2070905

Browse files
committed
Initial work on tokenizer enhancements.
1 parent 4814107 commit 2070905

File tree

2 files changed

+117
-25
lines changed

2 files changed

+117
-25
lines changed

src/parser/statementParser.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,13 @@ enum STORAGE_MODIFIERS {
5252
STATIC = 'static',
5353
NEW = 'new',
5454
LITERAL = 'literal',
55+
FINAL = 'final',
5556
}
5657

5758
enum ACCESS_MODIFIERS {
5859
PUBLIC = 'public',
5960
PRIVATE = 'private',
61+
PROTECTED = 'protected',
6062
}
6163

6264
enum STATEMENT_KEYWORD {

src/parser/tokenizer.ts

Lines changed: 115 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ export class Token {
4545
return this.type === Type.Alphanumeric;
4646
}
4747
isNumeric() {
48-
return this.type === Type.Numeric;
48+
return this.type === Type.Integer;
4949
}
5050
isLineComment() {
5151
return this.type === Type.LineComment;
@@ -268,7 +268,7 @@ class Tokenizer {
268268

269269
parseCharacter(char: string): boolean {
270270
if (this.tokenType === Type.Alphanumeric) {
271-
if (this.charType === Type.Alphanumeric || this.charType === Type.Numeric) {
271+
if (this.charType === Type.Alphanumeric || this.charType === Type.Integer) {
272272
this.tokenValue = this.tokenValue + char;
273273
this.parsed = true;
274274
this.documentColumn++;
@@ -277,8 +277,8 @@ class Tokenizer {
277277
this.finalizeToken(this.charType);
278278
return true;
279279
}
280-
} else if (this.tokenType === Type.Numeric) {
281-
if (this.charType === Type.Numeric) {
280+
} else if (this.tokenType === Type.Integer) {
281+
if (this.charType === Type.Integer) {
282282
this.tokenValue = this.tokenValue + char;
283283
this.parsed = true;
284284
this.documentColumn++;
@@ -400,13 +400,13 @@ class Tokenizer {
400400
this.documentColumn = 0;
401401
this.finalizeToken(0);
402402
return true;
403-
} else if (this.tokenType > 10) { // all other token types
403+
} else if (this.tokenType === -1) { // undefined
404404
this.tokenValue = this.tokenValue + char;
405405
this.parsed = true;
406406
this.documentColumn++;
407407
this.finalizeToken(0);
408408
return true;
409-
} else if (this.tokenType === -1) { // undefined
409+
} else if (this.tokenType >= 0) { // all other token types
410410
this.tokenValue = this.tokenValue + char;
411411
this.parsed = true;
412412
this.documentColumn++;
@@ -415,7 +415,6 @@ class Tokenizer {
415415
}
416416
return false;
417417
}
418-
419418
finalizeToken(newType: number): void {
420419
this.token = new Token(this.tokenType, this.tokenValue, this.tokenPosition);
421420
this.tokenType = newType;
@@ -426,19 +425,27 @@ class Tokenizer {
426425

427426
function getType(c: string): Type {
428427
const charCode: number = c.charCodeAt(0);
428+
429+
if (charCode === 12313) {
430+
return Type.Alphanumeric;
431+
if (charCode === 9) {
432+
return Type.Tab;
433+
} else if (charCode === 10) {
434+
return Type.LineFeed;
435+
}
429436
// Find a better way to incorporate the %
430437
if (charCode >= 65 && charCode <= 90 || charCode >= 97 && charCode <= 122 || charCode === 37) {
431438
return Type.Alphanumeric;
432439
} else if (charCode >= 48 && charCode <= 57) {
433-
return Type.Numeric;
440+
return Type.Integer;
434441
} else if (charCode === 34) {
435442
return Type.DoubleQuotes;
436443
} else if (charCode === 47) {
437444
return Type.Slash;
438445
} else if (charCode === 9) {
439446
return Type.Tab;
440447
} else if (charCode === 10) {
441-
return Type.NewLine;
448+
return Type.LineFeed;
442449
} else if (charCode === 32) {
443450
return Type.Space;
444451
} else if (charCode === 33) {
@@ -508,24 +515,107 @@ function getType(c: string): Type {
508515
}
509516

510517
export const enum Type {
511-
Alphanumeric = 1,
512-
Numeric = 2,
513-
LineComment = 3,
514-
BlockComment = 4,
515-
String = 5,
516-
LineCommentInit = 6,
517-
BlockCommentInit = 7,
518-
BlockCommentTerm = 8,
519-
DoubleQuotes = 9,
520-
Slash = 10,
521-
522-
Tab = 11,
523-
NewLine = 13,
518+
// Special case for an undefined token.
519+
Undefined = -1,
520+
521+
// Other negative numbers are reserved for composite tokens.
522+
523+
// TODO: define the regular expression for Alphanumeric
524+
/*
525+
Expression: RegExp('') >> [%A-Za-z][A-Za-z0-9]* (here A-Z stands for any alphabetic character defined in Unicode)
526+
Examples:
527+
- %, %1, %A, %a, az12, a12, é12, ú13, josé
528+
*/
529+
//
530+
Alphanumeric = -2,
531+
532+
/*
533+
Expression: RegExp('^[0-9]*\\.?[0-9]+$') (the backslash is doubled because the pattern is written as a string literal)
534+
Examples:
535+
- 0.5
536+
- .1
537+
- 10
538+
- 10.56
539+
*/
540+
Number = -3,
541+
542+
/*
543+
Expression: RegExp('')
544+
Examples:
545+
- //
546+
*/
547+
LineCommentInitPSL = -10,
548+
549+
/*
550+
Expression: RegExp('') TODO: match everything up to the end of the line
551+
Examples:
552+
- // Line comment
553+
- //Line comment
554+
*/
555+
LineCommentPSL = -4,
556+
557+
/*
558+
Expression: RegExp('') TODO: match everything up to the end of the line
559+
Examples:
560+
- ;Line comment
561+
- ; Line comment
562+
*/
563+
LineCommentMUMPS = -5,
564+
565+
/*
566+
Expression: RegExp('') TODO
567+
Examples:
568+
- /*
569+
*/
570+
BlockCommentInit = -6,
571+
572+
/*
573+
Expression: RegExp('') TODO
574+
Examples:
575+
- *\/
576+
Note: '/' is escaped to prevent problems with the terminator of
577+
this block comment.
578+
*/
579+
BlockCommentTerm = -7,
580+
581+
/*
582+
Expression: Everything between BlockCommentInit and BlockCommentTerm
583+
Examples:
584+
- /* My block comment *\/
585+
- /*
586+
My block comment
587+
*\/
588+
Note: '/' is escaped to prevent problems with the terminator of
589+
this block comment.
590+
*/
591+
BlockComment = -8,
592+
593+
/*
594+
Expression: RegExp('')
595+
Examples:
596+
- "Anything between double quotes"
597+
*/
598+
String = -9,
599+
600+
/*
601+
Expression: RegExp('')
602+
Examples:
603+
- LineFeed
604+
- CarriageReturn
605+
- CarriageReturn LineFeed
606+
*/
607+
EndOfLine = -12,
608+
609+
// Non-negative numbers are reserved for unicode codepoints.
610+
Tab = 9,
611+
LineFeed = 10,
612+
CarriageReturn = 13,
524613
Space = 32,
525614
ExclamationMark = 33,
615+
DoubleQuotes = 34,
526616
NumberSign = 35,
527617
DollarSign = 36,
528-
// PercentSign = 37,
618+
PercentSign = 37,
529619
Ampersand = 38,
530620
SingleQuote = 39,
531621
OpenParen = 40,
@@ -535,6 +625,7 @@ export const enum Type {
535625
Comma = 44,
536626
MinusSign = 45,
537627
Period = 46,
628+
ForwardSlash = 47,
538629
Colon = 58,
539630
SemiColon = 59,
540631
LessThan = 60,
@@ -553,5 +644,4 @@ export const enum Type {
553644
CloseBrace = 125,
554645
Tilde = 126,
555646

556-
Undefined = -1,
557-
}
647+
}

0 commit comments

Comments
 (0)