Skip to content

Commit eb6f0f1

Browse files
committed
Support unicode bytes in string literals.
1 parent 431958d commit eb6f0f1

File tree

2 files changed

+76
-45
lines changed

2 files changed

+76
-45
lines changed

UnrealAngelscriptParser/Grammar/UnrealAngelscriptLexer.g4

Lines changed: 73 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
2-
Adapted to Unreal Angelscript by Embark Studios AB (Fredrik Lindh [Temaran]).
3-
Based on the C++ grammar made by Camilo Sanchez (Camiloasc1) and Martin Mirchev (Marti2203). See the parser file.
4-
*/
2+
Adapted to Unreal Angelscript by Embark Studios AB (originally Fredrik Lindh [Temaran]).
3+
Based on: https://github.com/antlr/grammars-v4/blob/master/cpp/CPP14Lexer.g4
4+
*/
55

66
lexer grammar UnrealAngelscriptLexer;
77

@@ -17,12 +17,12 @@ FloatingLiteral:
1717
Fractionalconstant Exponentpart? Floatingsuffix?
1818
| Digitsequence Exponentpart Floatingsuffix?;
1919

20-
StringLiteral:
21-
'"""' .*? '"""'
22-
| ('n' | 'f')? '"' (
23-
~["\\\u0085\u2028\u2029]
24-
| Escapesequence
25-
)* '"';
20+
// UnrealAngelscript string literals
21+
// https://angelscript.hazelight.se/scripting/fname-literals/
22+
// https://angelscript.hazelight.se/scripting/format-strings/
23+
// Single-letter prefix that may precede a string literal: either 'n' or 'f'.
// NOTE(review): presumably 'n' marks an FName literal and 'f' a format string,
// matching the two documentation links in this section — confirm.
fragment Angelscriptstringprefix: 'n' | 'f';
24+
25+
// A string literal token: an optional prefix (either an Encodingprefix or an
// Angelscriptstringprefix, never both) followed by either a Rawstring or a
// double-quoted run of zero or more Schar.
// NOTE(review): this rule has no alternative for triple-quoted ("""...""") text,
// so such input is not matched as a single token here — confirm that is intended.
StringLiteral: (Encodingprefix | Angelscriptstringprefix)? (Rawstring | '"' Schar* '"');
2626

2727
BooleanLiteral: False | True;
2828

@@ -32,54 +32,75 @@ UserDefinedLiteral:
3232
| UserDefinedStringLiteral
3333
| UserDefinedCharacterLiteral;
3434

35-
/*Angelscript*/
35+
/*
36+
Angelscript reserved keywords
37+
https://www.angelcode.com/angelscript/sdk/docs/manual/doc_reserved_keywords.html
38+
*/
3639

37-
Cast: 'Cast';
40+
Abstract: 'abstract';
3841

39-
UClass: 'UCLASS';
42+
Cast: 'cast';
4043

41-
UStruct: 'USTRUCT';
44+
External: 'external';
4245

43-
UProperty: 'UPROPERTY';
46+
Int: 'int';
4447

45-
UFunction: 'UFUNCTION';
48+
Int8: 'int8';
4649

47-
UEnum: 'UENUM';
50+
Int16: 'int16';
4851

49-
UMeta: 'UMETA';
52+
Int32: 'int32';
5053

51-
Import: 'import';
54+
Int64: 'int64';
5255

53-
From: 'from';
56+
Mixin: 'mixin';
5457

55-
Out: 'out';
58+
Null: 'null';
5659

5760
Property: 'property';
5861

59-
Ensure: 'ensure';
60-
61-
EnsureAlways: 'ensureAlways';
62-
63-
Check: 'check';
64-
65-
Mixin: 'mixin';
62+
Super: 'super';
6663

67-
Int: 'int';
68-
Int8: 'int8';
69-
Int16: 'int16';
70-
Int32: 'int32';
71-
Int64: 'int64';
7264
UInt: 'uint';
65+
7366
UInt8: 'uint8';
67+
7468
UInt16: 'uint16';
69+
7570
UInt32: 'uint32';
71+
7672
UInt64: 'uint64';
73+
7774
Float: 'float';
75+
7876
Float32: 'float32';
77+
7978
Float64: 'float64';
79+
8080
Double: 'double';
81+
8182
Bool: 'bool';
8283

84+
/* UnrealAngelscript */
85+
86+
UClass: 'UCLASS';
87+
88+
UStruct: 'USTRUCT';
89+
90+
UProperty: 'UPROPERTY';
91+
92+
UFunction: 'UFUNCTION';
93+
94+
UEnum: 'UENUM';
95+
96+
UMeta: 'UMETA';
97+
98+
Ensure: 'ensure';
99+
100+
EnsureAlways: 'ensureAlways';
101+
102+
Check: 'check';
103+
83104
/*Keywords*/
84105

85106
Auto: 'auto';
@@ -156,6 +177,8 @@ This: 'this';
156177

157178
True: 'true';
158179

180+
Typedef: 'typedef';
181+
159182
Virtual: 'virtual';
160183

161184
Void: 'void';
@@ -250,6 +273,10 @@ Semi: ';';
250273

251274
Dot: '.';
252275

276+
// Exactly four consecutive HEXADECIMALDIGIT characters.
fragment Hexquad: HEXADECIMALDIGIT HEXADECIMALDIGIT HEXADECIMALDIGIT HEXADECIMALDIGIT;
277+
278+
// A universal character name: a backslash and lowercase 'u' followed by one
// Hexquad (4 hex digits), or a backslash and uppercase 'U' followed by two
// Hexquads (8 hex digits).
fragment Universalcharactername: '\\u' Hexquad | '\\U' Hexquad Hexquad;
279+
253280
Identifier:
254281
/*
255282
Identifiernondigit | Identifier Identifiernondigit | Identifier DIGIT
@@ -266,9 +293,7 @@ DecimalLiteral: NONZERODIGIT ('\''? DIGIT)*;
266293

267294
OctalLiteral: '0' ('\''? OCTALDIGIT)*;
268295

269-
HexadecimalLiteral: ('0x' | '0X') HEXADECIMALDIGIT (
270-
'\''? HEXADECIMALDIGIT
271-
)*;
296+
HexadecimalLiteral: ('0x' | '0X') HEXADECIMALDIGIT ( '\''? HEXADECIMALDIGIT)*;
272297

273298
BinaryLiteral: ('0b' | '0B') BINARYDIGIT ('\''? BINARYDIGIT)*;
274299

@@ -292,12 +317,9 @@ fragment Longsuffix: [lL];
292317

293318
fragment Longlongsuffix: 'll' | 'LL';
294319

295-
fragment Cchar: ~ ['\\\r\n] | Escapesequence;
320+
// One element of a character literal body: any single character other than a
// single quote, backslash, carriage return or line feed; or an Escapesequence;
// or a Universalcharactername.
fragment Cchar: ~ ['\\\r\n] | Escapesequence | Universalcharactername;
296321
297-
fragment Escapesequence:
298-
Simpleescapesequence
299-
| Octalescapesequence
300-
| Hexadecimalescapesequence;
322+
// Any backslash escape: a simple, octal, or hexadecimal escape sequence.
// Universal character names (\u / \U) are not included here; rules that accept
// them list Universalcharactername as a separate alternative.
fragment Escapesequence: Simpleescapesequence | Octalescapesequence | Hexadecimalescapesequence;
301323
302324
fragment Simpleescapesequence:
303325
'\\\''
@@ -316,7 +338,8 @@ fragment Simpleescapesequence:
316338
fragment Octalescapesequence:
317339
'\\' OCTALDIGIT
318340
| '\\' OCTALDIGIT OCTALDIGIT
319-
| '\\' OCTALDIGIT OCTALDIGIT OCTALDIGIT;
341+
| '\\' OCTALDIGIT OCTALDIGIT OCTALDIGIT
342+
;
320343

321344
fragment Hexadecimalescapesequence: '\\x' HEXADECIMALDIGIT+;
322345

@@ -336,6 +359,10 @@ fragment Floatingsuffix: [flFL];
336359

337360
fragment Encodingprefix: 'u8' | 'u' | 'U' | 'L';
338361

362+
// One element of a double-quoted string body: any single character other than
// a double quote, backslash, carriage return or line feed; or an
// Escapesequence; or a Universalcharactername. Because CR and LF are excluded,
// a string built from Schar cannot span lines.
fragment Schar: ~ ["\\\r\n] | Escapesequence | Universalcharactername;
363+
364+
// Raw string of the form R"delim(body)delim".
//   1. The opening R" pair.
//   2. Leading delimiter, matched non-greedily: backslash-escaped '"', '(' or ')',
//      or any character except CR, LF, space and '('.
//   3. '(' then the body (non-greedy, any characters except ')') then ')'.
//   4. Trailing delimiter, matched non-greedily: backslash-escaped '"', '(' or ')',
//      or any character except CR, LF, space and '"'.
//   5. The closing '"'.
// NOTE(review): nothing in this rule requires the leading and trailing
// delimiters to be equal, and the body cannot contain ')'. Confirm that this
// approximation is acceptable for the inputs being parsed.
fragment Rawstring: 'R"' ( '\\' ["()] | ~[\r\n (])*? '(' ~[)]*? ')' ( '\\' ["()] | ~[\r\n "])*? '"';
365+
339366
UserDefinedIntegerLiteral:
340367
DecimalLiteral Udsuffix
341368
| OctalLiteral Udsuffix
@@ -355,7 +382,11 @@ fragment Udsuffix: Identifier;
355382
Whitespace: [ \t]+ -> skip;
356383
357384
Newline: ('\r' '\n'? | '\n') -> skip;
385+
358386
BlockComment: '/*' .*? '*/' -> skip;
387+
359388
LineComment: '//' ~ [\r\n]* -> skip;
389+
360390
PreprocessorBranchRemoval: '#else' .*? '#endif' -> skip;
361-
Preprocessor: ('#if' | '#ifdef' | '#else' | '#endif') ~ [\r\n]* -> skip;
391+
392+
// Matches one of the directive spellings #if, #ifdef, #else or #endif followed
// by every character up to (but not including) the next CR or LF, and discards
// the match via `-> skip`, so the directive line never reaches the parser.
// Only the directive line itself is skipped; the lines between directives are
// still tokenized normally by the other rules.
Preprocessor: ('#if' | '#ifdef' | '#else' | '#endif') ~ [\r\n]* -> skip;

UnrealAngelscriptParser/Grammar/UnrealAngelscriptParser.g4

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
*/
2222

2323
/*
24-
Adapted to Unreal Angelscript by Embark Studios AB (Fredrik Lindh [Temaran]).
25-
Based on the C++ grammar made by Camilo Sanchez (Camiloasc1) and Martin Mirchev (Marti2203). See the parser file.
26-
*/
24+
Adapted to Unreal Angelscript by Embark Studios AB (originally Fredrik Lindh [Temaran]).
25+
Based on: https://github.com/antlr/grammars-v4/blob/master/cpp/CPP14Parser.g4
26+
*/
2727

2828
parser grammar UnrealAngelscriptParser;
2929
options {

0 commit comments

Comments
 (0)