Skip to content

Commit 0ae1f97

Browse files
committed
Support unicode bytes in string literals.
1 parent 431958d commit 0ae1f97

File tree

2 files changed

+71
-47
lines changed

2 files changed

+71
-47
lines changed

UnrealAngelscriptParser/Grammar/UnrealAngelscriptLexer.g4

Lines changed: 68 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
2-
Adapted to Unreal Angelscript by Embark Studios AB (Fredrik Lindh [Temaran]).
3-
Based on the C++ grammar made by Camilo Sanchez (Camiloasc1) and Martin Mirchev (Marti2203). See the parser file.
4-
*/
2+
Adapted to Unreal Angelscript by Embark Studios AB (originally Fredrik Lindh [Temaran]).
3+
Based on: https://github.com/antlr/grammars-v4/blob/master/cpp/CPP14Lexer.g4
4+
*/
55

66
lexer grammar UnrealAngelscriptLexer;
77

@@ -17,12 +17,12 @@ FloatingLiteral:
1717
Fractionalconstant Exponentpart? Floatingsuffix?
1818
| Digitsequence Exponentpart Floatingsuffix?;
1919

20-
StringLiteral:
21-
'"""' .*? '"""'
22-
| ('n' | 'f')? '"' (
23-
~["\\\u0085\u2028\u2029]
24-
| Escapesequence
25-
)* '"';
20+
// UnrealAngelscript string literals
21+
// https://angelscript.hazelight.se/scripting/fname-literals/
22+
// https://angelscript.hazelight.se/scripting/format-strings/
23+
fragment Angelscriptstringprefix: 'n' | 'f';
24+
25+
StringLiteral: (Encodingprefix | Angelscriptstringprefix)? (Rawstring | '"' Schar* '"');
2626

2727
BooleanLiteral: False | True;
2828

@@ -32,54 +32,69 @@ UserDefinedLiteral:
3232
| UserDefinedStringLiteral
3333
| UserDefinedCharacterLiteral;
3434

35-
/*Angelscript*/
36-
37-
Cast: 'Cast';
38-
39-
UClass: 'UCLASS';
40-
41-
UStruct: 'USTRUCT';
42-
43-
UProperty: 'UPROPERTY';
44-
45-
UFunction: 'UFUNCTION';
46-
47-
UEnum: 'UENUM';
35+
/*
36+
Angelscript reserved keywords
37+
https://www.angelcode.com/angelscript/sdk/docs/manual/doc_reserved_keywords.html
38+
*/
4839

49-
UMeta: 'UMETA';
40+
Cast: 'cast';
5041

5142
Import: 'import';
5243

53-
From: 'from';
54-
55-
Out: 'out';
44+
Int: 'int';
5645

57-
Property: 'property';
46+
Int8: 'int8';
5847

59-
Ensure: 'ensure';
48+
Int16: 'int16';
6049

61-
EnsureAlways: 'ensureAlways';
50+
Int32: 'int32';
6251

63-
Check: 'check';
52+
Int64: 'int64';
6453

6554
Mixin: 'mixin';
6655

67-
Int: 'int';
68-
Int8: 'int8';
69-
Int16: 'int16';
70-
Int32: 'int32';
71-
Int64: 'int64';
56+
Property: 'property';
57+
7258
UInt: 'uint';
59+
7360
UInt8: 'uint8';
61+
7462
UInt16: 'uint16';
63+
7564
UInt32: 'uint32';
65+
7666
UInt64: 'uint64';
67+
7768
Float: 'float';
69+
7870
Float32: 'float32';
71+
7972
Float64: 'float64';
73+
8074
Double: 'double';
75+
8176
Bool: 'bool';
8277

78+
/* UnrealAngelscript */
79+
80+
UClass: 'UCLASS';
81+
82+
UStruct: 'USTRUCT';
83+
84+
UProperty: 'UPROPERTY';
85+
86+
UFunction: 'UFUNCTION';
87+
88+
UEnum: 'UENUM';
89+
90+
UMeta: 'UMETA';
91+
92+
Ensure: 'ensure';
93+
94+
EnsureAlways: 'ensureAlways';
95+
96+
Check: 'check';
97+
8398
/*Keywords*/
8499

85100
Auto: 'auto';
@@ -156,6 +171,8 @@ This: 'this';
156171

157172
True: 'true';
158173

174+
Typedef: 'typedef';
175+
159176
Virtual: 'virtual';
160177

161178
Void: 'void';
@@ -250,6 +267,10 @@ Semi: ';';
250267

251268
Dot: '.';
252269

270+
fragment Hexquad: HEXADECIMALDIGIT HEXADECIMALDIGIT HEXADECIMALDIGIT HEXADECIMALDIGIT;
271+
272+
fragment Universalcharactername: '\\u' Hexquad | '\\U' Hexquad Hexquad;
273+
253274
Identifier:
254275
/*
255276
Identifiernondigit | Identifier Identifiernondigit | Identifier DIGIT
@@ -266,9 +287,7 @@ DecimalLiteral: NONZERODIGIT ('\''? DIGIT)*;
266287

267288
OctalLiteral: '0' ('\''? OCTALDIGIT)*;
268289

269-
HexadecimalLiteral: ('0x' | '0X') HEXADECIMALDIGIT (
270-
'\''? HEXADECIMALDIGIT
271-
)*;
290+
HexadecimalLiteral: ('0x' | '0X') HEXADECIMALDIGIT ( '\''? HEXADECIMALDIGIT)*;
272291

273292
BinaryLiteral: ('0b' | '0B') BINARYDIGIT ('\''? BINARYDIGIT)*;
274293

@@ -292,12 +311,9 @@ fragment Longsuffix: [lL];
292311

293312
fragment Longlongsuffix: 'll' | 'LL';
294313

295-
fragment Cchar: ~ ['\\\r\n] | Escapesequence;
314+
fragment Cchar: ~ ['\\\r\n] | Escapesequence | Universalcharactername;
296315
297-
fragment Escapesequence:
298-
Simpleescapesequence
299-
| Octalescapesequence
300-
| Hexadecimalescapesequence;
316+
fragment Escapesequence: Simpleescapesequence | Octalescapesequence | Hexadecimalescapesequence;
301317
302318
fragment Simpleescapesequence:
303319
'\\\''
@@ -336,6 +352,10 @@ fragment Floatingsuffix: [flFL];
336352

337353
fragment Encodingprefix: 'u8' | 'u' | 'U' | 'L';
338354

355+
fragment Schar: ~ ["\\\r\n] | Escapesequence | Universalcharactername;
356+
357+
fragment Rawstring: 'R"' ( '\\' ["()] | ~[\r\n (])*? '(' ~[)]*? ')' ( '\\' ["()] | ~[\r\n "])*? '"';
358+
339359
UserDefinedIntegerLiteral:
340360
DecimalLiteral Udsuffix
341361
| OctalLiteral Udsuffix
@@ -355,7 +375,11 @@ fragment Udsuffix: Identifier;
355375
Whitespace: [ \t]+ -> skip;
356376
357377
Newline: ('\r' '\n'? | '\n') -> skip;
378+
358379
BlockComment: '/*' .*? '*/' -> skip;
380+
359381
LineComment: '//' ~ [\r\n]* -> skip;
382+
360383
PreprocessorBranchRemoval: '#else' .*? '#endif' -> skip;
361-
Preprocessor: ('#if' | '#ifdef' | '#else' | '#endif') ~ [\r\n]* -> skip;
384+
385+
Preprocessor: ('#if' | '#ifdef' | '#else' | '#endif') ~ [\r\n]* -> skip;

UnrealAngelscriptParser/Grammar/UnrealAngelscriptParser.g4

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
*/
2222

2323
/*
24-
Adapted to Unreal Angelscript by Embark Studios AB (Fredrik Lindh [Temaran]).
25-
Based on the C++ grammar made by Camilo Sanchez (Camiloasc1) and Martin Mirchev (Marti2203). See the parser file.
26-
*/
24+
Adapted to Unreal Angelscript by Embark Studios AB (originally Fredrik Lindh [Temaran]).
25+
Based on: https://github.com/antlr/grammars-v4/blob/master/cpp/CPP14Parser.g4
26+
*/
2727

2828
parser grammar UnrealAngelscriptParser;
2929
options {

0 commit comments

Comments
 (0)