@@ -147,22 +147,125 @@ package au.com.integradev.delphi.antlr;
147147 }
148148
149149 public static class LexerException extends RuntimeException {
150+ public LexerException(String message) {
151+ super(message);
152+ }
153+
150154 public LexerException(String message, Throwable cause) {
151155 super(message, cause);
152156 }
153157 }
154158
155- private int lookaheadMultilineString() {
156- int startQuotes = lookaheadSingleQuotes(1 );
157- if (startQuotes >= 3 && (startQuotes & 1 ) != 0 && isNewLine(input.LA(startQuotes + 1 ))) {
158- int i = startQuotes;
159+ private int lookaheadMultilineComment(String end) {
160+ return lookaheadMultilineComment(end, 1 );
161+ }
162+
163+ private int lookaheadMultilineComment(String end, int i) {
164+ char endStart = end.charAt(0 );
165+ String directiveName = null;
166+
167+ if (input.LA(i) == ' $' ) {
168+ StringBuilder directiveNameBuilder = new StringBuilder();
169+ int character = input.LA(i + 1 );
170+
171+ while ((character >= ' a' && character <= ' z' )
172+ || (character >= ' A' && character <= ' Z' )
173+ || Character.isDigit(character)
174+ || character == ' _' ) {
175+ ++ i;
176+ directiveNameBuilder.append((char) character);
177+ character = input.LA(i + 1 );
178+ }
179+
180+ directiveName = directiveNameBuilder.toString();
181+ }
182+
183+ boolean nestedExpression =
184+ " if" .equalsIgnoreCase(directiveName) || " elseif" .equalsIgnoreCase(directiveName);
185+
186+ while (true ) {
187+ int character = input.LA(++ i);
188+
189+ if (character == endStart) {
190+ int j;
191+ for (j = 1 ; j < end.length(); ++ j) {
192+ if (input.LA(i + j) != end.charAt(j)) {
193+ break ;
194+ }
195+ }
196+ if (j == end.length()) {
197+ return i + j;
198+ }
199+ }
200+
201+ switch (character) {
202+ case ' \' ' :
203+ if (nestedExpression) {
204+ i = lookaheadString(i) - 1 ;
205+ }
206+ break ;
207+
208+ case ' /' :
209+ if (nestedExpression && input.LA(i + 1 ) == ' /' ) {
210+ i = lookaheadLineComment(i + 2 );
211+ }
212+ break ;
213+
214+ case ' {' :
215+ if (nestedExpression) {
216+ i = lookaheadMultilineComment(" }" , i + 1 ) - 1 ;
217+ }
218+ break ;
219+
220+ case ' (' :
221+ if (nestedExpression && input.LA(i + 1 ) == ' *' ) {
222+ i = lookaheadMultilineComment(" *)" , i + 2 ) - 1 ;
223+ }
224+ break ;
225+
226+ case EOF:
227+ throw new LexerException(
228+ " line "
229+ + state.tokenStartLine
230+ + " :"
231+ + state.tokenStartCharPositionInLine
232+ + " unterminated multi-line comment" );
233+
234+ default:
235+ // do nothing
236+ }
237+ }
238+ }
239+
240+ private int lookaheadLineComment(int i) {
241+ while (true ) {
242+ int character = input.LA(i);
243+ if (isNewLine(character) || character == EOF) {
244+ return i;
245+ }
246+ ++ i;
247+ }
248+ }
249+
250+ private int lookaheadString(int i) {
251+ int offset = lookaheadMultilineString(i);
252+ if (offset == 0 ) {
253+ offset = lookaheadSingleLineString(i);
254+ }
255+ return i + offset;
256+ }
257+
258+ private int lookaheadMultilineString(int i) {
259+ int startQuotes = lookaheadSingleQuotes(i);
260+ if (startQuotes >= 3 && (startQuotes & 1 ) != 0 && isNewLine(input.LA(i + startQuotes))) {
261+ int offset = startQuotes - 1 ;
159262 while (true ) {
160- switch (input.LA(++ i )) {
263+ switch (input.LA(i + ++ offset )) {
161264 case ' \' ' :
162- int quotes = Math.min(startQuotes, lookaheadSingleQuotes(i));
163- i += quotes;
265+ int quotes = Math.min(startQuotes, lookaheadSingleQuotes(i + offset ));
266+ offset += quotes;
164267 if (quotes == startQuotes) {
165- return i ;
268+ return offset ;
166269 }
167270 break ;
168271
@@ -185,6 +288,25 @@ package au.com.integradev.delphi.antlr;
185288 return result;
186289 }
187290
291+ private int lookaheadSingleLineString(int i) {
292+ int offset = 1 ;
293+
294+ int character;
295+
296+ while ((character = input.LA(i + offset)) != EOF && !isNewLine(character)) {
297+ ++ offset;
298+ if (character == ' \' ' ) {
299+ if (input.LA(i + offset) == ' \' ' ) {
300+ ++ offset;
301+ } else {
302+ break ;
303+ }
304+ }
305+ }
306+
307+ return offset;
308+ }
309+
188310 private static boolean isNewLine(int c) {
189311 return c == ' \r ' || c == ' \n ' ;
190312 }
@@ -1233,10 +1355,10 @@ TkAsmId : { asmMode }? => '@' '@'? (Alpha | '_' | Digit)+
12331355 ;
// Lexer rule guarded by the asmMode predicate: a HexDigitSeq followed by a lower- or upper-case h.
12341356TkAsmHexNum : { asmMode } ? => HexDigitSeq (' h' |' H' )
12351357 ;
1236- TkQuotedString @init { int multilineStringRemaining = lookaheadMultilineString(); }
1358+ TkQuotedString @init { int multilineStringRemaining = lookaheadMultilineString(1 ); }
12371359 : ' \' '
12381360 ({ multilineStringRemaining != 0 } ? => {
1239- int i = multilineStringRemaining - 1 ;
1361+ int i = multilineStringRemaining;
12401362 while (-- i > 0 ) {
12411363 matchAny();
12421364 }
@@ -1326,30 +1448,28 @@ fragment Z : 'z' | 'Z';
13261448// Hidden channel
13271449//----------------------------------------------------------------------------
// COMMENT covers double-slash line comments and the paren-star and brace multi-line comments.
// Every alternative routes the token to the hidden channel. A multi-line comment whose text
// begins with its opener followed by a dollar sign is retyped as TkCompilerDirective;
// directiveNesting is decremented for the endif and ifend prefixes and incremented for any other
// prefix that starts with if.
13281450COMMENT : ' //' ~ (' \n ' |' \r ' )* { $channel= HIDDEN;}
1329- | ' (*' ( options { greedy = false ; } : . ) * ' *) '
1330- {
1331- $channel= HIDDEN;
1332- if ($text.startsWith( " (* \$ " )) {
1333- $type = TkCompilerDirective ;
1334- if ($text.startsWith( " (* \$ endif " ) || $text.startsWith( " (* \$ ifend " )) {
1335- -- directiveNesting;
1336- } else if ($text.startsWith( " (* \$ if " )) {
1337- ++ directiveNesting;
1338- }
1339- }
1340- }
1341- | ' { ' ( options { greedy = false ; } : . ) * ' } '
1342- {
1343- $channel = HIDDEN ;
1344- if ($text.startsWith(" { \$ " )) {
1345- $type = TkCompilerDirective ;
1346- if ($text.startsWith(" { \$ endif " ) || $text.startsWith( " { \$ ifend " )) {
1347- -- directiveNesting;
1348- } else if ($text.startsWith( " { \$ if " )) {
1349- ++ directiveNesting;
1451+ | ( ' (*' | ' { ' )
1452+ {
1453+ $channel= HIDDEN;
1454+
// Only the opener has been matched at this point, so the token text is the opener itself.
1455+ String start = $text ;
1456+ String end = start.equals( " { " ) ? " } " : " *) " ;
1457+
// Measure the rest of the comment by manual lookahead, then consume it. Because of the
// pre-decrement, matchAny() runs one time fewer than the value returned by the lookahead.
1458+ int multilineCommentRemaining = lookaheadMultilineComment(end);
1459+
1460+ while ( -- multilineCommentRemaining > 0 ) {
1461+ matchAny();
1462+ }
1463+
1464+ if ($text.startsWith(start + " \$ " )) {
1465+ $type = TkCompilerDirective ;
1466+ if ($text.startsWith(start + " \$ endif " ) || $text.startsWith(start + " \$ ifend " )) {
1467+ -- directiveNesting ;
1468+ } else if ($text.startsWith(start + " \$ if " )) {
1469+ ++ directiveNesting;
1470+ }
1471+ }
13501472 }
1351- }
1352- }
13531473 ;
// One or more characters in the range U+0000 to U+0020, or U+3000; sent to the hidden channel.
13541474WHITESPACE : (' \u 0000' .. ' \u 0020' | ' \u 3000' )+ { $channel= HIDDEN;}
13551475 ;
0 commit comments