77import static info .codesaway .bex .matching .BEXMatchingStateOption .IN_SECONDARY_STRING_LITERAL ;
88import static info .codesaway .bex .matching .BEXMatchingStateOption .IN_STRING_LITERAL ;
99import static info .codesaway .bex .matching .BEXMatchingStateOption .IN_TAG ;
10+ import static info .codesaway .bex .matching .BEXMatchingStateOption .LINE_TERMINATOR ;
11+ import static info .codesaway .bex .matching .BEXMatchingStateOption .WHITESPACE ;
1012
1113import java .util .ArrayDeque ;
1214
@@ -224,6 +226,13 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseJavaTextStates(fina
224226 if (c == '\n' || c == '\r' ) {
225227 int startTextInfo = startTextInfoStack .pop ();
226228 builder .put (IntBEXRange .of (startTextInfo , i ), stateStack .pop ());
229+
230+ if (c == '\r' && nextChar (text , i ) == '\n' ) {
231+ builder .put (IntBEXRange .closed (i , i + 1 ), LINE_TERMINATOR );
232+ i ++;
233+ } else {
234+ builder .put (IntBEXRange .singleton (i ), LINE_TERMINATOR );
235+ }
227236 }
228237 // Other characters don't matter?
229238 } else if (stateStack .peek () == IN_MULTILINE_COMMENT ) {
@@ -246,6 +255,33 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseJavaTextStates(fina
246255 } else if (c == '\'' ) {
247256 stateStack .push (IN_SECONDARY_STRING_LITERAL );
248257 startTextInfoStack .push (i );
258+ } else if (c == '\n' || c == '\r' ) {
259+ if (c == '\r' && nextChar (text , i ) == '\n' ) {
260+ builder .put (IntBEXRange .closed (i , i + 1 ), LINE_TERMINATOR );
261+ i ++;
262+ } else {
263+ builder .put (IntBEXRange .singleton (i ), LINE_TERMINATOR );
264+ }
265+ } else if (Character .isWhitespace (c )) {
266+ char nextChar = nextChar (text , i );
267+ if (hasNextChar (text , i ) && Character .isWhitespace (nextChar )) {
268+ // Multiple whitespace
269+ int start = i ;
270+
271+ do {
272+ if (nextChar == '\n' || nextChar == '\r' ) {
273+ break ;
274+ }
275+
276+ i ++;
277+ nextChar = nextChar (text , i );
278+ } while (hasNextChar (text , i ) && Character .isWhitespace (nextChar ));
279+
280+ builder .put (IntBEXRange .closed (start , i ), WHITESPACE );
281+ } else {
282+ // Single whitespace
283+ builder .put (IntBEXRange .singleton (i ), WHITESPACE );
284+ }
249285 }
250286 }
251287
@@ -304,7 +340,7 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseJSPTextStates(final
304340 // HTML tag
305341 boolean isTag = false ;
306342 // TODO: should I refactor and use this? how would I use it?
307- String expectedEnd = "" ;
343+ // String expectedEnd = "";
308344
309345 for (int i = 0 ; i < text .length (); i ++) {
310346 char c = text .charAt (i );
@@ -347,17 +383,28 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseJSPTextStates(final
347383 // TODO: handle unicode and other escaping in String literal
348384
349385 // TODO: Java comments only valid in <% code block %>
386+ } else if (isJava && hasText (text , i , "%>" )) {
387+ isJava = false ;
388+
389+ if (stateStack .peek () != IN_EXPRESSION_BLOCK ) {
390+ // End the current state on the prior character
391+ popMatchingStateOption (i - 1 , builder , stateStack , startTextInfoStack );
392+ }
393+
394+ i ++;
395+ popMatchingStateOption (i , builder , stateStack , startTextInfoStack );
350396 } else if (isJava && stateStack .peek () == IN_LINE_COMMENT ) {
351397 if (c == '\n' || c == '\r' ) {
352- int startTextInfo = startTextInfoStack .pop ();
353- builder .put (IntBEXRange .of (startTextInfo , i ), stateStack .pop ());
398+ popMatchingStateOption (i , builder , stateStack , startTextInfoStack );
399+ // int startTextInfo = startTextInfoStack.pop();
400+ // builder.put(IntBEXRange.of(startTextInfo, i), stateStack.pop());
354401 }
355402 // Other characters don't matter?
356403 } else if (isJava && stateStack .peek () == IN_MULTILINE_COMMENT ) {
357404 if (hasText (text , i , "*/" )) {
358405 i ++;
359- int startTextInfo = startTextInfoStack .pop ();
360- builder .put (IntBEXRange .closed (startTextInfo , i ), stateStack .pop ());
406+ popMatchingStateOption ( i , builder , stateStack , startTextInfoStack ); // int startTextInfo = startTextInfoStack.pop();
407+ // builder.put(IntBEXRange.closed(startTextInfo, i), stateStack.pop());
361408 }
362409 } else if (stateStack .peek () == IN_MULTILINE_COMMENT ) {
363410 if (hasText (text , i , "--%>" )) {
@@ -369,27 +416,15 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseJSPTextStates(final
369416 i += 2 ;
370417 popMatchingStateOption (i , builder , stateStack , startTextInfoStack );
371418 }
372- } else if (stateStack .peek () == IN_EXPRESSION_BLOCK ) {
373- if (hasText (text , i , "%>" )) {
374- isJava = false ;
375- i ++;
376- popMatchingStateOption (i , builder , stateStack , startTextInfoStack );
377- }
378- } else if (hasText (text , i , "<%--" )) {
379- stateStack .push (IN_MULTILINE_COMMENT );
380- startTextInfoStack .push (i );
381- i += 3 ;
382- } else if (hasText (text , i , "<!--" )) {
383- stateStack .push (IN_SECONDARY_MULTILINE_COMMENT );
384- startTextInfoStack .push (i );
385- i += 3 ;
386419 } else if (isJava && c == '/' && nextChar (text , i ) == '/' ) {
387- stateStack .push (IN_LINE_COMMENT );
388- startTextInfoStack .push (i );
420+ pushNextLevelMatchingStateOption (IN_LINE_COMMENT , i , builder , stateStack , startTextInfoStack );
421+ // stateStack.push(IN_LINE_COMMENT);
422+ // startTextInfoStack.push(i);
389423 i ++;
390424 } else if (isJava && c == '/' && nextChar (text , i ) == '*' ) {
391- stateStack .push (IN_MULTILINE_COMMENT );
392- startTextInfoStack .push (i );
425+ pushNextLevelMatchingStateOption (IN_MULTILINE_COMMENT , i , builder , stateStack , startTextInfoStack );
426+ // stateStack.push(IN_MULTILINE_COMMENT);
427+ // startTextInfoStack.push(i);
393428 i ++;
394429 } else if (c == '"' && isTag ) {
395430 pushNextLevelMatchingStateOption (IN_STRING_LITERAL , i , builder , stateStack , startTextInfoStack );
@@ -402,6 +437,14 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseJSPTextStates(final
402437 } else if (c == '\'' && isJava ) {
403438 stateStack .push (IN_SECONDARY_STRING_LITERAL );
404439 startTextInfoStack .push (i );
440+ } else if (hasText (text , i , "<%--" )) {
441+ stateStack .push (IN_MULTILINE_COMMENT );
442+ startTextInfoStack .push (i );
443+ i += 3 ;
444+ } else if (hasText (text , i , "<!--" )) {
445+ stateStack .push (IN_SECONDARY_MULTILINE_COMMENT );
446+ startTextInfoStack .push (i );
447+ i += 3 ;
405448 } else if (hasText (text , i , "<%=" )) {
406449 stateStack .push (IN_EXPRESSION_BLOCK );
407450 startTextInfoStack .push (i );
@@ -442,20 +485,20 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseSQLTextStates(final
442485 if (text .length () == 0 ) {
443486 return ImmutableIntRangeMap .of ();
444487 }
445-
488+
446489 // Parse text to get states
447490 // * Block comment
448491 // * Line comment
449492 // * In String literal
450493 // * Other stuff?
451-
494+
452495 ImmutableIntRangeMap .Builder <MatchingStateOption > builder = ImmutableIntRangeMap .builder ();
453496 ArrayDeque <MatchingStateOption > stateStack = new ArrayDeque <>();
454497 ArrayDeque <Integer > startTextInfoStack = new ArrayDeque <>();
455-
498+
456499 for (int i = 0 ; i < text .length (); i ++) {
457500 char c = text .charAt (i );
458-
501+
459502 if (stateStack .peek () == IN_STRING_LITERAL ) {
460503 // TODO: how to implement escaping, since cannot escape single quote with '\'
461504 if (c == '\'' && nextChar (text , i ) == '\'' ) {
@@ -501,22 +544,22 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseSQLTextStates(final
501544 i ++;
502545 int startTextInfo = startTextInfoStack .pop ();
503546 builder .put (IntBEXRange .closed (startTextInfo , i ), stateStack .pop ());
504-
547+
505548 if (!stateStack .isEmpty ()) {
506549 // Inside a first level, so add startTextInfo for the text after the expression block ends
507550 startTextInfoStack .push (i + 1 );
508551 }
509552 } else if (hasText (text , i , "/*" )) {
510553 // SQL supports nested block comments
511-
554+
512555 // Going into second level, so end current level
513556 int startTextInfo = startTextInfoStack .pop ();
514557 if (startTextInfo != i ) {
515558 // Only add if not empty range
516559 // Would be empty, for example, if one expression ended and the next one started immediately
517560 builder .put (IntBEXRange .of (startTextInfo , i ), stateStack .peek ());
518561 }
519-
562+
520563 stateStack .push (IN_MULTILINE_COMMENT );
521564 startTextInfoStack .push (i );
522565 i ++;
@@ -537,14 +580,14 @@ public static ImmutableIntRangeMap<MatchingStateOption> parseSQLTextStates(final
537580 // startTextInfoStack.push(i);
538581 }
539582 }
540-
583+
541584 if (!stateStack .isEmpty ()) {
542585 // TODO: what if there are multiple entries?
543586 // (this would suggest improperly formatted code)
544587 int startTextInfo = startTextInfoStack .pop ();
545588 builder .put (IntBEXRange .of (startTextInfo , text .length ()), stateStack .pop ());
546589 }
547-
590+
548591 return builder .build ();
549592 }
550593
0 commit comments