1414import java .util .ArrayDeque ;
1515
1616import info .codesaway .bex .ImmutableIntRangeMap ;
17+ import info .codesaway .bex .ImmutableIntRangeMap .Builder ;
1718import info .codesaway .bex .Indexed ;
1819import info .codesaway .bex .IntBEXRange ;
1920
@@ -174,15 +175,34 @@ public static boolean hasCaseInsensitiveText(final CharSequence text, final int
174175
175176 /**
176177 *
177- * @param parsingState
178- * @param parent
179- * @return
178+ * @param parsingState the parsing state
179+ * @param parent the parent (or <code>null</code> if none)
180+ * @return the ParsingState with the parent or the passed ParsingState if parent is <code>null</code>
180181 * @since 0.13
181182 */
182183 public static ParsingState parsingState (final ParsingState parsingState , final Indexed <ParsingState > parent ) {
184+ if (parent == null ) {
185+ return parsingState ;
186+ }
187+
183188 return new ParsingStateValue (parsingState , parent );
184189 }
185190
191+ /**
192+ * Unwraps the parsing state for ParsingStateValue
193+ * @param parsingState the parsing state
194+ * @return the unwrapped parsing state for ParsingStateValue; otherwise, the passed ParsingState
195+ * @since 0.13
196+ */
197+ // Issue #108
198+ public static ParsingState unwrapParsingState (final ParsingState parsingState ) {
199+ if (parsingState instanceof ParsingStateValue ) {
200+ return ((ParsingStateValue ) parsingState ).getParsingState ();
201+ }
202+
203+ return parsingState ;
204+ }
205+
186206 /**
187207 * Parses the specified Java text and determines the <code>ParsingState</code>s
188208 * @param text the Java text
@@ -239,13 +259,7 @@ public static ImmutableIntRangeMap<ParsingState> parseJavaTextStates(final CharS
239259 if (c == '\n' || c == '\r' ) {
240260 int startTextInfo = startTextInfoStack .pop ();
241261 builder .put (IntBEXRange .of (startTextInfo , i ), stateStack .pop ());
242-
243- if (c == '\r' && nextChar (text , i ) == '\n' ) {
244- builder .put (IntBEXRange .closed (i , i + 1 ), LINE_TERMINATOR );
245- i ++;
246- } else {
247- builder .put (IntBEXRange .singleton (i ), LINE_TERMINATOR );
248- }
262+ i = handleLineTerminator (i , c , text , builder , stateStack , startTextInfoStack , null );
249263 }
250264 // Other characters don't matter?
251265 } else if (stateStack .peek () == IN_MULTILINE_COMMENT ) {
@@ -264,33 +278,8 @@ public static ImmutableIntRangeMap<ParsingState> parseJavaTextStates(final CharS
264278 pushParsingState (IN_STRING_LITERAL , i , stateStack , startTextInfoStack , startTextInfoStack );
265279 } else if (c == '\'' ) {
266280 pushParsingState (IN_SECONDARY_STRING_LITERAL , i , stateStack , startTextInfoStack , startTextInfoStack );
267- } else if (c == '\n' || c == '\r' ) {
268- if (c == '\r' && nextChar (text , i ) == '\n' ) {
269- builder .put (IntBEXRange .closed (i , i + 1 ), LINE_TERMINATOR );
270- i ++;
271- } else {
272- builder .put (IntBEXRange .singleton (i ), LINE_TERMINATOR );
273- }
274281 } else if (Character .isWhitespace (c )) {
275- char nextChar = nextChar (text , i );
276- if (hasNextChar (text , i ) && Character .isWhitespace (nextChar )) {
277- // Multiple whitespace
278- int start = i ;
279-
280- do {
281- if (nextChar == '\n' || nextChar == '\r' ) {
282- break ;
283- }
284-
285- i ++;
286- nextChar = nextChar (text , i );
287- } while (hasNextChar (text , i ) && Character .isWhitespace (nextChar ));
288-
289- builder .put (IntBEXRange .closed (start , i ), WHITESPACE );
290- } else {
291- // Single whitespace
292- builder .put (IntBEXRange .singleton (i ), WHITESPACE );
293- }
282+ i = handleWhitespace (i , c , text , builder , stateStack , startTextInfoStack , null );
294283 }
295284 }
296285
@@ -360,11 +349,7 @@ public static ImmutableIntRangeMap<ParsingState> parseJSPTextStates(final CharSe
360349 // + "Start %s%n"
361350 // + "Parent %s%n", i, c, stateStack, startTextInfoStack, parentStartStack);
362351
363- ParsingState currentState = stateStack .peek ();
364-
365- if (currentState instanceof ParsingStateValue ) {
366- currentState = ((ParsingStateValue ) currentState ).getParsingState ();
367- }
352+ ParsingState currentState = unwrapParsingState (stateStack .peek ());
368353
369354 if (currentState == IN_STRING_LITERAL ) {
370355 if (c == '\\' ) {
@@ -419,6 +404,7 @@ public static ImmutableIntRangeMap<ParsingState> parseJSPTextStates(final CharSe
419404 } else if (isJava && currentState == IN_LINE_COMMENT ) {
420405 if (c == '\n' || c == '\r' ) {
421406 popParsingState (i , builder , stateStack , startTextInfoStack , parentStartStack );
407+ i = handleLineTerminator (i , c , text , builder , stateStack , startTextInfoStack , parentStartStack );
422408 // int startTextInfo = startTextInfoStack.pop();
423409 // builder.put(IntBEXRange.of(startTextInfo, i), stateStack.pop());
424410 }
@@ -480,6 +466,8 @@ public static ImmutableIntRangeMap<ParsingState> parseJSPTextStates(final CharSe
480466 } else if (c == '>' && isTag ) {
481467 isTag = false ;
482468 popParsingState (i , builder , stateStack , startTextInfoStack , parentStartStack );
469+ } else if (Character .isWhitespace (c )) {
470+ i = handleWhitespace (i , c , text , builder , stateStack , startTextInfoStack , parentStartStack );
483471 }
484472 }
485473
@@ -557,6 +545,7 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
557545 if (c == '\n' || c == '\r' ) {
558546 int startTextInfo = startTextInfoStack .pop ();
559547 builder .put (IntBEXRange .of (startTextInfo , i ), stateStack .pop ());
548+ i = handleLineTerminator (i , c , text , builder , stateStack , startTextInfoStack , null );
560549 }
561550 // Other characters don't matter?
562551 } else if (stateStack .peek () == IN_MULTILINE_COMMENT ) {
@@ -571,15 +560,7 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
571560 }
572561 } else if (hasText (text , i , "/*" )) {
573562 // SQL supports nested block comments
574-
575- // Going into second level, so end current level
576- int startTextInfo = startTextInfoStack .pop ();
577- if (startTextInfo != i ) {
578- // Only add if not empty range
579- // Would be empty for example if ended one expression then immediately started next one
580- builder .put (IntBEXRange .of (startTextInfo , i ), stateStack .peek ());
581- }
582-
563+ endCurrentLevel (i , builder , stateStack , startTextInfoStack );
583564 stateStack .push (IN_MULTILINE_COMMENT );
584565 startTextInfoStack .push (i );
585566 i ++;
@@ -598,6 +579,8 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
598579 // } else if (c == '"') {
599580 // stateStack.push(IN_SECONDARY_STRING_LITERAL);
600581 // startTextInfoStack.push(i);
582+ } else if (Character .isWhitespace (c )) {
583+ i = handleWhitespace (i , c , text , builder , stateStack , startTextInfoStack , null );
601584 }
602585 }
603586
@@ -614,18 +597,22 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
614597 private static void pushNextLevelParsingState (final ParsingState parsingState , final int i ,
615598 final ImmutableIntRangeMap .Builder <ParsingState > builder , final ArrayDeque <ParsingState > stateStack ,
616599 final ArrayDeque <Integer > startTextInfoStack , final ArrayDeque <Integer > parentStartStack ) {
617- // Going into second level, so end current level
600+ endCurrentLevel (i , builder , stateStack , startTextInfoStack );
601+ Indexed <ParsingState > parent = index (parentStartStack .peek (), stateStack .peek ());
602+ ParsingState newParsingState = parsingState (parsingState , parent );
603+ pushParsingState (newParsingState , i , stateStack , startTextInfoStack , parentStartStack );
604+ }
605+
606+ private static void endCurrentLevel (final int index ,
607+ final ImmutableIntRangeMap .Builder <ParsingState > builder , final ArrayDeque <ParsingState > stateStack ,
608+ final ArrayDeque <Integer > startTextInfoStack ) {
609+ // Going into next level, so end current level
618610 int startTextInfo = startTextInfoStack .pop ();
619- if (startTextInfo != i ) {
611+ if (startTextInfo != index ) {
620612 // Only add if not empty range
621613 // Would be empty for example if ended one expression then immediately started next one
622- builder .put (IntBEXRange .of (startTextInfo , i ), stateStack .peek ());
614+ builder .put (IntBEXRange .of (startTextInfo , index ), stateStack .peek ());
623615 }
624-
625- // System.out.println("Parent: " + parentStartStack);
626- Indexed <ParsingState > parent = index (parentStartStack .peek (), stateStack .peek ());
627- ParsingState newParsingState = parsingState (parsingState , parent );
628- pushParsingState (newParsingState , i , stateStack , startTextInfoStack , parentStartStack );
629616 }
630617
631618 private static void pushParsingState (final ParsingState parsingState , final int i ,
@@ -652,4 +639,84 @@ private static void popParsingState(final int i, final ImmutableIntRangeMap.Buil
652639
653640 // System.out.println("Parent after popParsingState: " + parentStartStack);
654641 }
642+
643+ /**
644+ * Handle line terminator
645+ * @param i current index
646+ * @param c current character
647+ * @param text the text
648+ * @param builder the builder
649+ * @return the new index after handling the line terminator
650+ */
651+ private static int handleLineTerminator (final int i , final char c , final CharSequence text ,
652+ final Builder <ParsingState > builder , final ArrayDeque <ParsingState > stateStack ,
653+ final ArrayDeque <Integer > startTextInfoStack , final ArrayDeque <Integer > parentStartStack ) {
654+ int end = (c == '\r' && nextChar (text , i ) == '\n' ) ? i + 1 : i ;
655+
656+ boolean hasParentParsingState = !stateStack .isEmpty ();
657+ Indexed <ParsingState > parent ;
658+ if (hasParentParsingState ) {
659+ endCurrentLevel (i , builder , stateStack , startTextInfoStack );
660+ parent = index (parentStartStack .peek (), stateStack .peek ());
661+ } else {
662+ parent = null ;
663+ }
664+
665+ builder .put (IntBEXRange .closed (i , end ), parsingState (LINE_TERMINATOR , parent ));
666+
667+ if (hasParentParsingState ) {
668+ startTextInfoStack .push (end + 1 );
669+ }
670+
671+ return end ;
672+ }
673+
674+ /**
675+ * Handle whitespace
676+ * @param i current index
677+ * @param c current charecter
678+ * @param text the text
679+ * @param builder the builder
680+ * @return the new index after handling the whitespace
681+ */
682+ private static int handleWhitespace (final int i , final char c , final CharSequence text ,
683+ final Builder <ParsingState > builder , final ArrayDeque <ParsingState > stateStack ,
684+ final ArrayDeque <Integer > startTextInfoStack , final ArrayDeque <Integer > parentStartStack ) {
685+ if (c == '\n' || c == '\r' ) {
686+ return handleLineTerminator (i , c , text , builder , stateStack , startTextInfoStack , parentStartStack );
687+ }
688+
689+ int start = i ;
690+ int end = i ;
691+ char nextChar = nextChar (text , i );
692+ if (hasNextChar (text , i ) && Character .isWhitespace (nextChar )) {
693+ // Multiple whitespace
694+
695+ do {
696+ if (nextChar == '\n' || nextChar == '\r' ) {
697+ break ;
698+ }
699+
700+ end ++;
701+ nextChar = nextChar (text , end );
702+ } while (hasNextChar (text , end ) && Character .isWhitespace (nextChar ));
703+ }
704+
705+ boolean hasParentParsingState = !stateStack .isEmpty ();
706+ Indexed <ParsingState > parent ;
707+ if (hasParentParsingState ) {
708+ endCurrentLevel (start , builder , stateStack , startTextInfoStack );
709+ parent = index (parentStartStack .peek (), stateStack .peek ());
710+ } else {
711+ parent = null ;
712+ }
713+
714+ builder .put (IntBEXRange .closed (start , end ), parsingState (WHITESPACE , parent ));
715+
716+ if (hasParentParsingState ) {
717+ startTextInfoStack .push (end + 1 );
718+ }
719+
720+ return end ;
721+ }
655722}
0 commit comments