Skip to content

Commit 365341a

Browse files
committed
Refactored and simplified parsing code
Handle whitespace / line separator including if within parent ParsingState
1 parent 2c50f14 commit 365341a

File tree

4 files changed

+224
-73
lines changed

4 files changed

+224
-73
lines changed

BEXCodeCompare/src/main/java/info/codesaway/bex/parsing/BEXParsingUtilities.java

Lines changed: 125 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import java.util.ArrayDeque;
1515

1616
import info.codesaway.bex.ImmutableIntRangeMap;
17+
import info.codesaway.bex.ImmutableIntRangeMap.Builder;
1718
import info.codesaway.bex.Indexed;
1819
import info.codesaway.bex.IntBEXRange;
1920

@@ -174,15 +175,34 @@ public static boolean hasCaseInsensitiveText(final CharSequence text, final int
174175

175176
/**
176177
*
177-
* @param parsingState
178-
* @param parent
179-
* @return
178+
* @param parsingState the parsing state
179+
* @param parent the parent (or <code>null</code> if none)
180+
* @return the ParsingState with the parent or the passed ParsingState if parent is <code>null</code>
180181
* @since 0.13
181182
*/
182183
public static ParsingState parsingState(final ParsingState parsingState, final Indexed<ParsingState> parent) {
184+
if (parent == null) {
185+
return parsingState;
186+
}
187+
183188
return new ParsingStateValue(parsingState, parent);
184189
}
185190

191+
/**
192+
* Unwraps the parsing state for ParsingStateValue
193+
* @param parsingState the parsing state
194+
* @return the unwrapped parsing state for ParsingStateValue; otherwise, the passed ParsingState
195+
* @since 0.13
196+
*/
197+
// Issue #108
198+
public static ParsingState unwrapParsingState(final ParsingState parsingState) {
199+
if (parsingState instanceof ParsingStateValue) {
200+
return ((ParsingStateValue) parsingState).getParsingState();
201+
}
202+
203+
return parsingState;
204+
}
205+
186206
/**
187207
* Parses the specified Java text and determines the <code>ParsingState</code>s
188208
* @param text the Java text
@@ -239,13 +259,7 @@ public static ImmutableIntRangeMap<ParsingState> parseJavaTextStates(final CharS
239259
if (c == '\n' || c == '\r') {
240260
int startTextInfo = startTextInfoStack.pop();
241261
builder.put(IntBEXRange.of(startTextInfo, i), stateStack.pop());
242-
243-
if (c == '\r' && nextChar(text, i) == '\n') {
244-
builder.put(IntBEXRange.closed(i, i + 1), LINE_TERMINATOR);
245-
i++;
246-
} else {
247-
builder.put(IntBEXRange.singleton(i), LINE_TERMINATOR);
248-
}
262+
i = handleLineTerminator(i, c, text, builder, stateStack, startTextInfoStack, null);
249263
}
250264
// Other characters don't matter?
251265
} else if (stateStack.peek() == IN_MULTILINE_COMMENT) {
@@ -264,33 +278,8 @@ public static ImmutableIntRangeMap<ParsingState> parseJavaTextStates(final CharS
264278
pushParsingState(IN_STRING_LITERAL, i, stateStack, startTextInfoStack, startTextInfoStack);
265279
} else if (c == '\'') {
266280
pushParsingState(IN_SECONDARY_STRING_LITERAL, i, stateStack, startTextInfoStack, startTextInfoStack);
267-
} else if (c == '\n' || c == '\r') {
268-
if (c == '\r' && nextChar(text, i) == '\n') {
269-
builder.put(IntBEXRange.closed(i, i + 1), LINE_TERMINATOR);
270-
i++;
271-
} else {
272-
builder.put(IntBEXRange.singleton(i), LINE_TERMINATOR);
273-
}
274281
} else if (Character.isWhitespace(c)) {
275-
char nextChar = nextChar(text, i);
276-
if (hasNextChar(text, i) && Character.isWhitespace(nextChar)) {
277-
// Multiple whitespace
278-
int start = i;
279-
280-
do {
281-
if (nextChar == '\n' || nextChar == '\r') {
282-
break;
283-
}
284-
285-
i++;
286-
nextChar = nextChar(text, i);
287-
} while (hasNextChar(text, i) && Character.isWhitespace(nextChar));
288-
289-
builder.put(IntBEXRange.closed(start, i), WHITESPACE);
290-
} else {
291-
// Single whitespace
292-
builder.put(IntBEXRange.singleton(i), WHITESPACE);
293-
}
282+
i = handleWhitespace(i, c, text, builder, stateStack, startTextInfoStack, null);
294283
}
295284
}
296285

@@ -360,11 +349,7 @@ public static ImmutableIntRangeMap<ParsingState> parseJSPTextStates(final CharSe
360349
// + "Start %s%n"
361350
// + "Parent %s%n", i, c, stateStack, startTextInfoStack, parentStartStack);
362351

363-
ParsingState currentState = stateStack.peek();
364-
365-
if (currentState instanceof ParsingStateValue) {
366-
currentState = ((ParsingStateValue) currentState).getParsingState();
367-
}
352+
ParsingState currentState = unwrapParsingState(stateStack.peek());
368353

369354
if (currentState == IN_STRING_LITERAL) {
370355
if (c == '\\') {
@@ -419,6 +404,7 @@ public static ImmutableIntRangeMap<ParsingState> parseJSPTextStates(final CharSe
419404
} else if (isJava && currentState == IN_LINE_COMMENT) {
420405
if (c == '\n' || c == '\r') {
421406
popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
407+
i = handleLineTerminator(i, c, text, builder, stateStack, startTextInfoStack, parentStartStack);
422408
// int startTextInfo = startTextInfoStack.pop();
423409
// builder.put(IntBEXRange.of(startTextInfo, i), stateStack.pop());
424410
}
@@ -480,6 +466,8 @@ public static ImmutableIntRangeMap<ParsingState> parseJSPTextStates(final CharSe
480466
} else if (c == '>' && isTag) {
481467
isTag = false;
482468
popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
469+
} else if (Character.isWhitespace(c)) {
470+
i = handleWhitespace(i, c, text, builder, stateStack, startTextInfoStack, parentStartStack);
483471
}
484472
}
485473

@@ -557,6 +545,7 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
557545
if (c == '\n' || c == '\r') {
558546
int startTextInfo = startTextInfoStack.pop();
559547
builder.put(IntBEXRange.of(startTextInfo, i), stateStack.pop());
548+
i = handleLineTerminator(i, c, text, builder, stateStack, startTextInfoStack, null);
560549
}
561550
// Other characters don't matter?
562551
} else if (stateStack.peek() == IN_MULTILINE_COMMENT) {
@@ -571,15 +560,7 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
571560
}
572561
} else if (hasText(text, i, "/*")) {
573562
// SQL supports nested block comments
574-
575-
// Going into second level, so end current level
576-
int startTextInfo = startTextInfoStack.pop();
577-
if (startTextInfo != i) {
578-
// Only add if not empty range
579-
// Would be empty for example if ended one expression then immediately started next one
580-
builder.put(IntBEXRange.of(startTextInfo, i), stateStack.peek());
581-
}
582-
563+
endCurrentLevel(i, builder, stateStack, startTextInfoStack);
583564
stateStack.push(IN_MULTILINE_COMMENT);
584565
startTextInfoStack.push(i);
585566
i++;
@@ -598,6 +579,8 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
598579
// } else if (c == '"') {
599580
// stateStack.push(IN_SECONDARY_STRING_LITERAL);
600581
// startTextInfoStack.push(i);
582+
} else if (Character.isWhitespace(c)) {
583+
i = handleWhitespace(i, c, text, builder, stateStack, startTextInfoStack, null);
601584
}
602585
}
603586

@@ -614,18 +597,22 @@ public static ImmutableIntRangeMap<ParsingState> parseSQLTextStates(final CharSe
614597
private static void pushNextLevelParsingState(final ParsingState parsingState, final int i,
615598
final ImmutableIntRangeMap.Builder<ParsingState> builder, final ArrayDeque<ParsingState> stateStack,
616599
final ArrayDeque<Integer> startTextInfoStack, final ArrayDeque<Integer> parentStartStack) {
617-
// Going into second level, so end current level
600+
endCurrentLevel(i, builder, stateStack, startTextInfoStack);
601+
Indexed<ParsingState> parent = index(parentStartStack.peek(), stateStack.peek());
602+
ParsingState newParsingState = parsingState(parsingState, parent);
603+
pushParsingState(newParsingState, i, stateStack, startTextInfoStack, parentStartStack);
604+
}
605+
606+
private static void endCurrentLevel(final int index,
607+
final ImmutableIntRangeMap.Builder<ParsingState> builder, final ArrayDeque<ParsingState> stateStack,
608+
final ArrayDeque<Integer> startTextInfoStack) {
609+
// Going into next level, so end current level
618610
int startTextInfo = startTextInfoStack.pop();
619-
if (startTextInfo != i) {
611+
if (startTextInfo != index) {
620612
// Only add if not empty range
621613
// Would be empty for example if ended one expression then immediately started next one
622-
builder.put(IntBEXRange.of(startTextInfo, i), stateStack.peek());
614+
builder.put(IntBEXRange.of(startTextInfo, index), stateStack.peek());
623615
}
624-
625-
// System.out.println("Parent: " + parentStartStack);
626-
Indexed<ParsingState> parent = index(parentStartStack.peek(), stateStack.peek());
627-
ParsingState newParsingState = parsingState(parsingState, parent);
628-
pushParsingState(newParsingState, i, stateStack, startTextInfoStack, parentStartStack);
629616
}
630617

631618
private static void pushParsingState(final ParsingState parsingState, final int i,
@@ -652,4 +639,84 @@ private static void popParsingState(final int i, final ImmutableIntRangeMap.Buil
652639

653640
// System.out.println("Parent after popParsingState: " + parentStartStack);
654641
}
642+
643+
/**
644+
* Handle line terminator
645+
* @param i current index
646+
* @param c current character
647+
* @param text the text
648+
* @param builder the builder
649+
* @return the new index after handling the line terminator
650+
*/
651+
private static int handleLineTerminator(final int i, final char c, final CharSequence text,
652+
final Builder<ParsingState> builder, final ArrayDeque<ParsingState> stateStack,
653+
final ArrayDeque<Integer> startTextInfoStack, final ArrayDeque<Integer> parentStartStack) {
654+
int end = (c == '\r' && nextChar(text, i) == '\n') ? i + 1 : i;
655+
656+
boolean hasParentParsingState = !stateStack.isEmpty();
657+
Indexed<ParsingState> parent;
658+
if (hasParentParsingState) {
659+
endCurrentLevel(i, builder, stateStack, startTextInfoStack);
660+
parent = index(parentStartStack.peek(), stateStack.peek());
661+
} else {
662+
parent = null;
663+
}
664+
665+
builder.put(IntBEXRange.closed(i, end), parsingState(LINE_TERMINATOR, parent));
666+
667+
if (hasParentParsingState) {
668+
startTextInfoStack.push(end + 1);
669+
}
670+
671+
return end;
672+
}
673+
674+
/**
675+
* Handle whitespace
676+
* @param i current index
677+
* @param c current character
678+
* @param text the text
679+
* @param builder the builder
680+
* @return the new index after handling the whitespace
681+
*/
682+
private static int handleWhitespace(final int i, final char c, final CharSequence text,
683+
final Builder<ParsingState> builder, final ArrayDeque<ParsingState> stateStack,
684+
final ArrayDeque<Integer> startTextInfoStack, final ArrayDeque<Integer> parentStartStack) {
685+
if (c == '\n' || c == '\r') {
686+
return handleLineTerminator(i, c, text, builder, stateStack, startTextInfoStack, parentStartStack);
687+
}
688+
689+
int start = i;
690+
int end = i;
691+
char nextChar = nextChar(text, i);
692+
if (hasNextChar(text, i) && Character.isWhitespace(nextChar)) {
693+
// Multiple whitespace
694+
695+
do {
696+
if (nextChar == '\n' || nextChar == '\r') {
697+
break;
698+
}
699+
700+
end++;
701+
nextChar = nextChar(text, end);
702+
} while (hasNextChar(text, end) && Character.isWhitespace(nextChar));
703+
}
704+
705+
boolean hasParentParsingState = !stateStack.isEmpty();
706+
Indexed<ParsingState> parent;
707+
if (hasParentParsingState) {
708+
endCurrentLevel(start, builder, stateStack, startTextInfoStack);
709+
parent = index(parentStartStack.peek(), stateStack.peek());
710+
} else {
711+
parent = null;
712+
}
713+
714+
builder.put(IntBEXRange.closed(start, end), parsingState(WHITESPACE, parent));
715+
716+
if (hasParentParsingState) {
717+
startTextInfoStack.push(end + 1);
718+
}
719+
720+
return end;
721+
}
655722
}

BEXCodeCompare/src/test/java/info/codesaway/bex/matching/BEXParseJSPTest.java

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
import static info.codesaway.bex.parsing.BEXParsingState.IN_LINE_COMMENT;
88
import static info.codesaway.bex.parsing.BEXParsingState.IN_MULTILINE_COMMENT;
99
import static info.codesaway.bex.parsing.BEXParsingState.IN_SECONDARY_MULTILINE_COMMENT;
10+
import static info.codesaway.bex.parsing.BEXParsingState.IN_SECONDARY_STRING_LITERAL;
1011
import static info.codesaway.bex.parsing.BEXParsingState.IN_STRING_LITERAL;
1112
import static info.codesaway.bex.parsing.BEXParsingState.IN_TAG;
13+
import static info.codesaway.bex.parsing.BEXParsingState.LINE_TERMINATOR;
14+
import static info.codesaway.bex.parsing.BEXParsingState.WHITESPACE;
1215
import static info.codesaway.bex.parsing.BEXParsingUtilities.parsingState;
1316
import static info.codesaway.bex.util.BEXUtilities.entry;
1417
import static info.codesaway.bex.util.BEXUtilities.index;
@@ -35,13 +38,23 @@ void testExpressionEndsThenAnotherImmediatelyStarts() {
3538

3639
assertThat(bexString.getTextStateMap().asMapOfRanges())
3740
.containsExactly(
38-
entry(closedOpen(0, 8), IN_TAG),
41+
entry(closedOpen(0, 2), IN_TAG),
42+
entry(singleton(2), parsingState(WHITESPACE, tagParent)),
43+
entry(closedOpen(3, 8), IN_TAG),
3944
entry(closedOpen(8, 9), parsingState(IN_STRING_LITERAL, tagParent)),
40-
entry(closed(9, 20), expressionInString),
41-
entry(closed(21, 33), expressionInString),
45+
entry(closedOpen(9, 18), expressionInString),
46+
entry(singleton(18), parsingState(WHITESPACE, index(9, expressionInString))),
47+
entry(closed(19, 20), expressionInString),
48+
entry(closedOpen(21, 24), expressionInString),
49+
entry(singleton(24), parsingState(WHITESPACE, index(21, expressionInString))),
50+
entry(closedOpen(25, 31), expressionInString),
51+
entry(singleton(31), parsingState(WHITESPACE, index(21, expressionInString))),
52+
entry(closed(32, 33), expressionInString),
4253
entry(singleton(34), parsingState(IN_STRING_LITERAL, tagParent)),
4354
entry(singleton(35), IN_TAG),
44-
entry(closed(36, 47), IN_EXPRESSION_BLOCK),
55+
entry(closedOpen(36, 45), IN_EXPRESSION_BLOCK),
56+
entry(singleton(45), parsingState(WHITESPACE, index(36, IN_EXPRESSION_BLOCK))),
57+
entry(closed(46, 47), IN_EXPRESSION_BLOCK),
4558
entry(closed(48, 51), IN_TAG));
4659
}
4760

@@ -55,19 +68,57 @@ void testComments() {
5568
BEXString bexString = new BEXString(text, BEXParsingLanguage.JSP);
5669

5770
Indexed<ParsingState> expressionParent = index(72, IN_EXPRESSION_BLOCK);
71+
ParsingState whitespaceInExpression = parsingState(WHITESPACE, expressionParent);
5872

5973
assertThat(bexString.getTextStateMap().asMapOfRanges())
6074
.containsExactly(
75+
entry(singleton(1), WHITESPACE),
76+
entry(singleton(6), WHITESPACE),
77+
entry(singleton(9), WHITESPACE),
6178
entry(closed(10, 31), IN_MULTILINE_COMMENT),
79+
entry(singleton(32), WHITESPACE),
80+
entry(singleton(36), WHITESPACE),
6281
entry(closed(37, 58), IN_SECONDARY_MULTILINE_COMMENT),
63-
entry(closedOpen(72, 83), IN_EXPRESSION_BLOCK),
64-
// Part of expression block
65-
// TODO: check this (issue #105)
82+
entry(closed(59, 60), LINE_TERMINATOR),
83+
entry(singleton(63), WHITESPACE),
84+
entry(singleton(68), WHITESPACE),
85+
entry(singleton(71), WHITESPACE),
86+
entry(closedOpen(72, 74), IN_EXPRESSION_BLOCK),
87+
entry(singleton(74), whitespaceInExpression),
88+
entry(closedOpen(75, 77), IN_EXPRESSION_BLOCK),
89+
entry(singleton(77), whitespaceInExpression),
90+
entry(closedOpen(78, 82), IN_EXPRESSION_BLOCK),
91+
entry(singleton(82), whitespaceInExpression),
92+
// Part of expression block (issue #105)
6693
entry(closed(83, 121), parsingState(IN_MULTILINE_COMMENT, expressionParent)),
67-
entry(closedOpen(122, 134), IN_EXPRESSION_BLOCK),
68-
// Part of expression block
69-
// TODO: check this (issue #105)
94+
entry(closed(122, 123), parsingState(LINE_TERMINATOR, expressionParent)),
95+
entry(closedOpen(124, 128), IN_EXPRESSION_BLOCK),
96+
entry(singleton(128), whitespaceInExpression),
97+
entry(closedOpen(129, 133), IN_EXPRESSION_BLOCK),
98+
entry(singleton(133), whitespaceInExpression),
99+
// Part of expression block (issue #105)
70100
entry(closed(134, 150), parsingState(IN_LINE_COMMENT, expressionParent)),
71101
entry(closed(151, 152), IN_EXPRESSION_BLOCK));
72102
}
103+
104+
@Test
105+
void testLineTerminatorInTag() {
106+
String text = "<blah\r\n"
107+
+ "text='text' \r\n"
108+
+ "></blah>";
109+
BEXString bexString = new BEXString(text, BEXParsingLanguage.JSP);
110+
111+
Indexed<ParsingState> tagParent = index(0, IN_TAG);
112+
113+
assertThat(bexString.getTextStateMap().asMapOfRanges())
114+
.containsExactly(
115+
entry(closedOpen(0, 5), IN_TAG),
116+
entry(closed(5, 6), parsingState(LINE_TERMINATOR, tagParent)),
117+
entry(closedOpen(7, 12), IN_TAG),
118+
entry(closed(12, 17), parsingState(IN_SECONDARY_STRING_LITERAL, tagParent)),
119+
entry(singleton(18), parsingState(WHITESPACE, tagParent)),
120+
entry(closed(19, 20), parsingState(LINE_TERMINATOR, tagParent)),
121+
entry(singleton(21), IN_TAG),
122+
entry(closed(22, 28), IN_TAG));
123+
}
73124
}

0 commit comments

Comments
 (0)