Skip to content

Commit fbda3ba

Browse files
committed
Fix to retry matching if we run into mismatched brackets
1 parent be1fc97 commit fbda3ba

File tree

8 files changed

+205
-88
lines changed

8 files changed

+205
-88
lines changed

BEXCodeCompare/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
<modelVersion>4.0.0</modelVersion>
55
<groupId>info.codesaway</groupId>
66
<artifactId>bex</artifactId>
7-
<version>0.10.0</version>
7+
<version>0.10.1</version>
88
<name>Be Enhanced Code Compare (BEϽC)</name>
99
<description>An enhanced code compare, utilities, and code matching</description>
1010
<properties>

BEXCodeCompare/src/main/java/info/codesaway/bex/matching/BEXMatcher.java

Lines changed: 158 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -161,66 +161,85 @@ private boolean match(final int from) {
161161
return false;
162162
}
163163

164-
// TODO: cache matchers?
165-
Matcher currentMatcher = patterns.get(0).matcher(this.text);
166-
// TODO: specify as option? (needed to handle spaces after group)
167-
currentMatcher.useTransparentBounds(true);
168-
169-
boolean foundMatch;
170-
int searchFrom = from;
171-
do {
172-
if (!currentMatcher.find(searchFrom)) {
173-
if (DEBUG) {
174-
System.out.println("Couldn't find match 0: " + from + "\t" + this.text());
175-
System.out.println("Pattern 0: @" + currentMatcher.pattern() + "@");
176-
}
177-
return false;
178-
}
179-
180-
int start = currentMatcher.start();
181-
int startWithOffset = start + this.offset;
182-
Entry<IntRange, BEXMatchingStateOption> entry = this.textStateMap.getEntry(startWithOffset);
183-
184-
if (entry != null && startWithOffset != entry.getKey().getStart()
185-
&& !entry.getValue().isCode()) {
186-
// Don't count as match, since part of string literal or comment
187-
// If match starts with the block, then okay to match
188-
// TODO: when else would it be okay to match?
189-
foundMatch = false;
190-
searchFrom = start + 1;
191-
} else {
192-
foundMatch = true;
193-
if (DEBUG) {
194-
if (start == startWithOffset) {
195-
System.out.printf("Found match! %d\t|%s|%n", start, currentMatcher.group());
196-
} else {
197-
System.out.printf("Found match! %d (%d)%n", start, startWithOffset);
198-
}
199-
System.out.println("Text states: " + this.textStateMap);
200-
}
201-
}
202-
} while (!foundMatch);
203-
204-
// Don't match if in string literal or comment
205-
// TODO: under what scenarios should it match stuff in comments?
206-
207-
// TODO: need to process code before match to detect if in block comment, line comment, or String literal
208-
209-
this.putCaptureGroups(currentMatcher);
210-
int regionStart = currentMatcher.end();
164+
// // TODO: cache matchers?
165+
// Matcher currentMatcher = patterns.get(0).matcher(this.text);
166+
// // TODO: specify as option? (needed to handle spaces after group)
167+
// currentMatcher.useTransparentBounds(true);
168+
//
169+
// boolean foundMatch;
170+
// int searchFrom = from;
171+
// do {
172+
// if (!currentMatcher.find(searchFrom)) {
173+
// if (DEBUG) {
174+
// System.out.println("Couldn't find match 0: " + from + "\t" + this.text());
175+
// System.out.println("Pattern 0: @" + currentMatcher.pattern() + "@");
176+
// }
177+
// return false;
178+
// }
179+
//
180+
// int start = currentMatcher.start();
181+
// int startWithOffset = start + this.offset;
182+
// Entry<IntRange, BEXMatchingStateOption> entry = this.textStateMap.getEntry(startWithOffset);
183+
//
184+
// // XXX: think this is what prevents test from passing that first match is in String literal
185+
// // TODO: see about refactoring to use for loop to handle first match no differently than other matches
186+
// // (only difference is wouldn't have an actual group, so this would just pass)
187+
// if (entry != null && startWithOffset != entry.getKey().getStart()
188+
// && !entry.getValue().isCode()) {
189+
// // Don't count as match, since part of string literal or comment
190+
// // If match starts with the block, then okay to match
191+
// // TODO: when else would it be okay to match?
192+
// foundMatch = false;
193+
// searchFrom = start + 1;
194+
// } else {
195+
// foundMatch = true;
196+
// if (DEBUG) {
197+
// if (start == startWithOffset) {
198+
// System.out.printf("Found match! %d\t|%s|%n", start, currentMatcher.group());
199+
// } else {
200+
// System.out.printf("Found match! %d (%d)%n", start, startWithOffset);
201+
// }
202+
// System.out.println("Text states: " + this.textStateMap);
203+
// }
204+
// }
205+
// } while (!foundMatch);
206+
//
207+
// // Don't match if in string literal or comment
208+
// // TODO: under what scenarios should it match stuff in comments?
209+
//
210+
// // TODO: need to process code before match to detect if in block comment, line comment, or String literal
211+
//
212+
// this.putCaptureGroups(currentMatcher);
213+
// int regionStart = currentMatcher.end();
214+
215+
int regionStart = from;
211216
// TODO: keep track of matchStart (such as if requires multiple passes to find next match)
212217

213-
for (int i = 0; i < patterns.size() - 1; i++) {
214-
Pattern nextPattern = patterns.get(i + 1);
215-
Matcher nextMatcher = nextPattern.matcher(this.text);
218+
int firstMatchStart = -1;
219+
220+
outer: for (int i = 0; i < patterns.size(); i++) {
221+
// for (int i = 0; i < patterns.size() - 1; i++) {
222+
Pattern pattern = patterns.get(i);
223+
// Pattern nextPattern = patterns.get(i + 1);
224+
// TODO: can we reuse the matcher and just change out the pattern?
225+
Matcher matcher = pattern.matcher(this.text);
226+
// Matcher nextMatcher = nextPattern.matcher(this.text);
216227

217228
if (DEBUG) {
218-
System.out.println("Trying matcher " + (i + 1));
229+
System.out.println("Trying matcher " + i);
230+
// System.out.println("Trying matcher " + (i + 1));
219231
System.out.println("Region start: " + regionStart);
220232
}
221233

222-
BEXGroupMatchSetting groupMatchSetting = this.parentPattern.getGroupMatchSettings()
223-
.getOrDefault(i, DEFAULT);
234+
BEXGroupMatchSetting groupMatchSetting;
235+
236+
if (i == 0) {
237+
// No group match (treat as "optional", since won't match anything)
238+
groupMatchSetting = DEFAULT.turnOn(BEXGroupMatchSetting.OPTIONAL);
239+
} else {
240+
groupMatchSetting = this.parentPattern.getGroupMatchSettings().getOrDefault(i - 1, DEFAULT);
241+
// groupMatchSetting = this.parentPattern.getGroupMatchSettings().getOrDefault(i, DEFAULT);
242+
}
224243

225244
// If the group isn't optional, start searching with next character
226245
// (since group must match something, so match would be at least 1 character)
@@ -237,25 +256,66 @@ private boolean match(final int from) {
237256
// return false;
238257
// }
239258

240-
nextMatcher.region(matcherRegionStart, this.text.length());
241-
nextMatcher.useTransparentBounds(true);
259+
matcher.region(matcherRegionStart, this.text.length());
260+
matcher.useTransparentBounds(true);
261+
// nextMatcher.region(matcherRegionStart, this.text.length());
262+
// nextMatcher.useTransparentBounds(true);
242263

243-
if (!nextMatcher.find()) {
264+
if (!matcher.find()) {
265+
// if (!nextMatcher.find()) {
244266
if (DEBUG) {
245-
System.out.printf("Didn't match next matcher %d%n", i + 1);
246-
System.out.println("Pattern: " + nextPattern);
267+
System.out.printf("Didn't match %d%n", i);
268+
// System.out.printf("Didn't match next matcher %d%n", i + 1);
269+
System.out.println("Pattern: " + pattern);
270+
// System.out.println("Pattern: " + nextPattern);
247271
System.out.println("Text: " + this.text.subSequence(regionStart, this.text.length()));
248272
}
249273
return false;
250274
}
251275

252276
if (DEBUG) {
253-
System.out.printf("Matched next match %d %s\t|%s|%n", i + 1, nextMatcher.pattern(),
254-
nextMatcher.group());
277+
System.out.printf("Matched %d %s\t|%s|%n", i, matcher.pattern(), matcher.group());
278+
// System.out.printf("Matched next match %d %s\t|%s|%n", i + 1, nextMatcher.pattern(),
279+
// nextMatcher.group());
255280
}
256281

257282
int start = regionStart;
258-
int end = nextMatcher.start();
283+
int end = matcher.start();
284+
// int end = nextMatcher.start();
285+
286+
// Handle first match (since has no group)
287+
if (i == 0) {
288+
// TODO: check that state is valid
289+
int startWithOffset = matcher.start() + this.offset;
290+
Entry<IntRange, BEXMatchingStateOption> entry = this.textStateMap.getEntry(startWithOffset);
291+
292+
boolean isValid;
293+
if (entry != null && startWithOffset != entry.getKey().getStart()
294+
&& !entry.getValue().isCode()) {
295+
// Don't count as match, since part of string literal or comment
296+
// If match starts with the block, then okay to match
297+
// TODO: when else would it be okay to match?
298+
isValid = false;
299+
if (DEBUG) {
300+
System.out.println("Invalid entry, try first match again: " + entry);
301+
}
302+
} else {
303+
isValid = true;
304+
}
305+
306+
if (!isValid) {
307+
i--;
308+
// TODO: should this instead be matcher.end()?
309+
// (start + 1 would be trying the next character, just like regex would do)
310+
regionStart = matcher.start() + 1;
311+
continue;
312+
}
313+
314+
this.putCaptureGroups(matcher);
315+
firstMatchStart = matcher.start();
316+
regionStart = matcher.end();
317+
continue;
318+
}
259319

260320
int startWithOffset = start + this.offset;
261321
Entry<IntRange, BEXMatchingStateOption> entry = this.textStateMap.getEntry(startWithOffset);
@@ -271,17 +331,26 @@ private boolean match(final int from) {
271331
if (DEBUG) {
272332
System.out.println("Performing search with initialState " + initialState);
273333
}
274-
275334
BEXMatchingState state = this.search(start, end, groupMatchSetting, initialState);
276335

277336
while (!state.isValid(end, initialState.getOptions())) {
278337
// TODO: if has mismatched brackets, start over and try to find after this?
279338
// This way, if one line in a file isn't valid, could still handle other lines (versus never matching ever)
280339
if (state.hasMismatchedBrackets()) {
281340
if (DEBUG) {
282-
System.out.println(state);
341+
System.out.println("Mismatched brackets: " + state);
283342
}
284-
return false;
343+
344+
// Start outer from first pattern and try again
345+
// TODO: when should we not start at first pattern?
346+
i = -1;
347+
this.clearGroups();
348+
349+
// TODO: is this the correct place to start the next try?
350+
// (seems likely, since this is right after the invalid bracket)
351+
regionStart = state.getPosition() + 1;
352+
353+
continue outer;
285354
}
286355

287356
// TODO: handle what if not valid (in this case, expand group)
@@ -310,30 +379,39 @@ private boolean match(final int from) {
310379
+ this.text.subSequence(start, position));
311380
}
312381

313-
nextMatcher.region(position, this.text.length());
382+
matcher.region(position, this.text.length());
383+
// nextMatcher.region(position, this.text.length());
314384
// TODO: specify as option? (needed to handle spaces after group)
315-
nextMatcher.useTransparentBounds(true);
385+
matcher.useTransparentBounds(true);
386+
// nextMatcher.useTransparentBounds(true);
316387

317-
if (!nextMatcher.find()) {
388+
if (!matcher.find()) {
389+
// if (!nextMatcher.find()) {
318390
// State is valid and cannot find another match
319391
// In this case, skip and try again from beginning at later point?
320392
if (DEBUG) {
321-
System.out.println("Cannot find next match: " + (i + 1));
393+
System.out.println("Cannot find match: " + i);
394+
// System.out.println("Cannot find next match: " + (i + 1));
322395
}
323396
return false;
324397
}
325398

326-
end = nextMatcher.start();
399+
end = matcher.start();
400+
// end = nextMatcher.start();
327401

328402
if (end != position) {
329403
// TODO: there may be extra stuff between the valid position and the next start
330404
// (if this is also valid, it would be okay)
331405
if (DEBUG) {
332-
System.out.printf("New scenario %d: %d\t%d\t%s%n", i + 1, nextMatcher.start(), position,
333-
this.text.subSequence(position, nextMatcher.start()));
334-
335-
System.out.printf("Position does not match next matcher start: %d != %d%n", position,
336-
nextMatcher.start());
406+
System.out.printf("New scenario %d: %d\t%d\t%s%n", i, matcher.start(), position,
407+
this.text.subSequence(position, matcher.start()));
408+
// System.out.printf("New scenario %d: %d\t%d\t%s%n", i + 1, nextMatcher.start(), position,
409+
// this.text.subSequence(position, nextMatcher.start()));
410+
411+
System.out.printf("Position does not match matcher start: %d != %d%n", position,
412+
matcher.start());
413+
// System.out.printf("Position does not match next matcher start: %d != %d%n", position,
414+
// nextMatcher.start());
337415
}
338416
// return false;
339417
// TODO: what should I pass for initial state
@@ -351,7 +429,8 @@ private boolean match(final int from) {
351429
return false;
352430
}
353431

354-
String group = this.parentPattern.getGroups().get(i);
432+
String group = this.parentPattern.getGroups().get(i - 1);
433+
// String group = this.parentPattern.getGroups().get(i);
355434
// String value = this.text.subSequence(start, end).toString();
356435

357436
if (DEBUG) {
@@ -393,13 +472,14 @@ private boolean match(final int from) {
393472
}
394473
}
395474

396-
this.putCaptureGroups(nextMatcher);
475+
this.putCaptureGroups(matcher);
397476

398477
// System.out.printf("%s: @%s@%n", group, value);
399-
regionStart = nextMatcher.end();
478+
regionStart = matcher.end();
400479
}
401480

402-
int matchStart = currentMatcher.start();
481+
int matchStart = firstMatchStart;
482+
// int matchStart = currentMatcher.start();
403483
int matchEnd = regionStart;
404484

405485
this.matchRange.set(matchStart, matchEnd);

BEXCodeCompare/src/test/java/info/codesaway/bex/matching/BEXMatcherTest.java

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package info.codesaway.bex.matching;
22

33
import static info.codesaway.bex.matching.MatcherTestHelper.testBEXMatch;
4+
import static info.codesaway.bex.matching.MatcherTestHelper.testBEXMatchEntries;
45
import static info.codesaway.bex.matching.MatcherTestHelper.testBEXMatchReplaceAll;
56
import static info.codesaway.bex.matching.MatcherTestHelper.testJustBEXMatch;
67
import static info.codesaway.bex.matching.MatcherTestHelper.testNoBEXMatch;
@@ -130,6 +131,13 @@ void testInStringLiteralShouldIgnore() {
130131
testNoBEXMatch(pattern, text);
131132
}
132133

134+
@Test
135+
void testInStringLiteralPatternHasContextSoMatch() {
136+
String pattern = "\"try { }\"";
137+
String text = "\"try { }\"";
138+
testJustBEXMatch(pattern, text);
139+
}
140+
133141
@Test
134142
void testInStringLiteralShouldIgnoreEvenWithoutEnding() {
135143
String pattern = "try { }";
@@ -464,4 +472,32 @@ void testOptionalLineSeparatorMatchWithActualNewLine() {
464472
String text = "something else is at the end of this line";
465473
testBEXMatch(pattern, text, "else is at the end of this line");
466474
}
475+
476+
@Test
477+
void testMatchViaBacktrace() {
478+
String pattern = "if (:[?before]blah:[?after])";
479+
String text = "if (something) blah\n"
480+
+ "if (something == blah)";
481+
482+
testBEXMatchEntries(pattern, text, entry("before", "something == "), entry("after", ""));
483+
}
484+
485+
@Test
486+
void testMatchViaBacktraceBeforeAndAfterRequiredNoMatch() {
487+
String pattern = "if (:[before]blah:[after])";
488+
String text = "if (something) blah\n"
489+
+ "if (something == blah)";
490+
491+
testNoBEXMatch(pattern, text);
492+
}
493+
494+
@Test
495+
void testMatchViaBacktraceBeforeAndAfterRequiredHasMatch() {
496+
String pattern = "if (:[before]blah:[after])";
497+
String text = "if (something) blah\n"
498+
+ "if (something == blah)\n"
499+
+ "if (something == blah && fun)";
500+
501+
testBEXMatchEntries(pattern, text, entry("before", "something == "), entry("after", " && fun"));
502+
}
467503
}

0 commit comments

Comments
 (0)