@@ -161,66 +161,85 @@ private boolean match(final int from) {
161161 return false ;
162162 }
163163
164- // TODO: cache matchers?
165- Matcher currentMatcher = patterns .get (0 ).matcher (this .text );
166- // TODO: specify as option? (needed to handle spaces after group)
167- currentMatcher .useTransparentBounds (true );
168-
169- boolean foundMatch ;
170- int searchFrom = from ;
171- do {
172- if (!currentMatcher .find (searchFrom )) {
173- if (DEBUG ) {
174- System .out .println ("Couldn't find match 0: " + from + "\t " + this .text ());
175- System .out .println ("Pattern 0: @" + currentMatcher .pattern () + "@" );
176- }
177- return false ;
178- }
179-
180- int start = currentMatcher .start ();
181- int startWithOffset = start + this .offset ;
182- Entry <IntRange , BEXMatchingStateOption > entry = this .textStateMap .getEntry (startWithOffset );
183-
184- if (entry != null && startWithOffset != entry .getKey ().getStart ()
185- && !entry .getValue ().isCode ()) {
186- // Don't count as match, since part of string literal or comment
187- // If match starts with the block, then okay to match
188- // TODO: when else would it be okay to match?
189- foundMatch = false ;
190- searchFrom = start + 1 ;
191- } else {
192- foundMatch = true ;
193- if (DEBUG ) {
194- if (start == startWithOffset ) {
195- System .out .printf ("Found match! %d\t |%s|%n" , start , currentMatcher .group ());
196- } else {
197- System .out .printf ("Found match! %d (%d)%n" , start , startWithOffset );
198- }
199- System .out .println ("Text states: " + this .textStateMap );
200- }
201- }
202- } while (!foundMatch );
203-
204- // Don't match if in string literal or comment
205- // TODO: under what scenarios should it match stuff in comments?
206-
207- // TODO: need to process code before match to detect if in block comment, line comment, or String literal
208-
209- this .putCaptureGroups (currentMatcher );
210- int regionStart = currentMatcher .end ();
164+ // // TODO: cache matchers?
165+ // Matcher currentMatcher = patterns.get(0).matcher(this.text);
166+ // // TODO: specify as option? (needed to handle spaces after group)
167+ // currentMatcher.useTransparentBounds(true);
168+ //
169+ // boolean foundMatch;
170+ // int searchFrom = from;
171+ // do {
172+ // if (!currentMatcher.find(searchFrom)) {
173+ // if (DEBUG) {
174+ // System.out.println("Couldn't find match 0: " + from + "\t" + this.text());
175+ // System.out.println("Pattern 0: @" + currentMatcher.pattern() + "@");
176+ // }
177+ // return false;
178+ // }
179+ //
180+ // int start = currentMatcher.start();
181+ // int startWithOffset = start + this.offset;
182+ // Entry<IntRange, BEXMatchingStateOption> entry = this.textStateMap.getEntry(startWithOffset);
183+ //
184+ // // XXX: think this is what prevents test from passing that first match is in String literal
185+ // // TODO: see about refactoring to use for loop to handle first match no differently than other matches
186+ // // (only difference is wouldn't have an actual group, so this would just pass)
187+ // if (entry != null && startWithOffset != entry.getKey().getStart()
188+ // && !entry.getValue().isCode()) {
189+ // // Don't count as match, since part of string literal or comment
190+ // // If match starts with the block, then okay to match
191+ // // TODO: when else would it be okay to match?
192+ // foundMatch = false;
193+ // searchFrom = start + 1;
194+ // } else {
195+ // foundMatch = true;
196+ // if (DEBUG) {
197+ // if (start == startWithOffset) {
198+ // System.out.printf("Found match! %d\t|%s|%n", start, currentMatcher.group());
199+ // } else {
200+ // System.out.printf("Found match! %d (%d)%n", start, startWithOffset);
201+ // }
202+ // System.out.println("Text states: " + this.textStateMap);
203+ // }
204+ // }
205+ // } while (!foundMatch);
206+ //
207+ // // Don't match if in string literal or comment
208+ // // TODO: under what scenarios should it match stuff in comments?
209+ //
210+ // // TODO: need to process code before match to detect if in block comment, line comment, or String literal
211+ //
212+ // this.putCaptureGroups(currentMatcher);
213+ // int regionStart = currentMatcher.end();
214+
215+ int regionStart = from ;
211216 // TODO: keep track of matchStart (such as if requires multiple passes to find next match)
212217
213- for (int i = 0 ; i < patterns .size () - 1 ; i ++) {
214- Pattern nextPattern = patterns .get (i + 1 );
215- Matcher nextMatcher = nextPattern .matcher (this .text );
218+ int firstMatchStart = -1 ;
219+
220+ outer : for (int i = 0 ; i < patterns .size (); i ++) {
221+ // for (int i = 0; i < patterns.size() - 1; i++) {
222+ Pattern pattern = patterns .get (i );
223+ // Pattern nextPattern = patterns.get(i + 1);
224+ // TODO: can we reuse the matcher and just change out the pattern?
225+ Matcher matcher = pattern .matcher (this .text );
226+ // Matcher nextMatcher = nextPattern.matcher(this.text);
216227
217228 if (DEBUG ) {
218- System .out .println ("Trying matcher " + (i + 1 ));
229+ System .out .println ("Trying matcher " + i );
230+ // System.out.println("Trying matcher " + (i + 1));
219231 System .out .println ("Region start: " + regionStart );
220232 }
221233
222- BEXGroupMatchSetting groupMatchSetting = this .parentPattern .getGroupMatchSettings ()
223- .getOrDefault (i , DEFAULT );
234+ BEXGroupMatchSetting groupMatchSetting ;
235+
236+ if (i == 0 ) {
237+ // No group match (treat as "optional", since won't match anything)
238+ groupMatchSetting = DEFAULT .turnOn (BEXGroupMatchSetting .OPTIONAL );
239+ } else {
240+ groupMatchSetting = this .parentPattern .getGroupMatchSettings ().getOrDefault (i - 1 , DEFAULT );
241+ // groupMatchSetting = this.parentPattern.getGroupMatchSettings().getOrDefault(i, DEFAULT);
242+ }
224243
225244 // If the group isn't optional, start searching with next character
226245 // (since group must match something, so match would be at least 1 character)
@@ -237,25 +256,66 @@ private boolean match(final int from) {
237256 // return false;
238257 // }
239258
240- nextMatcher .region (matcherRegionStart , this .text .length ());
241- nextMatcher .useTransparentBounds (true );
259+ matcher .region (matcherRegionStart , this .text .length ());
260+ matcher .useTransparentBounds (true );
261+ // nextMatcher.region(matcherRegionStart, this.text.length());
262+ // nextMatcher.useTransparentBounds(true);
242263
243- if (!nextMatcher .find ()) {
264+ if (!matcher .find ()) {
265+ // if (!nextMatcher.find()) {
244266 if (DEBUG ) {
245- System .out .printf ("Didn't match next matcher %d%n" , i + 1 );
246- System .out .println ("Pattern: " + nextPattern );
267+ System .out .printf ("Didn't match %d%n" , i );
268+ // System.out.printf("Didn't match next matcher %d%n", i + 1);
269+ System .out .println ("Pattern: " + pattern );
270+ // System.out.println("Pattern: " + nextPattern);
247271 System .out .println ("Text: " + this .text .subSequence (regionStart , this .text .length ()));
248272 }
249273 return false ;
250274 }
251275
252276 if (DEBUG ) {
253- System .out .printf ("Matched next match %d %s\t |%s|%n" , i + 1 , nextMatcher .pattern (),
254- nextMatcher .group ());
277+ System .out .printf ("Matched %d %s\t |%s|%n" , i , matcher .pattern (), matcher .group ());
278+ // System.out.printf("Matched next match %d %s\t|%s|%n", i + 1, nextMatcher.pattern(),
279+ // nextMatcher.group());
255280 }
256281
257282 int start = regionStart ;
258- int end = nextMatcher .start ();
283+ int end = matcher .start ();
284+ // int end = nextMatcher.start();
285+
286+ // Handle first match (since has no group)
287+ if (i == 0 ) {
288+ // TODO: check that state is valid
289+ int startWithOffset = matcher .start () + this .offset ;
290+ Entry <IntRange , BEXMatchingStateOption > entry = this .textStateMap .getEntry (startWithOffset );
291+
292+ boolean isValid ;
293+ if (entry != null && startWithOffset != entry .getKey ().getStart ()
294+ && !entry .getValue ().isCode ()) {
295+ // Don't count as match, since part of string literal or comment
296+ // If match starts with the block, then okay to match
297+ // TODO: when else would it be okay to match?
298+ isValid = false ;
299+ if (DEBUG ) {
300+ System .out .println ("Invalid entry, try first match again: " + entry );
301+ }
302+ } else {
303+ isValid = true ;
304+ }
305+
306+ if (!isValid ) {
307+ i --;
308+ // TODO: should this instead be matcher.end()?
309+ // (start + 1 would be trying the next character, just like regex would do)
310+ regionStart = matcher .start () + 1 ;
311+ continue ;
312+ }
313+
314+ this .putCaptureGroups (matcher );
315+ firstMatchStart = matcher .start ();
316+ regionStart = matcher .end ();
317+ continue ;
318+ }
259319
260320 int startWithOffset = start + this .offset ;
261321 Entry <IntRange , BEXMatchingStateOption > entry = this .textStateMap .getEntry (startWithOffset );
@@ -271,17 +331,26 @@ private boolean match(final int from) {
271331 if (DEBUG ) {
272332 System .out .println ("Performing search with initialState " + initialState );
273333 }
274-
275334 BEXMatchingState state = this .search (start , end , groupMatchSetting , initialState );
276335
277336 while (!state .isValid (end , initialState .getOptions ())) {
278337 // TODO: if has mismatched brackets, start over and try to find after this?
279338 // This way, if one line in a file isn't valid, could still handle other lines (versus never matching ever)
280339 if (state .hasMismatchedBrackets ()) {
281340 if (DEBUG ) {
282- System .out .println (state );
341+ System .out .println ("Mismatched brackets: " + state );
283342 }
284- return false ;
343+
344+ // Start outer from first pattern and try again
345+ // TODO: when should we not start at first pattern?
346+ i = -1 ;
347+ this .clearGroups ();
348+
349+ // TODO: is this the correct place to start the next try?
350+ // (seems likely, since this is right after the invalid bracket)
351+ regionStart = state .getPosition () + 1 ;
352+
353+ continue outer ;
285354 }
286355
287356 // TODO: handle what if not valid (in this case, expand group)
@@ -310,30 +379,39 @@ private boolean match(final int from) {
310379 + this .text .subSequence (start , position ));
311380 }
312381
313- nextMatcher .region (position , this .text .length ());
382+ matcher .region (position , this .text .length ());
383+ // nextMatcher.region(position, this.text.length());
314384 // TODO: specify as option? (needed to handle spaces after group)
315- nextMatcher .useTransparentBounds (true );
385+ matcher .useTransparentBounds (true );
386+ // nextMatcher.useTransparentBounds(true);
316387
317- if (!nextMatcher .find ()) {
388+ if (!matcher .find ()) {
389+ // if (!nextMatcher.find()) {
318390 // State is valid and cannot find another match
319391 // In this case, skip and try again from beginning at later point?
320392 if (DEBUG ) {
321- System .out .println ("Cannot find next match: " + (i + 1 ));
393+ System .out .println ("Cannot find match: " + i );
394+ // System.out.println("Cannot find next match: " + (i + 1));
322395 }
323396 return false ;
324397 }
325398
326- end = nextMatcher .start ();
399+ end = matcher .start ();
400+ // end = nextMatcher.start();
327401
328402 if (end != position ) {
329403 // TODO: there may be extra stuff between the valid position and the next start
330404 // (if this is also valid, it would be okay)
331405 if (DEBUG ) {
332- System .out .printf ("New scenario %d: %d\t %d\t %s%n" , i + 1 , nextMatcher .start (), position ,
333- this .text .subSequence (position , nextMatcher .start ()));
334-
335- System .out .printf ("Position does not match next matcher start: %d != %d%n" , position ,
336- nextMatcher .start ());
406+ System .out .printf ("New scenario %d: %d\t %d\t %s%n" , i , matcher .start (), position ,
407+ this .text .subSequence (position , matcher .start ()));
408+ // System.out.printf("New scenario %d: %d\t%d\t%s%n", i + 1, nextMatcher.start(), position,
409+ // this.text.subSequence(position, nextMatcher.start()));
410+
411+ System .out .printf ("Position does not match matcher start: %d != %d%n" , position ,
412+ matcher .start ());
413+ // System.out.printf("Position does not match next matcher start: %d != %d%n", position,
414+ // nextMatcher.start());
337415 }
338416 // return false;
339417 // TODO: what should I pass for initial state
@@ -351,7 +429,8 @@ private boolean match(final int from) {
351429 return false ;
352430 }
353431
354- String group = this .parentPattern .getGroups ().get (i );
432+ String group = this .parentPattern .getGroups ().get (i - 1 );
433+ // String group = this.parentPattern.getGroups().get(i);
355434 // String value = this.text.subSequence(start, end).toString();
356435
357436 if (DEBUG ) {
@@ -393,13 +472,14 @@ private boolean match(final int from) {
393472 }
394473 }
395474
396- this .putCaptureGroups (nextMatcher );
475+ this .putCaptureGroups (matcher );
397476
398477 // System.out.printf("%s: @%s@%n", group, value);
399- regionStart = nextMatcher .end ();
478+ regionStart = matcher .end ();
400479 }
401480
402- int matchStart = currentMatcher .start ();
481+ int matchStart = firstMatchStart ;
482+ // int matchStart = currentMatcher.start();
403483 int matchEnd = regionStart ;
404484
405485 this .matchRange .set (matchStart , matchEnd );
0 commit comments