2121import java .io .Serializable ;
2222import java .util .ArrayList ;
2323import java .util .Arrays ;
24+ import java .util .Collection ;
25+ import java .util .Collections ;
2426import java .util .HashMap ;
27+ import java .util .HashSet ;
2528import java .util .List ;
2629import java .util .Map ;
2730import java .util .Objects ;
2831import java .util .Optional ;
2932import java .util .Random ;
3033import java .util .StringJoiner ;
34+ import java .util .function .Predicate ;
3135import java .util .regex .Pattern ;
3236import java .util .stream .IntStream ;
3337import java .util .stream .Stream ;
@@ -113,9 +117,24 @@ public static Coregex literal(String literal, int flags) {
113117 return new Concat (first , rest );
114118 }
115119
120+ public static Coregex wordBoundary (int flags , boolean positive ) {
121+ Set word =
122+ Set .builder (flags ).range ('a' , 'z' ).range ('A' , 'Z' ).range ('0' , '9' ).single ('_' ).build ();
123+ Set notWord = Set .builder (flags ).union (word ).negate ().build ();
124+ Group .Type lookbehind = positive ? Group .Type .LOOKBEHIND : Group .Type .NEGATIVE_LOOKBEHIND ;
125+ Group .Type lookahead = positive ? Group .Type .LOOKAHEAD : Group .Type .NEGATIVE_LOOKAHEAD ;
126+ return new Union (
127+ new Concat (new Group (lookbehind , empty ()), new Group (lookahead , word )),
128+ new Concat (new Group (lookbehind , word ), new Group (lookahead , empty ())),
129+ new Concat (new Group (lookbehind , word ), new Group (lookahead , notWord )),
130+ new Concat (new Group (lookbehind , notWord ), new Group (lookahead , word )));
131+ }
132+
116133 Coregex () {}
117134
118- abstract void generate (Context ctx );
/**
 * Writes generated output for this coregex into the given context.
 *
 * @param ctx context that receives the output
 * @throws RewindException NOTE(review): presumably signals that the current attempt cannot
 *     satisfy a constraint; {@code generate(long)} catches it and retries — confirm
 */
135+ abstract void generate (Context ctx ) throws RewindException ;
136+
/**
 * Matches this coregex against the beginning of {@code input}.
 *
 * <p>Callers in this file treat every returned element as a remainder of {@code input} left
 * after a match; an empty element means the whole input was consumed, and an empty array means
 * no match.
 *
 * @param input text to match
 * @param ctx context used to resolve group references during matching
 * @return possible remainders of {@code input}
 */
137+ abstract String [] match (String input , Context ctx );
119138
120139 /**
121140 * Converts this coregex into ones that produce "smaller" values.
@@ -132,10 +151,26 @@ public static Coregex literal(String literal, int flags) {
132151 */
133152 public final String generate (long seed ) {
134153 Random rng = new Random (seed );
135- try (Context ctx = new Context (rng )) {
136- generate (ctx );
137- return ctx .toString ();
154+ int attempt = 0 ;
155+ do {
156+ try (Context ctx = new Context (rng )) {
157+ generate (ctx );
158+ return ctx .toString (true );
159+ } catch (RewindException ignored ) {
160+ }
161+ } while (++attempt < Context .maxAttempts );
162+ throw new IllegalStateException (
163+ "Cannot generate string for " + this + " after " + Context .maxAttempts + " attempts" );
164+ }
165+
166+ final boolean matches (String input , Context ctx ) {
167+ String [] matches = match (input , ctx );
168+ for (String match : matches ) {
169+ if (match .isEmpty ()) {
170+ return true ;
171+ }
138172 }
173+ return false ;
139174 }
140175
141176 /**
@@ -179,6 +214,20 @@ void generate(Context ctx) {
179214 }
180215 }
181216
217+ @ Override
218+ String [] match (String input , Context ctx ) {
219+ Collection <String > match = Arrays .asList (first .match (input , ctx ));
220+ for (int i = 0 ; i < rest .length && !match .isEmpty (); i ++) {
221+ Collection <String > nextMatch = new HashSet <>(match .size ());
222+ Coregex coregex = rest [i ];
223+ for (String tail : match ) {
224+ nextMatch .addAll (Arrays .asList (coregex .match (tail , ctx )));
225+ }
226+ match = nextMatch ;
227+ }
228+ return match .toArray (new String [0 ]);
229+ }
230+
182231 /** {@inheritDoc} */
183232 @ Override
184233 public Stream <Concat > shrink () {
@@ -240,11 +289,15 @@ public String toString() {
240289 }
241290
242291 static final class Context implements Appendable , AutoCloseable {
292+ static final int maxAttempts = 100 ;
293+
243294 private final Context parent ;
244295 private final int index ;
245296 private final String name ;
246297 private final StringBuilder buffer = new StringBuilder ();
247- private final Map <Serializable , Context > groups ;
298+ private final Map <Serializable , String > groups ;
299+
300+ private Predicate <Context > lookbehind = null ;
248301
249302 final Random rng ;
250303
@@ -282,32 +335,46 @@ public Context append(CharSequence csq, int start, int end) {
282335 return this ;
283336 }
284337
285- int index () {
286- return parent .index ();
338+ int length (boolean full ) {
339+ if (!full || null == parent ) {
340+ return buffer .length ();
341+ }
342+ return parent .length (true ) + buffer .length ();
287343 }
288344
289- Context ref (Serializable ref ) {
/**
 * Looks up the text stored in {@code groups} under the given key.
 *
 * @param ref key of the group; {@code close()} stores entries under a group index or name
 * @return the stored text, or whatever {@code groups.get} yields when nothing is stored
 */
345+ String ref (Serializable ref ) {
290346 return groups .get (ref );
291347 }
292348
349+ void finalizeWithLookbehind (Predicate <Context > lookbehind ) {
350+ Predicate <Context > oldLookbehind = this .lookbehind ;
351+ this .lookbehind = null == oldLookbehind ? lookbehind : oldLookbehind .and (lookbehind );
352+ }
353+
293354 @ Override
294- public void close () {
355+ public void close () throws RewindException {
295356 Context parent = this .parent ;
296357 if (null == parent ) {
297- return ;
298- }
299- parent .append (this .buffer );
300- if (0 < index ) {
301- parent .groups .put (index , this );
302- }
303- if (null != name ) {
304- parent .groups .put (name , this );
358+ if (null != lookbehind && !lookbehind .test (this )) {
359+ throw new RewindException ();
360+ }
361+ } else {
362+ parent .append (this .buffer );
363+ if (0 < index ) {
364+ parent .groups .put (index , toString (false ));
365+ }
366+ if (null != name ) {
367+ parent .groups .put (name , toString (false ));
368+ }
369+ parent .finalizeWithLookbehind (lookbehind );
305370 }
306371 }
307372
308- @ Override
309- public String toString () {
310- return buffer .toString ();
373+ String toString (boolean full ) {
374+ if (!full || null == parent ) {
375+ return buffer .toString ();
376+ }
377+ return parent .toString (true ) + buffer ;
311378 }
312379 }
313380
@@ -362,17 +429,32 @@ private Group(Type type, String name, Coregex group) {
362429 }
363430
364431 @ Override
365- void generate (Context ctx ) {
432+ @ SuppressWarnings ("fallthrough" )
433+ void generate (Context ctx ) throws RewindException {
434+ boolean positive = true ;
366435 switch (type ) {
367436 case NON_CAPTURING :
368437 case ATOMIC :
369438 group .generate (ctx );
370439 break ;
371- case LOOKAHEAD :
440+ case NEGATIVE_LOOKBEHIND :
441+ positive = false ;
442+ // fall through
372443 case LOOKBEHIND :
444+ if (positive == group .matches (ctx .toString (true ), ctx )) {
445+ break ;
446+ }
447+ throw new RewindException ();
373448 case NEGATIVE_LOOKAHEAD :
374- case NEGATIVE_LOOKBEHIND :
375- // FIXME
449+ positive = false ;
450+ // fall through
451+ case LOOKAHEAD :
452+ int length = ctx .length (true );
453+ Coregex fullLookbehind =
454+ new Concat (any ().quantify (length , length , Quantified .Type .GREEDY ), group );
455+ final boolean pos = positive ;
456+ ctx .finalizeWithLookbehind (
457+ root -> pos == fullLookbehind .matches (root .toString (true ), root ));
376458 break ;
377459 default :
378460 try (Context childCtx = new Context (ctx , name )) {
@@ -381,6 +463,24 @@ void generate(Context ctx) {
381463 }
382464 }
383465
466+ @ Override
467+ String [] match (String input , Context ctx ) {
468+ switch (type ) {
469+ case NON_CAPTURING :
470+ case ATOMIC :
471+ return group .match (input , ctx );
472+ case LOOKAHEAD :
473+ case LOOKBEHIND :
474+ case NEGATIVE_LOOKAHEAD :
475+ case NEGATIVE_LOOKBEHIND :
476+ return new String [] {input };
477+ default :
478+ try (Context childCtx = new Context (ctx , name )) {
479+ return group .match (input , childCtx );
480+ }
481+ }
482+ }
483+
384484 /** {@inheritDoc} */
385485 @ Override
386486 public Stream <Group > shrink () {
@@ -538,16 +638,39 @@ public Quantified(Coregex quantified, int min, int max, Type type) {
538638 }
539639
540640 @ Override
541- void generate (Context ctx ) {
641+ void generate (Context ctx ) throws RewindException {
542642 int quantifier = 0 , min = this .min , max = this .max ;
543643 for (; quantifier < min ; quantifier ++) {
544644 quantified .generate (ctx );
545645 }
546- while ( (-1 == max || quantifier ++ < max ) && 0 != ctx .rng .nextInt (4 )) {
646+ for (; (-1 == max || quantifier < max ) && 0 != ctx .rng .nextInt (4 ); quantifier ++ ) {
547647 quantified .generate (ctx );
548648 }
549649 }
550650
651+ @ Override
652+ String [] match (String input , Context ctx ) {
653+ int quantifier = 0 , min = this .min , max = this .max ;
654+ java .util .Set <String > match = new HashSet <>(Collections .singleton (input ));
655+ for (; quantifier < min && !match .isEmpty (); quantifier ++) {
656+ java .util .Set <String > nextMatch = new HashSet <>(match .size ());
657+ for (String tail : match ) {
658+ nextMatch .addAll (Arrays .asList (quantified .match (tail , ctx )));
659+ }
660+ match = nextMatch ;
661+ }
662+ for (; (-1 == max || quantifier < max ) && !match .isEmpty (); quantifier ++) {
663+ java .util .Set <String > nextMatch = new HashSet <>(match .size ());
664+ for (String tail : match ) {
665+ nextMatch .addAll (Arrays .asList (quantified .match (tail , ctx )));
666+ }
667+ if (!match .addAll (nextMatch )) {
668+ break ;
669+ }
670+ }
671+ return match .toArray (new String [0 ]);
672+ }
673+
551674 /** {@inheritDoc} */
552675 @ Override
553676 public Stream <Quantified > shrink () {
@@ -683,7 +806,13 @@ public Ref(int index) {
683806
/**
 * Appends to {@code ctx} the text that {@code ctx.ref} returns for this reference.
 *
 * <p>NOTE(review): what happens when no text is stored for the reference is not visible from
 * this block — confirm against {@code Context.append}.
 */
684807 @ Override
685808 void generate (Context ctx ) {
686- ctx .append (ctx .ref (ref ).toString ());
809+ ctx .append (ctx .ref (ref ));
810+ }
811+
812+ @ Override
813+ String [] match (String input , Context ctx ) {
814+ String ref = ctx .ref (this .ref );
815+ return input .startsWith (ref ) ? new String [] {input .substring (ref .length ())} : new String [0 ];
687816 }
688817
689818 /** {@inheritDoc} */
@@ -737,11 +866,21 @@ public Union(Coregex first, Coregex... rest) {
737866 }
738867
739868 @ Override
740- void generate (Context ctx ) {
869+ void generate (Context ctx ) throws RewindException {
741870 int index = ctx .rng .nextInt (rest .length + 1 );
742871 (index < rest .length ? rest [index ] : first ).generate (ctx );
743872 }
744873
874+ @ Override
875+ String [] match (String input , Context ctx ) {
876+ Collection <String > match = new HashSet <>(rest .length + 1 );
877+ match .addAll (Arrays .asList (first .match (input , ctx )));
878+ for (Coregex coregex : rest ) {
879+ match .addAll (Arrays .asList (coregex .match (input , ctx )));
880+ }
881+ return match .toArray (new String [0 ]);
882+ }
883+
745884 /** {@inheritDoc} */
746885 @ Override
747886 public Stream <Union > shrink () {
0 commit comments