Skip to content

Commit 932c3eb

Browse files
authored
Merge pull request #175 from SimY4/topic/look-around
Topic/look around
2 parents 96ac4a7 + a8c46f7 commit 932c3eb

File tree

7 files changed

+307
-82
lines changed

7 files changed

+307
-82
lines changed

core/src/main/java/com/github/simy4/coregex/core/Coregex.java

Lines changed: 167 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,17 @@
2121
import java.io.Serializable;
2222
import java.util.ArrayList;
2323
import java.util.Arrays;
24+
import java.util.Collection;
25+
import java.util.Collections;
2426
import java.util.HashMap;
27+
import java.util.HashSet;
2528
import java.util.List;
2629
import java.util.Map;
2730
import java.util.Objects;
2831
import java.util.Optional;
2932
import java.util.Random;
3033
import java.util.StringJoiner;
34+
import java.util.function.Predicate;
3135
import java.util.regex.Pattern;
3236
import java.util.stream.IntStream;
3337
import java.util.stream.Stream;
@@ -113,9 +117,24 @@ public static Coregex literal(String literal, int flags) {
113117
return new Concat(first, rest);
114118
}
115119

120+
public static Coregex wordBoundary(int flags, boolean positive) {
121+
Set word =
122+
Set.builder(flags).range('a', 'z').range('A', 'Z').range('0', '9').single('_').build();
123+
Set notWord = Set.builder(flags).union(word).negate().build();
124+
Group.Type lookbehind = positive ? Group.Type.LOOKBEHIND : Group.Type.NEGATIVE_LOOKBEHIND;
125+
Group.Type lookahead = positive ? Group.Type.LOOKAHEAD : Group.Type.NEGATIVE_LOOKAHEAD;
126+
return new Union(
127+
new Concat(new Group(lookbehind, empty()), new Group(lookahead, word)),
128+
new Concat(new Group(lookbehind, word), new Group(lookahead, empty())),
129+
new Concat(new Group(lookbehind, word), new Group(lookahead, notWord)),
130+
new Concat(new Group(lookbehind, notWord), new Group(lookahead, word)));
131+
}
132+
116133
Coregex() {}
117134

118-
abstract void generate(Context ctx);
135+
abstract void generate(Context ctx) throws RewindException;
136+
137+
abstract String[] match(String input, Context ctx);
119138

120139
/**
121140
* Converts this coregex into ones that produce "smaller" values.
@@ -132,10 +151,26 @@ public static Coregex literal(String literal, int flags) {
132151
*/
133152
public final String generate(long seed) {
134153
Random rng = new Random(seed);
135-
try (Context ctx = new Context(rng)) {
136-
generate(ctx);
137-
return ctx.toString();
154+
int attempt = 0;
155+
do {
156+
try (Context ctx = new Context(rng)) {
157+
generate(ctx);
158+
return ctx.toString(true);
159+
} catch (RewindException ignored) {
160+
}
161+
} while (++attempt < Context.maxAttempts);
162+
throw new IllegalStateException(
163+
"Cannot generate string for " + this + " after " + Context.maxAttempts + " attempts");
164+
}
165+
166+
final boolean matches(String input, Context ctx) {
167+
String[] matches = match(input, ctx);
168+
for (String match : matches) {
169+
if (match.isEmpty()) {
170+
return true;
171+
}
138172
}
173+
return false;
139174
}
140175

141176
/**
@@ -179,6 +214,20 @@ void generate(Context ctx) {
179214
}
180215
}
181216

217+
@Override
218+
String[] match(String input, Context ctx) {
219+
Collection<String> match = Arrays.asList(first.match(input, ctx));
220+
for (int i = 0; i < rest.length && !match.isEmpty(); i++) {
221+
Collection<String> nextMatch = new HashSet<>(match.size());
222+
Coregex coregex = rest[i];
223+
for (String tail : match) {
224+
nextMatch.addAll(Arrays.asList(coregex.match(tail, ctx)));
225+
}
226+
match = nextMatch;
227+
}
228+
return match.toArray(new String[0]);
229+
}
230+
182231
/** {@inheritDoc} */
183232
@Override
184233
public Stream<Concat> shrink() {
@@ -240,11 +289,15 @@ public String toString() {
240289
}
241290

242291
static final class Context implements Appendable, AutoCloseable {
292+
static final int maxAttempts = 100;
293+
243294
private final Context parent;
244295
private final int index;
245296
private final String name;
246297
private final StringBuilder buffer = new StringBuilder();
247-
private final Map<Serializable, Context> groups;
298+
private final Map<Serializable, String> groups;
299+
300+
private Predicate<Context> lookbehind = null;
248301

249302
final Random rng;
250303

@@ -282,32 +335,46 @@ public Context append(CharSequence csq, int start, int end) {
282335
return this;
283336
}
284337

285-
int index() {
286-
return parent.index();
338+
int length(boolean full) {
339+
if (!full || null == parent) {
340+
return buffer.length();
341+
}
342+
return parent.length(true) + buffer.length();
287343
}
288344

289-
Context ref(Serializable ref) {
345+
String ref(Serializable ref) {
290346
return groups.get(ref);
291347
}
292348

349+
void finalizeWithLookbehind(Predicate<Context> lookbehind) {
350+
Predicate<Context> oldLookbehind = this.lookbehind;
351+
this.lookbehind = null == oldLookbehind ? lookbehind : oldLookbehind.and(lookbehind);
352+
}
353+
293354
@Override
294-
public void close() {
355+
public void close() throws RewindException {
295356
Context parent = this.parent;
296357
if (null == parent) {
297-
return;
298-
}
299-
parent.append(this.buffer);
300-
if (0 < index) {
301-
parent.groups.put(index, this);
302-
}
303-
if (null != name) {
304-
parent.groups.put(name, this);
358+
if (null != lookbehind && !lookbehind.test(this)) {
359+
throw new RewindException();
360+
}
361+
} else {
362+
parent.append(this.buffer);
363+
if (0 < index) {
364+
parent.groups.put(index, toString(false));
365+
}
366+
if (null != name) {
367+
parent.groups.put(name, toString(false));
368+
}
369+
parent.finalizeWithLookbehind(lookbehind);
305370
}
306371
}
307372

308-
@Override
309-
public String toString() {
310-
return buffer.toString();
373+
String toString(boolean full) {
374+
if (!full || null == parent) {
375+
return buffer.toString();
376+
}
377+
return parent.toString(true) + buffer;
311378
}
312379
}
313380

@@ -362,17 +429,32 @@ private Group(Type type, String name, Coregex group) {
362429
}
363430

364431
@Override
365-
void generate(Context ctx) {
432+
@SuppressWarnings("fallthrough")
433+
void generate(Context ctx) throws RewindException {
434+
boolean positive = true;
366435
switch (type) {
367436
case NON_CAPTURING:
368437
case ATOMIC:
369438
group.generate(ctx);
370439
break;
371-
case LOOKAHEAD:
440+
case NEGATIVE_LOOKBEHIND:
441+
positive = false;
442+
// fall through
372443
case LOOKBEHIND:
444+
if (positive == group.matches(ctx.toString(true), ctx)) {
445+
break;
446+
}
447+
throw new RewindException();
373448
case NEGATIVE_LOOKAHEAD:
374-
case NEGATIVE_LOOKBEHIND:
375-
// FIXME
449+
positive = false;
450+
// fall through
451+
case LOOKAHEAD:
452+
int length = ctx.length(true);
453+
Coregex fullLookbehind =
454+
new Concat(any().quantify(length, length, Quantified.Type.GREEDY), group);
455+
final boolean pos = positive;
456+
ctx.finalizeWithLookbehind(
457+
root -> pos == fullLookbehind.matches(root.toString(true), root));
376458
break;
377459
default:
378460
try (Context childCtx = new Context(ctx, name)) {
@@ -381,6 +463,24 @@ void generate(Context ctx) {
381463
}
382464
}
383465

466+
@Override
467+
String[] match(String input, Context ctx) {
468+
switch (type) {
469+
case NON_CAPTURING:
470+
case ATOMIC:
471+
return group.match(input, ctx);
472+
case LOOKAHEAD:
473+
case LOOKBEHIND:
474+
case NEGATIVE_LOOKAHEAD:
475+
case NEGATIVE_LOOKBEHIND:
476+
return new String[] {input};
477+
default:
478+
try (Context childCtx = new Context(ctx, name)) {
479+
return group.match(input, childCtx);
480+
}
481+
}
482+
}
483+
384484
/** {@inheritDoc} */
385485
@Override
386486
public Stream<Group> shrink() {
@@ -538,16 +638,39 @@ public Quantified(Coregex quantified, int min, int max, Type type) {
538638
}
539639

540640
@Override
541-
void generate(Context ctx) {
641+
void generate(Context ctx) throws RewindException {
542642
int quantifier = 0, min = this.min, max = this.max;
543643
for (; quantifier < min; quantifier++) {
544644
quantified.generate(ctx);
545645
}
546-
while ((-1 == max || quantifier++ < max) && 0 != ctx.rng.nextInt(4)) {
646+
for (; (-1 == max || quantifier < max) && 0 != ctx.rng.nextInt(4); quantifier++) {
547647
quantified.generate(ctx);
548648
}
549649
}
550650

651+
@Override
652+
String[] match(String input, Context ctx) {
653+
int quantifier = 0, min = this.min, max = this.max;
654+
java.util.Set<String> match = new HashSet<>(Collections.singleton(input));
655+
for (; quantifier < min && !match.isEmpty(); quantifier++) {
656+
java.util.Set<String> nextMatch = new HashSet<>(match.size());
657+
for (String tail : match) {
658+
nextMatch.addAll(Arrays.asList(quantified.match(tail, ctx)));
659+
}
660+
match = nextMatch;
661+
}
662+
for (; (-1 == max || quantifier < max) && !match.isEmpty(); quantifier++) {
663+
java.util.Set<String> nextMatch = new HashSet<>(match.size());
664+
for (String tail : match) {
665+
nextMatch.addAll(Arrays.asList(quantified.match(tail, ctx)));
666+
}
667+
if (!match.addAll(nextMatch)) {
668+
break;
669+
}
670+
}
671+
return match.toArray(new String[0]);
672+
}
673+
551674
/** {@inheritDoc} */
552675
@Override
553676
public Stream<Quantified> shrink() {
@@ -683,7 +806,13 @@ public Ref(int index) {
683806

684807
@Override
685808
void generate(Context ctx) {
686-
ctx.append(ctx.ref(ref).toString());
809+
ctx.append(ctx.ref(ref));
810+
}
811+
812+
@Override
813+
String[] match(String input, Context ctx) {
814+
String ref = ctx.ref(this.ref);
815+
return input.startsWith(ref) ? new String[] {input.substring(ref.length())} : new String[0];
687816
}
688817

689818
/** {@inheritDoc} */
@@ -737,11 +866,21 @@ public Union(Coregex first, Coregex... rest) {
737866
}
738867

739868
@Override
740-
void generate(Context ctx) {
869+
void generate(Context ctx) throws RewindException {
741870
int index = ctx.rng.nextInt(rest.length + 1);
742871
(index < rest.length ? rest[index] : first).generate(ctx);
743872
}
744873

874+
@Override
875+
String[] match(String input, Context ctx) {
876+
Collection<String> match = new HashSet<>(rest.length + 1);
877+
match.addAll(Arrays.asList(first.match(input, ctx)));
878+
for (Coregex coregex : rest) {
879+
match.addAll(Arrays.asList(coregex.match(input, ctx)));
880+
}
881+
return match.toArray(new String[0]);
882+
}
883+
745884
/** {@inheritDoc} */
746885
@Override
747886
public Stream<Union> shrink() {

0 commit comments

Comments
 (0)