Skip to content

Commit 9dfdf47

Browse files
authored
Merge pull request #174 from SimY4/topic/shrinking
New and improved shrinking.
2 parents dea4565 + e6a7254 commit 9dfdf47

File tree

11 files changed

+153
-130
lines changed

11 files changed

+153
-130
lines changed

core/src/main/java/com/github/simy4/coregex/core/Coregex.java

Lines changed: 60 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
import java.util.Random;
3030
import java.util.StringJoiner;
3131
import java.util.regex.Pattern;
32+
import java.util.stream.IntStream;
33+
import java.util.stream.Stream;
3234

3335
/**
3436
* Data representation of regex language.
@@ -116,11 +118,11 @@ public static Coregex literal(String literal, int flags) {
116118
abstract void generate(Context ctx);
117119

118120
/**
119-
* Converts this coregex into one that produce "smaller" values.
121+
* Converts this coregex into a stream of coregexes that produce "smaller" values.
120122
*
121-
* @return smaller coregex or empty if this coregex is already the smallest possible.
123+
* @return stream of smaller coregexes, or an empty stream if this coregex is already the smallest possible.
122124
*/
123-
public abstract Optional<Coregex> shrink();
125+
public abstract Stream<? extends Coregex> shrink();
124126

125127
/**
126128
* Samples one random string that matches this regex.
@@ -179,21 +181,22 @@ void generate(Context ctx) {
179181

180182
/** {@inheritDoc} */
181183
@Override
182-
public Optional<Coregex> shrink() {
183-
// shrink until every concatenated piece can shrink.
184-
Coregex first = this.first.shrink().orElse(null);
185-
Coregex[] rest = new Coregex[this.rest.length];
186-
for (int i = 0; i < rest.length; i++) {
187-
rest[i] = this.rest[i].shrink().orElse(null);
188-
}
189-
if (null == first && Arrays.stream(rest).allMatch(Objects::isNull)) {
190-
return Optional.empty();
191-
}
192-
if (null == first) first = this.first;
193-
for (int i = 0; i < rest.length; i++) {
194-
if (null == rest[i]) rest[i] = this.rest[i];
195-
}
196-
return Optional.of(new Concat(first, rest));
184+
public Stream<Concat> shrink() {
185+
// each candidate shrinks exactly one concatenated piece and keeps the other pieces as they are.
186+
return Stream.concat(
187+
first.shrink().map(chunk -> new Concat(chunk, rest)),
188+
IntStream.range(0, rest.length)
189+
.boxed()
190+
.flatMap(
191+
i ->
192+
rest[i]
193+
.shrink()
194+
.map(
195+
chunk -> {
196+
Coregex[] rest = Arrays.copyOf(this.rest, this.rest.length);
197+
rest[i] = chunk;
198+
return new Concat(first, rest);
199+
})));
197200
}
198201

199202
/**
@@ -380,16 +383,13 @@ void generate(Context ctx) {
380383

381384
/** {@inheritDoc} */
382385
@Override
383-
public Optional<Coregex> shrink() {
386+
public Stream<Group> shrink() {
384387
switch (type) {
385-
case NON_CAPTURING:
386-
case ATOMIC:
387-
return group.shrink().map(group -> new Group(type, group));
388388
case LOOKAHEAD:
389389
case LOOKBEHIND:
390390
case NEGATIVE_LOOKAHEAD:
391391
case NEGATIVE_LOOKBEHIND:
392-
return Optional.of(this);
392+
return Stream.empty();
393393
default:
394394
return group.shrink().map(group -> new Group(type, name, group));
395395
}
@@ -539,28 +539,25 @@ public Quantified(Coregex quantified, int min, int max, Type type) {
539539

540540
@Override
541541
void generate(Context ctx) {
542-
int quantifier = 0;
542+
int quantifier = 0, min = this.min, max = this.max;
543543
for (; quantifier < min; quantifier++) {
544544
quantified.generate(ctx);
545545
}
546-
int max = -1 == this.max ? Integer.MAX_VALUE : this.max;
547-
while (quantifier++ < max && 0 != ctx.rng.nextInt(4)) {
546+
while ((-1 == max || quantifier++ < max) && 0 != ctx.rng.nextInt(4)) {
548547
quantified.generate(ctx);
549548
}
550549
}
551550

552551
/** {@inheritDoc} */
553552
@Override
554-
public Optional<Coregex> shrink() {
555-
// reducing size first, only then reducing quantified piece.
556-
if (min == max) {
557-
return quantified.shrink().map(quantified -> new Quantified(quantified, min, max, type));
558-
}
559-
if (-1 == max) {
560-
return Optional.of(new Quantified(quantified, min, min + 128, type));
561-
}
562-
int max = Math.max(min, this.max / 2);
563-
return Optional.of(new Quantified(quantified, min, Math.max(min, max / 2)));
553+
public Stream<Quantified> shrink() {
554+
// reducing size and shrinking quantified piece.
555+
int min = this.min, max = this.max;
556+
return Stream.concat(
557+
IntStream.of(0, 1, 2, 3, 5, 8, 13)
558+
.filter(i -> -1 == max || min + i < max)
559+
.mapToObj(i -> new Quantified(quantified, min, min + i, type)),
560+
quantified.shrink().map(quantified -> new Quantified(quantified, min, max, type)));
564561
}
565562

566563
/**
@@ -691,8 +688,8 @@ void generate(Context ctx) {
691688

692689
/** {@inheritDoc} */
693690
@Override
694-
public Optional<Coregex> shrink() {
695-
return Optional.of(this);
691+
public Stream<Ref> shrink() {
692+
return Stream.empty();
696693
}
697694

698695
public Serializable ref() {
@@ -747,21 +744,31 @@ void generate(Context ctx) {
747744

748745
/** {@inheritDoc} */
749746
@Override
750-
public Optional<Coregex> shrink() {
751-
// shrink until every union piece can shrink.
752-
Coregex first = this.first.shrink().orElse(null);
753-
Coregex[] rest = new Coregex[this.rest.length];
754-
for (int i = 0; i < rest.length; i++) {
755-
rest[i] = this.rest[i].shrink().orElse(null);
756-
}
757-
if (null == first && Arrays.stream(rest).allMatch(Objects::isNull)) {
758-
return Optional.empty();
759-
}
760-
if (null == first) first = this.first;
761-
for (int i = 0; i < rest.length; i++) {
762-
if (null == rest[i]) rest[i] = this.rest[i];
763-
}
764-
return Optional.of(new Union(first, rest));
747+
public Stream<Union> shrink() {
748+
// candidates first drop one of the trailing choices, then shrink a single choice while keeping the others.
749+
return Stream.concat(
750+
IntStream.range(0, rest.length)
751+
.mapToObj(
752+
i -> {
753+
Coregex[] rest = new Coregex[this.rest.length - 1];
754+
System.arraycopy(this.rest, 0, rest, 0, i);
755+
System.arraycopy(this.rest, i + 1, rest, i, this.rest.length - i - 1);
756+
return new Union(first, rest);
757+
}),
758+
Stream.concat(
759+
first.shrink().map(chunk -> new Union(chunk, rest)),
760+
IntStream.range(0, rest.length)
761+
.boxed()
762+
.flatMap(
763+
i ->
764+
rest[i]
765+
.shrink()
766+
.map(
767+
chunk -> {
768+
Coregex[] rest = Arrays.copyOf(this.rest, this.rest.length);
769+
rest[i] = chunk;
770+
return new Union(first, rest);
771+
}))));
765772
}
766773

767774
/**

core/src/main/java/com/github/simy4/coregex/core/Set.java

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@
2020

2121
import java.io.Serializable;
2222
import java.util.BitSet;
23-
import java.util.Objects;
24-
import java.util.Optional;
2523
import java.util.OptionalInt;
2624
import java.util.function.IntPredicate;
2725
import java.util.regex.Pattern;
26+
import java.util.stream.Stream;
2827

2928
/**
3029
* Data representation of a set of characters AKA regular expression's char classes.
@@ -62,9 +61,6 @@ public final class Set extends Coregex implements IntPredicate, Serializable {
6261
chars.set(Character.MIN_VALUE, Character.MIN_SURROGATE);
6362
return new Set(chars, ".");
6463
});
65-
static final Lazy<Set> SMALLER =
66-
new Lazy<>(
67-
() -> builder().range('0', '9').range('a', 'z').range('A', 'Z').single('_').build());
6864

6965
/**
7066
* Creates an instance of {@link Set} builder.
@@ -123,13 +119,21 @@ public Set negate() {
123119

124120
/** {@inheritDoc} */
125121
@Override
126-
public Optional<Coregex> shrink() {
127-
BitSet chars = BitSet.valueOf(this.chars.toLongArray());
128-
chars.and(SMALLER.get().chars);
129-
if (chars.isEmpty() || chars.equals(this.chars)) {
130-
return Optional.empty();
122+
public Stream<Set> shrink() {
123+
Stream.Builder<Set> builder = Stream.builder();
124+
Set.Builder smaller = builder().range('0', '9').intersect(this);
125+
if (!smaller.chars.isEmpty() && !smaller.chars.equals(this.chars)) {
126+
builder.add(smaller.build());
127+
}
128+
smaller = builder(0).range('0', '9').range('a', 'z').intersect(this);
129+
if (!smaller.chars.isEmpty() && !smaller.chars.equals(this.chars)) {
130+
builder.add(smaller.build());
131+
}
132+
smaller = builder(0).range('0', '9').range('a', 'z').range('A', 'Z').intersect(this);
133+
if (!smaller.chars.isEmpty() && !smaller.chars.equals(this.chars)) {
134+
builder.add(smaller.build());
131135
}
132-
return Optional.of(new Set(chars, "~" + description));
136+
return builder.build();
133137
}
134138

135139
/**
@@ -164,12 +168,12 @@ public boolean equals(Object o) {
164168
return false;
165169
}
166170
Set set = (Set) o;
167-
return chars.equals(set.chars) && description.equals(set.description);
171+
return chars.equals(set.chars);
168172
}
169173

170174
@Override
171175
public int hashCode() {
172-
return Objects.hash(chars, description);
176+
return chars.hashCode();
173177
}
174178

175179
@Override

core/src/test/scala/com/github/simy4/coregex/core/CoregexArbitraries.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616

1717
package com.github.simy4.coregex.core
1818

19-
import org.scalacheck.{ Arbitrary, Gen }
19+
import org.scalacheck.{ Arbitrary, Gen, Shrink }
2020
import org.scalacheck.Arbitrary.arbitrary
2121

2222
import java.util.regex.Pattern
2323

2424
trait CoregexArbitraries {
25+
import scala.jdk.CollectionConverters._
26+
2527
type Flags <: Int
2628
implicit val arbitraryFlags: Arbitrary[Flags] = Arbitrary(genFlags)
2729
def genFlags: Gen[Flags] =
@@ -96,4 +98,8 @@ trait CoregexArbitraries {
9698
size <- Gen.size
9799
rest <- Gen.listOfN(size % 10, Gen.resize(size / 4, genCoregex(charGen)))
98100
} yield new Coregex.Union(first, rest: _*)
101+
102+
implicit def coregexShrink[C <: Coregex]: Shrink[C] = Shrink.withLazyList { larger =>
103+
LazyList.from(larger.shrink().iterator().asScala).asInstanceOf[LazyList[C]]
104+
}
99105
}

core/src/test/scala/com/github/simy4/coregex/core/CoregexSuite.scala

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@ import munit.ScalaCheckSuite
2020
import org.scalacheck.Prop._
2121

2222
class CoregexSuite extends ScalaCheckSuite with CoregexArbitraries {
23+
import Coregex._
24+
25+
import scala.jdk.CollectionConverters._
26+
2327
property("quantified zero times should give empty") {
24-
forAll { (coregex: Coregex, `type`: Coregex.Quantified.Type, seed: Long) =>
28+
forAll { (coregex: Coregex, `type`: Quantified.Type, seed: Long) =>
2529
coregex.quantify(0, 0, `type`).generate(seed).isEmpty
2630
}
2731
}
@@ -44,33 +48,33 @@ class CoregexSuite extends ScalaCheckSuite with CoregexArbitraries {
4448
// region Concat
4549
property("concat with empty should be identity") {
4650
forAll { (coregex: Coregex, seed: Long) =>
47-
val concat1 = new Coregex.Concat(coregex, Coregex.empty())
48-
val concat2 = new Coregex.Concat(Coregex.empty(), coregex)
51+
val concat1 = new Concat(coregex, Coregex.empty())
52+
val concat2 = new Concat(Coregex.empty(), coregex)
4953
(coregex.generate(seed) ?= concat1.generate(seed)) && (coregex.generate(seed) ?= concat2.generate(seed))
5054
}
5155
}
5256

5357
property("concat is associative") {
5458
forAll { (fst: Coregex, snd: Coregex, trd: Coregex, seed: Long) =>
55-
val left = new Coregex.Concat(new Coregex.Concat(fst, snd), trd).generate(seed)
56-
val right = new Coregex.Concat(fst, new Coregex.Concat(snd, trd)).generate(seed)
59+
val left = new Concat(new Concat(fst, snd), trd).generate(seed)
60+
val right = new Concat(fst, new Concat(snd, trd)).generate(seed)
5761
(left ?= right) :| s"$left == $right"
5862
}
5963
}
6064
// endregion
6165

6266
// region Ref
6367
property("generated ref by name should return group second time") {
64-
forAll { (group: Coregex, ref: Coregex.Ref, seed: Long) =>
65-
val re = new Coregex.Concat(new Coregex.Group(ref.ref().toString, group), ref).generate(seed)
68+
forAll { (group: Coregex, ref: Ref, seed: Long) =>
69+
val re = new Concat(new Group(ref.ref().toString, group), ref).generate(seed)
6670
val generated = group.generate(seed)
6771
(generated + generated ?= re) :| s"$generated$generated == $re"
6872
}
6973
}
7074

7175
property("generated ref by index should return group second time") {
7276
forAll { (group: Coregex, seed: Long) =>
73-
val re = new Coregex.Concat(new Coregex.Group(group), new Coregex.Ref(1)).generate(seed)
77+
val re = new Concat(new Group(group), new Ref(1)).generate(seed)
7478
val generated = group.generate(seed)
7579
(generated + generated ?= re) :| s"$generated$generated == $re"
7680
}
@@ -80,10 +84,26 @@ class CoregexSuite extends ScalaCheckSuite with CoregexArbitraries {
8084
// region Union
8185
property("generated should be in union") {
8286
forAll { (fst: String, snd: String, trd: String, seed: Long) =>
83-
val generated =
84-
new Coregex.Union(Coregex.literal(fst, 0), Coregex.literal(snd, 0), Coregex.literal(trd, 0)).generate(seed)
87+
val generated = new Union(literal(fst, 0), literal(snd, 0), literal(trd, 0)).generate(seed)
8588
((generated ?= fst) || (generated ?= snd) || (generated ?= trd)) :| s"$generated in ($fst|$snd|$trd)"
8689
}
8790
}
91+
92+
property("shrink should eliminate options") {
93+
forAll { (fst: Byte, snd: Byte) =>
94+
val union = new Union(literal(fst.toString, 0), literal(snd.toString, 0))
95+
96+
union
97+
.shrink()
98+
.iterator()
99+
.asScala
100+
.map { shrunk =>
101+
(shrunk == new Union(literal(fst.toString, 0)) || shrunk == new Union(
102+
literal(snd.toString, 0)
103+
)) :| s"$union was not shrunk to segments: $shrunk"
104+
}
105+
.reduce(_ && _)
106+
}
107+
}
88108
// endregion
89109
}

0 commit comments

Comments
 (0)