Skip to content
Merged
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,9 @@ public Query regexpQuery(
public static Query toApproximationQuery(RegExp r) throws IllegalArgumentException {
Query result = null;
switch (r.kind) {
case REGEXP_CHAR_CLASS:
result = createCharacterClassQuery(r);
break;
case REGEXP_UNION:
result = createUnionQuery(r);
break;
Expand All @@ -426,7 +429,6 @@ public static Query toApproximationQuery(RegExp r) throws IllegalArgumentExcepti
// Repeat is zero or more times so zero matches = match all
result = new MatchAllDocsQuery();
break;

case REGEXP_REPEAT_MIN:
case REGEXP_REPEAT_MINMAX:
if (r.min > 0) {
Expand Down Expand Up @@ -454,7 +456,6 @@ public static Query toApproximationQuery(RegExp r) throws IllegalArgumentExcepti
case REGEXP_INTERSECTION:
case REGEXP_COMPLEMENT:
case REGEXP_CHAR_RANGE:
case REGEXP_CHAR_CLASS:
case REGEXP_ANYCHAR:
case REGEXP_INTERVAL:
case REGEXP_EMPTY:
Expand Down Expand Up @@ -497,11 +498,38 @@ private static Query createConcatenationQuery(RegExp r) {

}

/**
 * Builds an approximation query for a character-class expression made up solely of
 * individual characters.
 *
 * <p>Each entry {@code i} of the class is described by the pair {@code r.from[i]} /
 * {@code r.to[i]}. When both are equal the entry is a single character, which is
 * lower-cased via {@code toLowerCase} and turned into a {@link TermQuery} on the
 * empty field name. The collected term queries are then handed to {@code formQuery}.
 *
 * <p>The method falls back to {@link MatchAllDocsQuery} (i.e. no narrowing) when:
 * <ul>
 *   <li>any entry is a real range ({@code from != to}), which is not optimized yet, or</li>
 *   <li>the number of single-character clauses exceeds
 *       {@code MAX_CLAUSES_IN_APPROXIMATION_QUERY}.</li>
 * </ul>
 *
 * @param r the character-class expression to approximate
 * @return the combined query, or a {@link MatchAllDocsQuery} when the class cannot be
 *         approximated
 */
private static Query createCharacterClassQuery(RegExp r) {
    List<Query> queries = new ArrayList<>();
    int clauseCount = 0;
    for (int i = 0; i < r.from.length; i++) {
        // TODO: consider expanding this to allow for character ranges as well (need additional tests and performance eval)
        if (r.from[i] != r.to[i]) {
            // immediately exit because we can't currently optimize a combination of range and classes
            return new MatchAllDocsQuery();
        }
        // Every single-character entry contributes exactly one clause. The previous
        // "to - from" increment was always 0 here, so the limit below never triggered.
        clauseCount++;
        if (clauseCount > MAX_CLAUSES_IN_APPROXIMATION_QUERY) {
            return new MatchAllDocsQuery();
        }
        int codePoint = r.from[i];
        String normalizedChar = toLowerCase(Character.toString(codePoint));
        queries.add(new TermQuery(new Term("", normalizedChar)));
    }
    return formQuery(queries);
}

/**
 * Builds an approximation query for a union expression.
 *
 * <p>Gathers queries from both operands ({@code r.exp1} and {@code r.exp2}) through
 * {@code findLeaves}, passing {@code REGEXP_UNION} as the kind, and hands the collected
 * list to {@code formQuery} to produce the final query.
 *
 * @param r the union expression whose two operands are examined
 * @return the query produced by {@code formQuery} for the collected clauses
 */
private static Query createUnionQuery(RegExp r) {
    // Create an OR of clauses
    // Declared once, against the List interface; a second declaration of the same
    // local would not compile.
    List<Query> queries = new ArrayList<>();
    findLeaves(r.exp1, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries);
    findLeaves(r.exp2, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries);
    return formQuery(queries);
}

private static Query formQuery(List<Query> queries) {
BooleanQuery.Builder bOr = new BooleanQuery.Builder();
HashSet<Query> uniqueClauses = new HashSet<>();
for (Query query : queries) {
Expand Down