@@ -407,7 +407,6 @@ public Query regexpQuery(
407407 public static Query toApproximationQuery (RegExp r ) throws IllegalArgumentException {
408408 Query result = null ;
409409 switch (r .kind ) {
410- case REGEXP_CHAR_RANGE :
411410 case REGEXP_CHAR_CLASS :
412411 result = createCharacterClassQuery (r );
413412 break ;
@@ -456,6 +455,7 @@ public static Query toApproximationQuery(RegExp r) throws IllegalArgumentExcepti
456455 case REGEXP_OPTIONAL :
457456 case REGEXP_INTERSECTION :
458457 case REGEXP_COMPLEMENT :
458+ case REGEXP_CHAR_RANGE :
459459 case REGEXP_ANYCHAR :
460460 case REGEXP_INTERVAL :
461461 case REGEXP_EMPTY :
@@ -502,15 +502,21 @@ private static Query createCharacterClassQuery(RegExp r) {
502502 List <Query > queries = new ArrayList <>();
503503 int maxClauseCount = 0 ;
504504 for (int i = 0 ; i < r .from .length ; i ++) {
505- maxClauseCount += r .to [i ] - r .from [i ];
506- if (maxClauseCount > MAX_CLAUSES_IN_APPROXIMATION_QUERY ) {
505+ // TODO: consider expanding this to allow for character ranges as well (need additional tests and performance eval)
506+ if (r .from [i ] == r .to [i ]) {
507+ maxClauseCount += r .to [i ] - r .from [i ];
508+ if (maxClauseCount > MAX_CLAUSES_IN_APPROXIMATION_QUERY ) {
509+ return new MatchAllDocsQuery ();
510+ }
511+ for (int j = r .from [i ]; j <= r .to [i ]; j ++) {
512+ String cs = Character .toString (j );
513+ String normalizedChar = toLowerCase (cs );
514+ queries .add (new TermQuery (new Term ("" , normalizedChar )));
515+ }
516+ } else {
517+ // immediately exit because we can't currently optimize a combination of range and classes
507518 return new MatchAllDocsQuery ();
508519 }
509- for (int j = r .from [i ]; j <= r .to [i ]; j ++) {
510- String cs = Character .toString (j );
511- String normalizedChar = toLowerCase (cs );
512- queries .add (new TermQuery (new Term ("" , normalizedChar )));
513- }
514520 }
515521 return formQuery (queries );
516522 }
0 commit comments