Skip to content

Commit 7f6ffcf

Browse files
criminosis authored and li-boxuan committed
Fixes #4165
Closes #4164

Added new test document and updated assertions
Use tokenizer that mimics Solr's standardized tokenizer
Geo predicate tweaking
Implemented TextContainsPhrase for Solr

Signed-off-by: Allan Clements <[email protected]>
1 parent 5366ccc commit 7f6ffcf

File tree

4 files changed: +91 / -31 lines changed

janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java

Lines changed: 30 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -228,13 +228,16 @@ private void storeTest(String... stores) throws Exception {
228228
true);
229229
final Multimap<String, Object> doc3 = getDocument("Hello Bob, are you there?", -500, 10.1, Geoshape.point(47.0, 10.0), Geoshape.box(46.9, 9.9, 47.1, 10.1), Arrays.asList("7", "8", "9"), Sets.newHashSet("7", "8"), Instant.ofEpochSecond(3),
230230
false);
231+
final Multimap<String, Object> doc4 = getDocument("foo.com bar/test", -1001, 2, Geoshape.point(0, 0.0), Geoshape.box(46.6, 0, 46.9, 0.1), Arrays.asList("10", "11", "12"), Sets.newHashSet("9", "10"), Instant.ofEpochSecond(0),
232+
false);
231233

232234
for (final String store : stores) {
233235
initialize(store);
234236

235237
add(store, "doc1", doc1, true);
236238
add(store, "doc2", doc2, true);
237-
add(store, "doc3", doc3, false);
239+
add(store, "doc3", doc3, true);
240+
add(store, "doc4", doc4, false);
238241

239242
}
240243

@@ -262,23 +265,24 @@ private void storeTest(String... stores) throws Exception {
262265
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "worl"))).count());
263266
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "Tomorrow world"))).count());
264267
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "WorLD HELLO"))).count());
268+
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "foo.com"))).count());
265269
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS_FUZZY, "boby"))).count());
266270

267-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count());
271+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count());
268272
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "z"))).count());
269273
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "world"))).count());
270274

271-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count());
275+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count());
272276
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "z"))).count());
273277
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "world"))).count());
274278

275279
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "A"))).count());
276-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count());
277-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count());
280+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count());
281+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count());
278282

279283
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "A"))).count());
280-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count());
281-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count());
284+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count());
285+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count());
282286

283287
//Ordering
284288
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "world"), orderTimeDesc))
@@ -357,25 +361,25 @@ private void storeTest(String... stores) throws Exception {
357361
//String
358362
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "Tomorrow is the world"))).count());
359363
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "world"))).count());
360-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count());
364+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count());
361365
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "Tomorrow"))).count());
362366
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "wor"))).count());
363367
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.FUZZY, "Tomorow is the world"))).count());
364368

365-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count());
369+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count());
366370
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "z"))).count());
367-
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count());
371+
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count());
368372

369-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count());
373+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count());
370374
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "z"))).count());
371-
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count());
375+
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count());
372376

373377
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "A"))).count());
374-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count());
378+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count());
375379
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "Hello world"))).count());
376380

377381
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "A"))).count());
378-
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count());
382+
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count());
379383
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "Hello world"))).count());
380384

381385
try {
@@ -413,8 +417,7 @@ private void storeTest(String... stores) throws Exception {
413417
assertEquals(2, result.size());
414418

415419
result = tx.queryStream(new IndexQuery(store, Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world")))).collect(Collectors.toList());
416-
assertEquals(1, result.size());
417-
assertEquals("doc3", result.get(0));
420+
assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result));
418421

419422
result = tx.queryStream(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.EQUAL, -500), Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world"))))).collect(Collectors.toList());
420423
assertEquals(1, result.size());
@@ -449,8 +452,8 @@ private void storeTest(String... stores) throws Exception {
449452
assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result));
450453

451454
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.box(46.5, -0.5, 50.5, 10.5)))).collect(Collectors.toList());
452-
assertEquals(3,result.size());
453-
assertEquals(ImmutableSet.of("doc1", "doc2", "doc3"), ImmutableSet.copyOf(result));
455+
assertEquals(4, result.size());
456+
assertEquals(ImmutableSet.of("doc1", "doc2", "doc3", "doc4"), ImmutableSet.copyOf(result));
454457

455458
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList());
456459
assertEquals(2, result.size());
@@ -471,8 +474,8 @@ private void storeTest(String... stores) throws Exception {
471474

472475
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.DISJOINT, Geoshape.polygon(Arrays.asList(new double[][]
473476
{{-5.0,47.0},{5.0,47.0},{5.0,50.0},{-5.0,50.0},{-5.0,47.0}}))))).collect(Collectors.toList());
474-
assertEquals(1, result.size());
475-
assertEquals(ImmutableSet.of("doc3"), ImmutableSet.copyOf(result));
477+
assertEquals(2, result.size());
478+
assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result));
476479
}
477480

478481
if (indexFeatures.supportsGeoContains()) {
@@ -486,8 +489,8 @@ private void storeTest(String... stores) throws Exception {
486489
assertEquals(ImmutableSet.of("doc1","doc2"), ImmutableSet.copyOf(result));
487490

488491
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList());
489-
assertEquals(2, result.size());
490-
assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result));
492+
assertEquals(3, result.size());
493+
assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), ImmutableSet.copyOf(result));
491494

492495
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.polygon(Arrays.asList(new double[][] {{-1.0,48.0},{2.0,48.0},{2.0,49.0},{-1.0,49.0},{-1.0,48.0}}))))).collect(Collectors.toList());
493496
assertEquals(2, result.size());
@@ -516,13 +519,13 @@ private void storeTest(String... stores) throws Exception {
516519
assertEquals(2, tx.queryStream(new RawQuery(store,"text:\"world\"",NO_PARAS)).count());
517520
assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO 1020]",NO_PARAS)).count());
518521
assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO *]",NO_PARAS)).count());
519-
assertEquals(3, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count());
522+
assertEquals(4, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count());
520523
assertEquals(1, tx.queryStream(new RawQuery(store,"weight:[5.1 TO 8.3]",NO_PARAS)).count());
521524
assertEquals(1, tx.queryStream(new RawQuery(store,"weight:5.2",NO_PARAS)).count());
522525
assertEquals(1, tx.queryStream(new RawQuery(store,"text:world AND time:1001",NO_PARAS)).count());
523526
assertEquals(1, tx.queryStream(new RawQuery(store,"name:\"Hello world\"",NO_PARAS)).count());
524527
assertEquals(1, tx.queryStream(new RawQuery(store, "boolean:true", NO_PARAS)).count());
525-
assertEquals(2, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count());
528+
assertEquals(3, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count());
526529
assertEquals(2, tx.queryStream(new RawQuery(store, "date:{1970-01-01T00:00:01Z TO 1970-01-01T00:00:03Z]", NO_PARAS)).count());
527530
assertEquals(3, tx.queryStream(new RawQuery(store, "date:[1970-01-01T00:00:01Z TO *]", NO_PARAS)).count());
528531
assertEquals(1, tx.queryStream(new RawQuery(store, "date:\"1970-01-01T00:00:02Z\"", NO_PARAS)).count());
@@ -558,9 +561,9 @@ private void storeTest(String... stores) throws Exception {
558561
assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.EQUAL, Instant.ofEpochSecond(3)))).findFirst().get());
559562
assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN, Instant.ofEpochSecond(2)))).findFirst().get());
560563
assertEquals(ImmutableSet.of("doc2", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
561-
assertEquals(ImmutableSet.of("doc1"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
562-
assertEquals(ImmutableSet.of("doc1", "doc2"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
563-
assertEquals(ImmutableSet.of("doc1", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
564+
assertEquals(ImmutableSet.of("doc1", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
565+
assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
566+
assertEquals(ImmutableSet.of("doc1", "doc3", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
564567

565568

566569
//Update some data

janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,10 @@
2525
import org.apache.http.impl.auth.KerberosScheme;
2626
import org.apache.lucene.analysis.Analyzer;
2727
import org.apache.lucene.analysis.CachingTokenFilter;
28+
import org.apache.lucene.analysis.TokenStream;
2829
import org.apache.lucene.analysis.Tokenizer;
30+
import org.apache.lucene.analysis.standard.StandardTokenizer;
31+
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
2932
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
3033
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
3134
import org.apache.solr.client.solrj.SolrClient;
@@ -95,6 +98,7 @@
9598
import org.slf4j.LoggerFactory;
9699

97100
import java.io.IOException;
101+
import java.io.StringReader;
98102
import java.io.UncheckedIOException;
99103
import java.lang.reflect.Constructor;
100104
import java.text.DateFormat;
@@ -862,7 +866,8 @@ public String buildQueryFilter(Condition<JanusGraphElement> condition, KeyInform
862866
return tokenize(ParameterType.TEXT_ANALYZER, information, value, key, predicate);
863867
} else if (predicate == Text.PREFIX || predicate == Text.CONTAINS_PREFIX
864868
|| predicate == Text.REGEX || predicate == Text.CONTAINS_REGEX
865-
|| predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY) {
869+
|| predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY
870+
|| predicate == Text.CONTAINS_PHRASE) {
866871
return buildQueryFilterStringValue(key, (String) value, predicate, information);
867872
} else if (predicate == Cmp.LESS_THAN || predicate == Cmp.LESS_THAN_EQUAL
868873
|| predicate == Cmp.GREATER_THAN || predicate == Cmp.GREATER_THAN_EQUAL) {
@@ -991,6 +996,8 @@ public String buildQueryFilterStringValue(String key, String value, JanusGraphPr
991996
return (stringKey + ":" + escapeValue(value) + "*");
992997
} else if (predicate == Text.CONTAINS_PREFIX) {
993998
return (key + ":" + escapeValue(value) + "*");
999+
} else if (predicate == Text.CONTAINS_PHRASE) {
1000+
return (key + ":\"" + escapeValue(value) + "\"");
9941001
} else if (predicate == Text.REGEX) {
9951002
return (stringKey + ":/" + value + "/");
9961003
} else if (predicate == Text.CONTAINS_REGEX) {
@@ -1027,7 +1034,8 @@ private String tokenize(ParameterType parameterType, KeyInformation.StoreRetriev
10271034
if (analyzer != null) {
10281035
terms = customTokenize(analyzer, key, (String) value);
10291036
} else if (parameterType == ParameterType.TEXT_ANALYZER) {
1030-
terms = Text.tokenize((String) value);
1037+
//If a custom tokenizer was not specified, assume the standard one as defined in the default Solr Configset
1038+
terms = standardTokenizer((String) value);
10311039
} else {
10321040
return buildQueryFilterStringValue(key, (String) value, janusgraphPredicate, information);
10331041
}
@@ -1162,13 +1170,15 @@ public boolean supports(KeyInformation information, JanusGraphPredicate predicat
11621170
case DEFAULT:
11631171
case TEXT:
11641172
return predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX
1165-
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY;
1173+
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY
1174+
|| predicate == Text.CONTAINS_PHRASE;
11661175
case STRING:
11671176
return predicate instanceof Cmp || predicate==Text.REGEX || predicate==Text.PREFIX || predicate == Text.FUZZY;
11681177
case TEXTSTRING:
11691178
return predicate instanceof Cmp || predicate == Text.REGEX || predicate == Text.PREFIX || predicate == Text.FUZZY
11701179
|| predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX
1171-
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY;
1180+
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY
1181+
|| predicate == Text.CONTAINS_PHRASE;
11721182
}
11731183
} else if (dataType == Date.class || dataType == Instant.class) {
11741184
return predicate instanceof Cmp;
@@ -1267,6 +1277,20 @@ public boolean exists() throws BackendException {
12671277
/*
12681278
################# UTILITY METHODS #######################
12691279
*/
1280+
static List<String> standardTokenizer(String text) {
1281+
List<String> result = new ArrayList<>();
1282+
try (Tokenizer tokenizer = new StandardTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY)) {
1283+
tokenizer.setReader(new StringReader(text));
1284+
CharTermAttribute attr = tokenizer.addAttribute(CharTermAttribute.class);
1285+
tokenizer.reset();
1286+
while (tokenizer.incrementToken()) {
1287+
result.add(attr.toString());
1288+
}
1289+
return result;
1290+
} catch (IOException e) {
1291+
throw new UncheckedIOException(e);
1292+
}
1293+
}
12701294

12711295
static Optional<String> getDualFieldName(String fieldKey, KeyInformation ki) {
12721296
if (AttributeUtils.isString(ki.getDataType()) && Mapping.getMapping(ki) == Mapping.TEXTSTRING) {

0 commit comments

Comments
 (0)