Skip to content

Commit 1799981

Browse files
committed
use custom PassageScorer for OGKUnifiedHighlighter
fixes #4357
1 parent 0ae60b5 commit 1799981

File tree

6 files changed

+601
-5
lines changed

6 files changed

+601
-5
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/ContextArgs.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ public short getContextSurround() {
7878
* is displayed to allow the user to view full match results.
7979
* <p>
8080
* (N.b. the value is used with Lucene {@code uhighlight}, and {@code short}
81-
* is safer, though syntactically inconvenient, to avoid numeric overlow
81+
* is safer, though syntactically inconvenient, to avoid numeric overflow
8282
* that may occur with {@code int} in that library.)
8383
* @return a positive value
8484
*/

opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/OGKUnifiedHighlighter.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
* See the License for the specific language governing permissions and
1515
* limitations under the License.
1616
*
17+
*/
18+
/*
19+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
1720
* Portions Copyright (c) 2018, Chris Fraire <[email protected]>.
1821
*/
1922

@@ -37,6 +40,7 @@
3740
import org.apache.lucene.index.StoredFields;
3841
import org.apache.lucene.search.DocIdSetIterator;
3942
import org.apache.lucene.search.Query;
43+
import org.apache.lucene.search.uhighlight.PassageScorer;
4044
import org.apache.lucene.search.uhighlight.UHComponents;
4145
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
4246
import org.apache.lucene.util.BytesRef;
@@ -324,4 +328,9 @@ private Reader getReader(InputStream in) throws IOException {
324328
BufferedReader bufrdr = new BufferedReader(bsrdr);
325329
return ExpandTabsReader.wrap(bufrdr, tabSize);
326330
}
331+
332+
/**
 * Supplies the {@link PassageScorer} to be used when highlighting.
 * <p>
 * The {@code field} argument is not consulted here: a newly created
 * {@link OGPassageScorer} is returned on every call, whatever the field.
 *
 * @param field name of the field being highlighted (ignored by this override)
 * @return a new {@link OGPassageScorer} instance
 */
@Override
333+
protected PassageScorer getScorer(String field) {
334+
return new OGPassageScorer();
335+
}
327336
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.search.context;
24+
25+
import org.apache.lucene.search.uhighlight.Passage;
26+
import org.apache.lucene.search.uhighlight.PassageScorer;
27+
import org.opengrok.indexer.logger.LoggerFactory;
28+
29+
import java.util.logging.Level;
30+
import java.util.logging.Logger;
31+
32+
/**
33+
* Custom {@link PassageScorer} used in {@link OGKUnifiedHighlighter}.
34+
* The goal is to have ordering of passages based strictly on their start offsets.
35+
*/
36+
public class OGPassageScorer extends PassageScorer {
37+
38+
private static final Logger LOGGER = LoggerFactory.getLogger(OGPassageScorer.class);
39+
40+
public OGPassageScorer() {
41+
// Use non-default values so that the scorer object is easier to identify when debugging.
42+
super(1, 2, 3);
43+
}
44+
45+
@Override
46+
public float score(Passage passage, int contentLength) {
47+
LOGGER.log(Level.FINEST, "{0} -> {1}", new Object[]{passage, passage.getStartOffset()});
48+
return -passage.getStartOffset();
49+
}
50+
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.search.context;
24+
25+
import org.junit.jupiter.api.AfterAll;
26+
import org.junit.jupiter.api.BeforeAll;
27+
import org.junit.jupiter.api.Test;
28+
import org.opengrok.indexer.configuration.RuntimeEnvironment;
29+
import org.opengrok.indexer.history.HistoryGuru;
30+
import org.opengrok.indexer.history.RepositoryFactory;
31+
import org.opengrok.indexer.index.Indexer;
32+
import org.opengrok.indexer.search.SearchEngine;
33+
import org.opengrok.indexer.util.TestRepository;
34+
import org.w3c.dom.Document;
35+
import org.w3c.dom.NodeList;
36+
37+
import javax.xml.parsers.DocumentBuilder;
38+
import javax.xml.parsers.DocumentBuilderFactory;
39+
import java.io.ByteArrayInputStream;
40+
import java.nio.file.Paths;
41+
import java.util.Arrays;
42+
43+
import static org.junit.jupiter.api.Assertions.assertEquals;
44+
import static org.junit.jupiter.api.Assertions.assertNotNull;
45+
import static org.junit.jupiter.api.Assertions.assertTrue;
46+
import static org.opengrok.indexer.search.context.SearchAndContextFormatterTest.getFirstFragments;
47+
48+
/**
49+
* Make sure that passages within search results are ordered strictly based on the line numbers.
50+
*/
51+
public class PassageScorerTest {
52+
private static RuntimeEnvironment env;
53+
private static TestRepository repository;
54+
55+
@BeforeAll
56+
public static void setUpClass() throws Exception {
57+
repository = new TestRepository();
58+
repository.create(HistoryGuru.class.getResource("/sources"));
59+
60+
env = RuntimeEnvironment.getInstance();
61+
env.setCtags(System.getProperty("org.opengrok.indexer.analysis.Ctags", "ctags"));
62+
env.setSourceRoot(repository.getSourceRoot());
63+
env.setDataRoot(repository.getDataRoot());
64+
RepositoryFactory.initializeIgnoredNames(env);
65+
env.setHistoryEnabled(false);
66+
67+
assertTrue(Paths.get(env.getSourceRootPath(), "c", "sdt.h").toFile().exists());
68+
69+
Indexer.getInstance().doIndexerExecution(null, null);
70+
}
71+
72+
@AfterAll
73+
public static void tearDownClass() {
74+
repository.destroy();
75+
}
76+
77+
@Test
78+
void testSearch() throws Exception {
79+
SearchEngine instance = new SearchEngine();
80+
instance.setFreetext("DTRACE_PROBE4");
81+
instance.setFile("sdt");
82+
int noHits = instance.search();
83+
assertTrue(noHits > 0, "noHits should be positive");
84+
String[] frags = getFirstFragments(instance, env, new ContextArgs((short) 0, (short) 10));
85+
assertNotNull(frags, "getFirstFragments() should return something");
86+
assertEquals(1, frags.length, "frags should have one element");
87+
88+
// Create XML from the result and parse it, get the line numbers, compare.
89+
String docString = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
90+
"<document>\n" +
91+
frags[0] +
92+
"\n</document>";
93+
94+
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
95+
assertNotNull(factory, "DocumentBuilderFactory is null");
96+
97+
DocumentBuilder builder = factory.newDocumentBuilder();
98+
assertNotNull(builder, "DocumentBuilder is null");
99+
100+
final int[] lineNumbers = {58, 97, 155, 172, 189, 200, 232, 264, 297, 324};
101+
102+
Document document = builder.parse(new ByteArrayInputStream(docString.getBytes()));
103+
NodeList nl = document.getElementsByTagName("a");
104+
assertEquals(lineNumbers.length, nl.getLength());
105+
int[] lines = new int[nl.getLength()];
106+
for (int i = 0; i < nl.getLength(); i++) {
107+
String href = nl.item(i).getAttributes().getNamedItem("href").getNodeValue();
108+
assertNotNull(href);
109+
String lineNumStr = href.substring(href.indexOf("#") + 1);
110+
assertNotNull(lineNumStr);
111+
int lineNum = Integer.parseInt(lineNumStr);
112+
lines[i] = lineNum;
113+
}
114+
assertEquals(0, Arrays.compare(lineNumbers, lines));
115+
116+
instance.destroy();
117+
}
118+
}

opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ void testSearch() throws IOException {
100100
instance.setFile("bkexlib.cpp");
101101
int noHits = instance.search();
102102
assertTrue(noHits > 0, "noHits should be positive");
103-
String[] frags = getFirstFragments(instance);
103+
String[] frags = getFirstFragments(instance, env, new ContextArgs((short) 1, (short) 10));
104104
assertNotNull(frags, "getFirstFragments() should return something");
105105
assertEquals(1, frags.length, "frags should have one element");
106106

@@ -113,9 +113,8 @@ void testSearch() throws IOException {
113113
instance.destroy();
114114
}
115115

116-
private String[] getFirstFragments(SearchEngine instance) throws IOException {
117-
118-
ContextArgs args = new ContextArgs((short) 1, (short) 10);
116+
static String[] getFirstFragments(SearchEngine instance, RuntimeEnvironment env, ContextArgs args)
117+
throws IOException {
119118

120119
/*
121120
* The following `anz' should go unused, but UnifiedHighlighter demands

0 commit comments

Comments
 (0)