Skip to content

Commit f812a10

Browse files
committed
Use a TYPE-specific analyzer if re-ANALYSIS is done
1 parent efe6abf commit f812a10

File tree

2 files changed

+76
-13
lines changed

2 files changed

+76
-13
lines changed

src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,13 @@ public class AnalyzerGuru {
213213

214214
private static final Map<String, String> fileTypeDescriptions = new TreeMap<>();
215215

216+
/**
217+
* Maps from {@link FileAnalyzer#getFileTypeName()} to
218+
* {@link FileAnalyzerFactory}
219+
*/
220+
private static final Map<String, FileAnalyzerFactory> FILETYPE_FACTORIES =
221+
new HashMap<>();
222+
216223
/*
217224
* If you write your own analyzer please register it here. The order is
218225
* important for any factory that uses a FileAnalyzerFactory.Matcher
@@ -338,6 +345,9 @@ private static void registerAnalyzer(FileAnalyzerFactory factory) {
338345
}
339346
matchers.addAll(factory.getMatchers());
340347
factories.add(factory);
348+
349+
FileAnalyzer fa = factory.getAnalyzer();
350+
FILETYPE_FACTORIES.put(fa.getFileTypeName(), factory);
341351
}
342352

343353
/**
@@ -385,6 +395,17 @@ public static FileAnalyzer getAnalyzer() {
385395
return DEFAULT_ANALYZER_FACTORY.getAnalyzer();
386396
}
387397

398+
/**
399+
* Gets an analyzer for the specified {@code fileTypeName} if it accords
400+
* with a known {@link FileAnalyzer#getFileTypeName()}.
401+
* @param fileTypeName a defined name
402+
* @return a defined instance if known or otherwise {@code null}
403+
*/
404+
public static FileAnalyzer getAnalyzer(String fileTypeName) {
405+
FileAnalyzerFactory factory = FILETYPE_FACTORIES.get(fileTypeName);
406+
return factory == null ? null : factory.getAnalyzer();
407+
}
408+
388409
/**
389410
* Get an analyzer suited to analyze a file. This function will reuse
390411
* analyzers since they are costly.

src/org/opensolaris/opengrok/search/context/OGKUnifiedHighlighter.java

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
4242
import org.apache.lucene.util.BytesRef;
4343
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
44+
import org.opensolaris.opengrok.analysis.AnalyzerGuru;
4445
import org.opensolaris.opengrok.analysis.ExpandTabsReader;
4546
import org.opensolaris.opengrok.analysis.StreamSource;
4647
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
@@ -62,6 +63,8 @@ public class OGKUnifiedHighlighter extends UnifiedHighlighter {
6263

6364
private int tabSize;
6465

66+
private String fileTypeName;
67+
6568
/**
6669
* Initializes an instance with
6770
* {@link UnifiedHighlighter#UnifiedHighlighter(org.apache.lucene.search.IndexSearcher, org.apache.lucene.analysis.Analyzer)}
@@ -82,6 +85,22 @@ public OGKUnifiedHighlighter(RuntimeEnvironment env,
8285
this.env = env;
8386
}
8487

88+
/**
89+
* Gets a file type name-specific analyzer during the execution of
90+
* {@link #highlightFieldsUnion(java.lang.String[], org.apache.lucene.search.Query, int, int)},
91+
* or just gets the object passed in to the constructor at all other times.
92+
* @return a defined instance
93+
*/
94+
@Override
95+
public Analyzer getIndexAnalyzer() {
96+
String ftname = fileTypeName;
97+
if (ftname == null) {
98+
return indexAnalyzer;
99+
}
100+
Analyzer fa = AnalyzerGuru.getAnalyzer(ftname);
101+
return fa == null ? indexAnalyzer : fa;
102+
}
103+
85104
public int getTabSize() {
86105
return tabSize;
87106
}
@@ -90,6 +109,34 @@ public void setTabSize(int value) {
90109
this.tabSize = value;
91110
}
92111

112+
/**
113+
* Transiently arranges that {@link #getIndexAnalyzer()} returns a file type
114+
* name-specific analyzer during a subsequent call of
115+
* {@link #highlightFieldsUnionWork(java.lang.String[], org.apache.lucene.search.Query, int, int)}.
116+
* @param fields a defined instance
117+
* @param query a defined instance
118+
* @param docId a valid document ID
119+
* @param lineLimit the maximum number of lines to return
120+
* @return a defined instance or else {@code null} if there are no results
121+
* @throws IOException if accessing the Lucene document fails
122+
*/
123+
public String highlightFieldsUnion(String[] fields, Query query,
124+
int docId, int lineLimit) throws IOException {
125+
/**
126+
* Setting fileTypeName has to happen before getFieldHighlighter() is
127+
* called by highlightFieldsAsObjects() so that the result of
128+
* getIndexAnalyzer() (if it is called due to requiring ANALYSIS) can be
129+
* influenced by fileTypeName.
130+
*/
131+
Document doc = searcher.doc(docId);
132+
fileTypeName = doc == null ? null : doc.get(QueryBuilder.TYPE);
133+
try {
134+
return highlightFieldsUnionWork(fields, query, docId, lineLimit);
135+
} finally {
136+
fileTypeName = null;
137+
}
138+
}
139+
93140
/**
94141
* Calls
95142
* {@link #highlightFieldsAsObjects(java.lang.String[], org.apache.lucene.search.Query, int[], int[])},
@@ -99,10 +146,10 @@ public void setTabSize(int value) {
99146
* @param query a defined instance
100147
* @param docId a valid document ID
101148
* @param lineLimit the maximum number of lines to return
102-
* @return {@code null} if there are no results or a defined instance
103-
* @throws IOException
149+
* @return a defined instance or else {@code null} if there are no results
150+
* @throws IOException if accessing the Lucene document fails
104151
*/
105-
public String highlightFieldsUnion(String[] fields, Query query,
152+
protected String highlightFieldsUnionWork(String[] fields, Query query,
106153
int docId, int lineLimit) throws IOException {
107154
int[] maxPassagesCopy = new int[fields.length];
108155
/**
@@ -154,9 +201,6 @@ public String highlightFieldsUnion(String[] fields, Query query,
154201
* {@code cacheCharsThreshold} is exceeded. Specifically if that number is
155202
* 0, then only one document is fetched no matter what. Values in the array
156203
* of {@link CharSequence} will be {@code null} if no value was found."
157-
* @param fields
158-
* @param docIter
159-
* @param cacheCharsThreshold
160204
* @return a defined instance
161205
* @throws IOException if an I/O error occurs
162206
*/
@@ -214,13 +258,11 @@ protected OffsetSource getOptimizedOffsetSource(String field,
214258
* postings should be sufficient in the comment for
215259
* shouldHandleMultiTermQuery(String): "MTQ highlighting can be
216260
* expensive, particularly when using offsets in postings."
217-
* DEFS are stored with term vectors to avoid this problem.
218-
* FULL should be approximately fine with re-analysis using an
219-
* on-the-fly PlainAnalyzer.
220-
* REFS should be approximately fine with re-analysis using an
221-
* on-the-fly PlainAnalyzer. It might not accord with the true
222-
* language symbol tokenizer, but it should not be wildly
223-
* divergent.
261+
* DEFS are stored with term vectors to avoid this problem,
262+
* since re-analysis would not accord at all with ctags Definitions.
263+
* For FULL and REFS, highlightFieldsUnion() arranges that
264+
* getIndexAnalyzer() can return a TYPE-specific analyzer for use by
265+
* getOffsetStrategy() -- if re-ANALYSIS is required.
224266
*/
225267
switch (field) {
226268
case QueryBuilder.FULL:

0 commit comments

Comments
 (0)