Skip to content

Commit a8c3d34

Browse files
Lubos Kosco
authored and committed
cleanup spellchecker code
use Direct spellchecker to avoid another index creation closes #637
1 parent 0595269 commit a8c3d34

File tree

4 files changed

+95
-116
lines changed

4 files changed

+95
-116
lines changed

src/org/opensolaris/opengrok/index/IndexDatabase.java

Lines changed: 7 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,6 @@
5151
import org.apache.lucene.search.IndexSearcher;
5252
import org.apache.lucene.search.Query;
5353
import org.apache.lucene.search.TopDocs;
54-
import org.apache.lucene.search.spell.LuceneDictionary;
55-
import org.apache.lucene.search.spell.SpellChecker;
5654
import org.apache.lucene.store.FSDirectory;
5755
import org.apache.lucene.store.LockFactory;
5856
import org.apache.lucene.store.NoLockFactory;
@@ -82,8 +80,7 @@
8280
public class IndexDatabase {
8381

8482
private Project project;
85-
private FSDirectory indexDirectory;
86-
private FSDirectory spellDirectory;
83+
private FSDirectory indexDirectory;
8784
private IndexWriter writer;
8885
private TermsEnum uidIter;
8986
private IgnoredNames ignoredNames;
@@ -241,31 +238,22 @@ public void run() {
241238
private void initialize() throws IOException {
242239
synchronized (this) {
243240
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
244-
File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
245-
File spellDir = new File(env.getDataRootFile(), "spellIndex");
241+
File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
246242
if (project != null) {
247-
indexDir = new File(indexDir, project.getPath());
248-
spellDir = new File(spellDir, project.getPath());
243+
indexDir = new File(indexDir, project.getPath());
249244
}
250245

251246
if (!indexDir.exists() && !indexDir.mkdirs()) {
252247
// to avoid race conditions, just recheck..
253248
if (!indexDir.exists()) {
254249
throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
255250
}
256-
}
257-
258-
if (!spellDir.exists() && !spellDir.mkdirs()) {
259-
if (!spellDir.exists()) {
260-
throw new FileNotFoundException("Failed to create root directory [" + spellDir.getAbsolutePath() + "]");
261-
}
262-
}
251+
}
263252

264253
if (!env.isUsingLuceneLocking()) {
265254
lockfact = NoLockFactory.getNoLockFactory();
266255
}
267-
indexDirectory = FSDirectory.open(indexDir, lockfact);
268-
spellDirectory = FSDirectory.open(spellDir, lockfact);
256+
indexDirectory = FSDirectory.open(indexDir, lockfact);
269257
ignoredNames = env.getIgnoredNames();
270258
includedNames = env.getIncludedNames();
271259
analyzerGuru = new AnalyzerGuru();
@@ -426,8 +414,7 @@ public void update() throws IOException, HistoryException {
426414
if (!isInterrupted() && isDirty()) {
427415
if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
428416
optimize();
429-
}
430-
createSpellingSuggestions();
417+
}
431418
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
432419
File timestamp = new File(env.getDataRootFile(), "timestamp");
433420
if (timestamp.exists()) {
@@ -518,40 +505,7 @@ public void optimize() {
518505
}
519506
}
520507
}
521-
522-
/**
523-
* Generate a spelling suggestion for the definitions stored in defs
524-
*/
525-
public void createSpellingSuggestions() {
526-
IndexReader indexReader = null;
527-
SpellChecker checker;
528-
529-
try {
530-
log.info("Generating spelling suggestion index ... ");
531-
indexReader = DirectoryReader.open(indexDirectory);
532-
checker = new SpellChecker(spellDirectory);
533-
//TODO below seems only to index "defs" , possible bug ?
534-
Analyzer analyzer = AnalyzerGuru.getAnalyzer();
535-
IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
536-
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
537-
checker.indexDictionary(new LuceneDictionary(indexReader, QueryBuilder.DEFS), iwc, false);
538-
log.info("done");
539-
} catch (IOException e) {
540-
log.log(Level.SEVERE, "ERROR: Generating spelling: {0}", e);
541-
} finally {
542-
if (indexReader != null) {
543-
try {
544-
indexReader.close();
545-
} catch (IOException e) {
546-
log.log(Level.WARNING, "An error occured while closing reader", e);
547-
}
548-
}
549-
if (spellDirectory != null) {
550-
spellDirectory.close();
551-
}
552-
}
553-
}
554-
508+
555509
private boolean isDirty() {
556510
synchronized (lock) {
557511
return dirty;

src/org/opensolaris/opengrok/search/Summarizer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ private Token[] getTokens(String text) throws IOException {
285285
//FIXME somehow integrate below cycle to getSummary to save the cloning and memory,
286286
//also creating Tokens is suboptimal with 3.0.0 , this whole class could be replaced by highlighter
287287
ArrayList<Token> result = new ArrayList<Token>();
288-
TokenStream ts = analyzer.tokenStream("full", new StringReader(text));
288+
TokenStream ts = analyzer.tokenStream("full", text);
289289
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
290290
OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
291291
while(ts.incrementToken()) {
@@ -297,7 +297,7 @@ private Token[] getTokens(String text) throws IOException {
297297

298298

299299
/**
300-
* Get the terms from a query and adds them to hightlite
300+
* Get the terms from a query and adds them to highlight
301301
* a stream of tokens
302302
*
303303
* @param query

src/org/opensolaris/opengrok/web/SearchHelper.java

Lines changed: 74 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import java.io.FileNotFoundException;
2828
import java.io.IOException;
2929
import java.util.ArrayList;
30-
import java.util.Arrays;
3130
import java.util.List;
3231
import java.util.Map;
3332
import java.util.Set;
@@ -42,9 +41,12 @@
4241
import org.apache.lucene.index.DirectoryReader;
4342
import org.apache.lucene.index.IndexReader;
4443
import org.apache.lucene.index.MultiReader;
44+
import org.apache.lucene.index.Term;
4545
import org.apache.lucene.queryparser.classic.ParseException;
4646
import org.apache.lucene.search.*;
47-
import org.apache.lucene.search.spell.SpellChecker;
47+
import org.apache.lucene.search.spell.DirectSpellChecker;
48+
import org.apache.lucene.search.spell.SuggestMode;
49+
import org.apache.lucene.search.spell.SuggestWord;
4850
import org.apache.lucene.store.FSDirectory;
4951
import org.opensolaris.opengrok.OpenGrokLogger;
5052
import org.opensolaris.opengrok.analysis.CompatibleAnalyser;
@@ -64,6 +66,10 @@
6466
*/
6567
public class SearchHelper {
6668

69+
/**
70+
* max number of words to suggest for spellcheck
71+
*/
72+
public int SPELLCHECK_SUGGEST_WORD_COUNT=5;
6773
/**
6874
* opengrok's data root: used to find the search index file
6975
*/
@@ -128,7 +134,7 @@ public class SearchHelper {
128134
* the searcher used to open/search the index. Automatically set via
129135
* {@link #prepareExec(SortedSet)}.
130136
*/
131-
public IndexSearcher searcher;
137+
public IndexSearcher searcher;
132138
/**
133139
* list of docs which result from the executing the query
134140
*/
@@ -147,6 +153,10 @@ public class SearchHelper {
147153
* {@link #prepareExec(SortedSet)}.
148154
*/
149155
protected Sort sort;
156+
/**
157+
* the spellchecker object
158+
*/
159+
protected DirectSpellChecker checker;
150160
/**
151161
* projects to use to setup indexer searchers. Usually setup via
152162
* {@link #prepareExec(SortedSet)}.
@@ -211,7 +221,8 @@ public class SearchHelper {
211221
public static Set<Map.Entry<String, String>> getFileTypeDescirptions() {
212222
return fileTypeDescription.entrySet();
213223
}
214-
224+
225+
File indexDir;
215226
/**
216227
* Create the searcher to use wrt. to currently set parameters and the given
217228
* projects. Does not produce any {@link #redirect} link. It also does
@@ -235,13 +246,13 @@ public SearchHelper prepareExec(SortedSet<String> projects) {
235246
}
236247
// the Query created by the QueryBuilder
237248
try {
249+
indexDir=new File(dataRoot, "index");
238250
query = builder.build();
239251
if (projects == null) {
240252
errorMsg = "No project selected!";
241253
return this;
242254
}
243-
this.projects = projects;
244-
File indexDir = new File(dataRoot, "index");
255+
this.projects = projects;
245256
if (projects.isEmpty()) {
246257
//no project setup
247258
FSDirectory dir = FSDirectory.open(indexDir);
@@ -285,6 +296,7 @@ public SearchHelper prepareExec(SortedSet<String> projects) {
285296
sort = Sort.RELEVANCE;
286297
break;
287298
}
299+
checker=new DirectSpellChecker();
288300
} catch (ParseException e) {
289301
errorMsg = PARSE_ERROR_MSG + e.getMessage();
290302
} catch (FileNotFoundException e) {
@@ -357,17 +369,20 @@ public SearchHelper executeQuery() {
357369
}
358370
private static final Pattern TABSPACE = Pattern.compile("[\t ]+");
359371

360-
private static void getSuggestion(String term, SpellChecker checker,
372+
private void getSuggestion(Term term, IndexReader ir,
361373
List<String> result) throws IOException {
362374
if (term == null) {
363375
return;
364376
}
365-
String[] toks = TABSPACE.split(term, 0);
377+
String[] toks = TABSPACE.split(term.text(), 0);
366378
for (int j = 0; j < toks.length; j++) {
367-
if (toks[j].length() <= 3) {
368-
continue;
369-
}
370-
result.addAll(Arrays.asList(checker.suggestSimilar(toks[j].toLowerCase(), 5)));
379+
//TODO below seems to be case insensitive ... for refs/defs this is bad
380+
SuggestWord[] words=checker.suggestSimilar(
381+
new Term(term.field(),toks[j]), SPELLCHECK_SUGGEST_WORD_COUNT, ir,
382+
SuggestMode.SUGGEST_ALWAYS);
383+
for (SuggestWord w: words) {
384+
result.add(w.string);
385+
}
371386
}
372387
}
373388

@@ -379,74 +394,78 @@ private static void getSuggestion(String term, SpellChecker checker,
379394
* <li>{@link #projects}</li> <li>{@link #dataRoot}</li>
380395
* <li>{@link #builder}</li> </ul>
381396
*
382-
* @return a possible empty list of sugeestions.
397+
* @return a possible empty list of suggestions.
383398
*/
384399
public List<Suggestion> getSuggestions() {
385400
if (projects == null) {
386-
return new ArrayList<Suggestion>(0);
401+
return new ArrayList<>(0);
387402
}
388-
File[] spellIndex = null;
403+
String name[];
389404
if (projects.isEmpty()) {
390-
spellIndex = new File[]{new File(dataRoot, "spellIndex")};
405+
name=new String[]{"/"};
391406
} else if (projects.size() == 1) {
392-
spellIndex = new File[]{
393-
new File(dataRoot, "spellIndex/" + projects.first())
394-
};
407+
name=new String[]{projects.first()};
395408
} else {
396-
spellIndex = new File[projects.size()];
397-
int ii = 0;
398-
File indexDir = new File(dataRoot, "spellIndex");
409+
name = new String[projects.size()];
410+
int ii = 0;
399411
for (String proj : projects) {
400-
spellIndex[ii++] = new File(indexDir, proj);
412+
name[ii++] = proj;
401413
}
402414
}
403-
List<Suggestion> res = new ArrayList<Suggestion>();
404-
List<String> dummy = new ArrayList<String>();
405-
for (int idx = 0; idx < spellIndex.length; idx++) {
406-
if (!spellIndex[idx].exists()) {
407-
continue;
408-
}
409-
FSDirectory spellDirectory = null;
410-
SpellChecker checker = null;
411-
Suggestion s = new Suggestion(spellIndex[idx].getName());
415+
List<Suggestion> res = new ArrayList<>();
416+
List<String> dummy = new ArrayList<>();
417+
FSDirectory dir;
418+
IndexReader ir=null;
419+
Term t;
420+
for (int idx = 0; idx < name.length; idx++) {
421+
Suggestion s = new Suggestion(name[idx]);
412422
try {
413-
spellDirectory = FSDirectory.open(spellIndex[idx]);
414-
checker = new SpellChecker(spellDirectory);
415-
getSuggestion(builder.getFreetext(), checker, dummy);
423+
dir = FSDirectory.open(new File(indexDir, name[idx]));
424+
ir = DirectoryReader.open(dir);
425+
if (builder.getFreetext()!=null &&
426+
!builder.getFreetext().isEmpty()) {
427+
t=new Term(QueryBuilder.FULL,builder.getFreetext());
428+
getSuggestion(t, ir, dummy);
416429
s.freetext = dummy.toArray(new String[dummy.size()]);
417430
dummy.clear();
418-
getSuggestion(builder.getRefs(), checker, dummy);
431+
}
432+
if (builder.getRefs()!=null && !builder.getRefs().isEmpty()) {
433+
t=new Term(QueryBuilder.REFS,builder.getRefs());
434+
getSuggestion(t, ir, dummy);
419435
s.refs = dummy.toArray(new String[dummy.size()]);
420436
dummy.clear();
421-
// TODO it seems the only true spellchecker is for
422-
// below field, see IndexDatabase
423-
// createspellingsuggestions ...
424-
getSuggestion(builder.getDefs(), checker, dummy);
437+
}
438+
if (builder.getDefs()!=null && !builder.getDefs().isEmpty()) {
439+
t=new Term(QueryBuilder.DEFS,builder.getDefs());
440+
getSuggestion(t, ir, dummy);
425441
s.defs = dummy.toArray(new String[dummy.size()]);
426442
dummy.clear();
427-
if (s.freetext.length > 0 || s.defs.length > 0 || s.refs.length > 0) {
443+
}
444+
//TODO suggest also for path and history?
445+
if ((s.freetext!=null && s.freetext.length > 0) ||
446+
(s.defs!=null && s.defs.length > 0) ||
447+
(s.refs!=null && s.refs.length > 0) ) {
428448
res.add(s);
429449
}
430450
} catch (IOException e) {
431-
log.log(Level.WARNING, "Got excption while getting spelling suggestions: ", e);
451+
log.log(Level.WARNING, "Got exception while getting "
452+
+ "spelling suggestions: ", e);
432453
} finally {
433-
if (spellDirectory != null) {
434-
spellDirectory.close();
435-
}
436-
if (checker != null) {
437-
try {
438-
checker.close();
439-
} catch (Exception x) {
440-
log.log(Level.WARNING, "Got excption while closing spelling suggestions: ", x);
441-
}
442-
}
443-
}
444-
}
454+
if (ir != null) {
455+
try {
456+
ir.close();
457+
} catch (IOException ex) {
458+
log.log(Level.WARNING, "Got exception while "
459+
+ "getting spelling suggestions: ", ex);
460+
}
461+
}
462+
}
463+
}
445464
return res;
446465
}
447466

448467
/**
449-
* Prepare the fields to support printing a fullblown summary. Does nothing
468+
* Prepare the fields to support printing a full blown summary. Does nothing
450469
* if {@link #redirect} or {@link #errorMsg} have a none-{@code null} value.
451470
*
452471
* <p> Parameters which should be populated/set at this time: <ul>

web/search.jsp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,18 +133,24 @@ include file="menu.jspf"
133133
List<Suggestion> hints = searchHelper.getSuggestions();
134134
for (Suggestion hint : hints) {
135135
%><p><font color="#cc0000">Did you mean (for <%= hint.name %>)</font>:<%
136-
for (String word : hint.freetext) {
136+
if (hint.freetext!=null) {
137+
for (String word : hint.freetext) {
137138
%> <a href="search?q=<%= Util.URIEncode(word) %>"><%=
138139
Util.htmlize(word) %></a> &nbsp; <%
139-
}
140-
for (String word : hint.refs) {
140+
}
141+
}
142+
if (hint.refs!=null) {
143+
for (String word : hint.refs) {
141144
%> <a href="search?refs=<%= Util.URIEncode(word) %>"><%=
142145
Util.htmlize(word) %></a> &nbsp; <%
143-
}
144-
for (String word : hint.defs) {
146+
}
147+
}
148+
if (hint.defs!=null) {
149+
for (String word : hint.defs) {
145150
%> <a href="search?defs=<%= Util.URIEncode(word) %>"><%=
146151
Util.htmlize(word) %></a> &nbsp; <%
147152
}
153+
}
148154
%></p><%
149155
}
150156
%>
@@ -233,4 +239,4 @@ include file="menu.jspf"
233239
234240
include file="foot.jspf"
235241
236-
%>
242+
%>

0 commit comments

Comments
 (0)