Skip to content

Commit d7440ea

Browse files
committed
Improve cleanup of resources opened by the analyze() methods.
Before issue #8, each file being analyzed had exactly one associated FileInputStream, so it was straightforward to ensure that this stream was closed. Issue #8 changed this, but it did not add logic to close the extra streams if an error occurred before the document was added to the index. This change improves the situation by ensuring that readers and token streams associated with the fields of the Lucene document are closed if something goes wrong.
1 parent 03ddb24 commit d7440ea

File tree

2 files changed

+38
-12
lines changed

2 files changed

+38
-12
lines changed

src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -231,19 +231,17 @@ public static FileAnalyzer getAnalyzer(InputStream in, String file) throws IOExc
231231
}
232232

233233
/**
234-
* Create a Lucene document and fill in the required fields
234+
* Populate a Lucene document with the required fields.
235+
* @param doc The document to populate
235236
* @param file The file to index
236237
* @param path Where the file is located (from source root)
237238
* @param fa The analyzer to use on the file
238239
* @param xrefOut Where to write the xref (possibly {@code null})
239-
* @return The Lucene document to add to the index database
240-
* @throws java.io.IOException If an exception occurs while collecting the
241-
* data
240+
* @throws IOException If an exception occurs while collecting the data
242241
*/
243-
public Document getDocument(File file, String path,
244-
FileAnalyzer fa, Writer xrefOut)
242+
public void populateDocument(Document doc, File file, String path,
243+
FileAnalyzer fa, Writer xrefOut)
245244
throws IOException {
246-
Document doc = new Document();
247245
String date = DateTools.timeToString(file.lastModified(),
248246
DateTools.Resolution.MILLISECOND);
249247
doc.add(new Field(QueryBuilder.U, Util.path2uid(path, date),
@@ -277,8 +275,6 @@ public Document getDocument(File file, String path,
277275
}
278276
fa.analyze(doc, StreamSource.fromFile(file), xrefOut);
279277
}
280-
281-
return doc;
282278
}
283279

284280
/**

src/org/opensolaris/opengrok/index/IndexDatabase.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.apache.lucene.analysis.standard.StandardAnalyzer;
4545
import org.apache.lucene.document.DateTools;
4646
import org.apache.lucene.document.Document;
47+
import org.apache.lucene.document.Field;
4748
import org.apache.lucene.index.*;
4849
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
4950
import org.apache.lucene.queryparser.classic.ParseException;
@@ -68,6 +69,7 @@
6869
import org.opensolaris.opengrok.history.HistoryGuru;
6970
import org.opensolaris.opengrok.search.QueryBuilder;
7071
import org.opensolaris.opengrok.search.SearchEngine;
72+
import org.opensolaris.opengrok.util.IOUtils;
7173
import org.opensolaris.opengrok.web.Util;
7274

7375
/**
@@ -630,9 +632,9 @@ private void addFile(File file, String path) throws IOException {
630632
fa.setCtags(ctags);
631633
fa.setProject(Project.getProject(path));
632634

633-
Document d;
635+
Document doc = new Document();
634636
try (Writer xrefOut = getXrefWriter(fa, path)) {
635-
d = analyzerGuru.getDocument(file, path, fa, xrefOut);
637+
analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);
636638
} catch (Exception e) {
637639
log.log(Level.INFO,
638640
"Skipped file ''{0}'' because the analyzer didn''t "
@@ -649,16 +651,44 @@ private void addFile(File file, String path) throws IOException {
649651
}
650652
}
651653
log.log(Level.FINE, "Exception from analyzer {0}: {1} {2}{3}{4}{5}{6}", new String[]{fa.getClass().getName(), e.toString(), System.lineSeparator(), stack.toString(), System.lineSeparator(), sstack.toString()});
654+
cleanupResources(doc);
652655
return;
653656
}
654657

655-
writer.addDocument(d, fa);
658+
try {
659+
writer.addDocument(doc, fa);
660+
} catch (Throwable t) {
661+
cleanupResources(doc);
662+
throw t;
663+
}
664+
656665
setDirty();
657666
for (IndexChangedListener listener : listeners) {
658667
listener.fileAdded(path, fa.getClass().getSimpleName());
659668
}
660669
}
661670

671+
/**
672+
* Do a best effort to clean up all resources allocated when populating
673+
* a Lucene document. On normal execution, these resources should be
674+
* closed automatically by the index writer once it's done with them, but
675+
* we may not get that far if something fails.
676+
*
677+
* @param doc the document whose resources to clean up
678+
*/
679+
private void cleanupResources(Document doc) {
680+
for (IndexableField f : doc) {
681+
// If the field takes input from a reader, close the reader.
682+
IOUtils.close(f.readerValue());
683+
684+
// If the field takes input from a token stream, close the
685+
// token stream.
686+
if (f instanceof Field) {
687+
IOUtils.close(((Field) f).tokenStreamValue());
688+
}
689+
}
690+
}
691+
662692
/**
663693
* Check if I should accept this file into the index database
664694
*

0 commit comments

Comments
 (0)