Skip to content

Commit d7440ea

Browse files
committed
Improve cleanup of resources opened by the analyze() methods.
Before issue #8, each file being analyzed had exactly one associated FileInputStream, so it was straightforward to ensure that this stream was closed. Issue #8 changed this, but it did not add logic to close the extra streams if an error occurred before the document was added to the index. This change improves the situation by ensuring that readers and token streams associated with the fields of the Lucene document are closed if something goes wrong.
1 parent 03ddb24 commit d7440ea

File tree

2 files changed

+38
-12
lines changed

2 files changed

+38
-12
lines changed

src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -231,19 +231,17 @@ public static FileAnalyzer getAnalyzer(InputStream in, String file) throws IOExc
231231
}
232232

233233
/**
234-
* Create a Lucene document and fill in the required fields
234+
* Populate a Lucene document with the required fields.
235+
* @param doc The document to populate
235236
* @param file The file to index
236237
* @param path Where the file is located (from source root)
237238
* @param fa The analyzer to use on the file
238239
* @param xrefOut Where to write the xref (possibly {@code null})
239-
* @return The Lucene document to add to the index database
240-
* @throws java.io.IOException If an exception occurs while collecting the
241-
* data
240+
* @throws IOException If an exception occurs while collecting the data
242241
*/
243-
public Document getDocument(File file, String path,
244-
FileAnalyzer fa, Writer xrefOut)
242+
public void populateDocument(Document doc, File file, String path,
243+
FileAnalyzer fa, Writer xrefOut)
245244
throws IOException {
246-
Document doc = new Document();
247245
String date = DateTools.timeToString(file.lastModified(),
248246
DateTools.Resolution.MILLISECOND);
249247
doc.add(new Field(QueryBuilder.U, Util.path2uid(path, date),
@@ -277,8 +275,6 @@ public Document getDocument(File file, String path,
277275
}
278276
fa.analyze(doc, StreamSource.fromFile(file), xrefOut);
279277
}
280-
281-
return doc;
282278
}
283279

284280
/**

src/org/opensolaris/opengrok/index/IndexDatabase.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.apache.lucene.analysis.standard.StandardAnalyzer;
4545
import org.apache.lucene.document.DateTools;
4646
import org.apache.lucene.document.Document;
47+
import org.apache.lucene.document.Field;
4748
import org.apache.lucene.index.*;
4849
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
4950
import org.apache.lucene.queryparser.classic.ParseException;
@@ -68,6 +69,7 @@
6869
import org.opensolaris.opengrok.history.HistoryGuru;
6970
import org.opensolaris.opengrok.search.QueryBuilder;
7071
import org.opensolaris.opengrok.search.SearchEngine;
72+
import org.opensolaris.opengrok.util.IOUtils;
7173
import org.opensolaris.opengrok.web.Util;
7274

7375
/**
@@ -630,9 +632,9 @@ private void addFile(File file, String path) throws IOException {
630632
fa.setCtags(ctags);
631633
fa.setProject(Project.getProject(path));
632634

633-
Document d;
635+
Document doc = new Document();
634636
try (Writer xrefOut = getXrefWriter(fa, path)) {
635-
d = analyzerGuru.getDocument(file, path, fa, xrefOut);
637+
analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);
636638
} catch (Exception e) {
637639
log.log(Level.INFO,
638640
"Skipped file ''{0}'' because the analyzer didn''t "
@@ -649,16 +651,44 @@ private void addFile(File file, String path) throws IOException {
649651
}
650652
}
651653
log.log(Level.FINE, "Exception from analyzer {0}: {1} {2}{3}{4}{5}{6}", new String[]{fa.getClass().getName(), e.toString(), System.lineSeparator(), stack.toString(), System.lineSeparator(), sstack.toString()});
654+
cleanupResources(doc);
652655
return;
653656
}
654657

655-
writer.addDocument(d, fa);
658+
try {
659+
writer.addDocument(doc, fa);
660+
} catch (Throwable t) {
661+
cleanupResources(doc);
662+
throw t;
663+
}
664+
656665
setDirty();
657666
for (IndexChangedListener listener : listeners) {
658667
listener.fileAdded(path, fa.getClass().getSimpleName());
659668
}
660669
}
661670

671+
/**
672+
* Do a best effort to clean up all resources allocated when populating
673+
* a Lucene document. On normal execution, these resources should be
674+
* closed automatically by the index writer once it's done with them, but
675+
* we may not get that far if something fails.
676+
*
677+
* @param doc the document whose resources to clean up
678+
*/
679+
private void cleanupResources(Document doc) {
680+
for (IndexableField f : doc) {
681+
// If the field takes input from a reader, close the reader.
682+
IOUtils.close(f.readerValue());
683+
684+
// If the field takes input from a token stream, close the
685+
// token stream.
686+
if (f instanceof Field) {
687+
IOUtils.close(((Field) f).tokenStreamValue());
688+
}
689+
}
690+
}
691+
662692
/**
663693
* Check if I should accept this file into the index database
664694
*

0 commit comments

Comments
 (0)