oracle
diff --git a/‎src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java
Lines changed: 6 additions & 4 deletions b/‎src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java
Lines changed: 6 additions & 4 deletions
diff --git a/‎src/org/opensolaris/opengrok/analysis/FileAnalyzer.java
Lines changed: 9 additions & 31 deletions b/‎src/org/opensolaris/opengrok/analysis/FileAnalyzer.java
Lines changed: 9 additions & 31 deletions
diff --git a/‎src/org/opensolaris/opengrok/analysis/StreamSource.java
Lines changed: 67 additions & 0 deletions b/‎src/org/opensolaris/opengrok/analysis/StreamSource.java
Lines changed: 67 additions & 0 deletions
diff --git a/‎src/org/opensolaris/opengrok/analysis/TextAnalyzer.java
Lines changed: 7 additions & 7 deletions b/‎src/org/opensolaris/opengrok/analysis/TextAnalyzer.java
Lines changed: 7 additions & 7 deletions
diff --git a/‎src/org/opensolaris/opengrok/analysis/archive/BZip2Analyzer.java
Lines changed: 27 additions & 23 deletions b/‎src/org/opensolaris/opengrok/analysis/archive/BZip2Analyzer.java
Lines changed: 27 additions & 23 deletions
diff --git a/‎src/org/opensolaris/opengrok/analysis/archive/GZIPAnalyzer.java
Lines changed: 21 additions & 17 deletions b/‎src/org/opensolaris/opengrok/analysis/archive/GZIPAnalyzer.java
Lines changed: 21 additions & 17 deletions
@@ -232,14 +232,16 @@ public static FileAnalyzer getAnalyzer(InputStream in, String file) throws IOExc
     /**
      * Create a Lucene document and fill in the required fields
      * @param file The file to index
-     * @param in The data to generate the index for
      * @param path Where the file is located (from source root)
+     * @param fa The analyzer to use on the file
+     * @param xrefOut Where to write the xref (possibly {@code null})
      * @return The Lucene document to add to the index database
      * @throws java.io.IOException If an exception occurs while collecting the
      *                             datas
      */
-    public Document getDocument(File file, InputStream in, String path,
-                                FileAnalyzer fa) throws IOException {
+    public Document getDocument(File file, String path,
+                                FileAnalyzer fa, Writer xrefOut)
+            throws IOException {
         Document doc = new Document();
         String date = DateTools.timeToString(file.lastModified(),
             DateTools.Resolution.MILLISECOND);
@@ -272,7 +274,7 @@ public Document getDocument(File file, InputStream in, String path,
                 doc.add(new Field("t", g.typeName(), string_ft_stored_nanalyzed_norms
                     ));
             }                   
-            fa.analyze(doc, in);
+            fa.analyze(doc, StreamSource.fromFile(file), xrefOut);
         }
 
         return doc;
 
@@ -23,24 +23,16 @@
  */
 package org.opensolaris.opengrok.analysis;
 
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
 import java.io.Reader;
 import java.io.Writer;
 import java.util.logging.Level;
-import java.util.zip.GZIPOutputStream;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.opensolaris.opengrok.OpenGrokLogger;
 import org.opensolaris.opengrok.analysis.plain.PlainFullTokenizer;
 import org.opensolaris.opengrok.analysis.plain.PlainSymbolTokenizer;
 import org.opensolaris.opengrok.configuration.Project;
-import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
 
 /**
  * Base class for all different File Analyzers
@@ -138,7 +130,15 @@ public FileAnalyzer(FileAnalyzerFactory factory) {
 
     }
 
-    public void analyze(Document doc, InputStream in) throws IOException {
+    /**
+     * Analyze a source file: populate the Lucene document with index fields
+     * and write the xref to {@code xrefOut}. No-op here; analyzers override it.
+     * @param doc the Lucene document
+     * @param src the input data source
+     * @param xrefOut where to write the xref (may be {@code null})
+     * @throws IOException declared for overriding analyzers; not thrown here
+     */
+    public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
         // not used
     }
 
@@ -161,26 +161,4 @@ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
                 return null;
         }
     }
-
-    /**
-     * Write a cross referenced HTML file.
-     * @param out to writer HTML cross-reference
-     * @throws java.io.IOException if an error occurs
-     */
-    public void writeXref(Writer out) throws IOException {
-        out.write("Error General File X-Ref writer!");
-    }
-
-    public void writeXref(File xrefDir, String path) throws IOException {
-        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
-
-        final boolean compressed = env.isCompressXref();
-        final File file = new File(xrefDir, path + (compressed ? ".gz" : ""));
-        try (OutputStream out = compressed ?
-                    new GZIPOutputStream(new FileOutputStream(file)) :
-                    new FileOutputStream(file);
-                Writer w = new BufferedWriter(new OutputStreamWriter(out))) {
-            writeXref(w);
-        }
-    }
 }
@@ -0,0 +1,67 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ */
+package org.opensolaris.opengrok.analysis;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * This class lets you create {@code InputStream}s that read data from a
+ * specific source. It could be used if you need to pass a stream as an
+ * argument to a method where the stream may need to be read multiple times.
+ * Instead of passing the stream directly, you pass a {@code StreamSource}
+ * instance that generates the stream. The receiver may call
+ * {@link #getStream()} multiple times, getting a fresh stream each time,
+ * so that there may be multiple, concurrent readers that don't interfere
+ * with each other.
+ */
+public abstract class StreamSource {
+    /**
+     * Get a stream that reads data from the input source. Every call should
+     * return a new instance so that multiple readers can read from the source
+     * without interfering with each other.
+     *
+     * @return a fresh {@code InputStream}, which the caller should close
+     * @throws IOException if an error occurs when opening the stream
+     */
+    public abstract InputStream getStream() throws IOException;
+
+    /**
+     * Helper method that creates a {@code StreamSource} instance that reads
+     * data from a file. Each {@code getStream()} opens a new buffered stream.
+     *
+     * @param file the data file
+     * @return a stream source that reads from {@code file}
+     */
+    public static StreamSource fromFile(final File file) {
+        return new StreamSource() {
+            @Override
+            public InputStream getStream() throws IOException {
+                return new BufferedInputStream(new FileInputStream(file));
+            }
+        };
+    }
+}
@@ -18,25 +18,27 @@
  */
 
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  */
 package org.opensolaris.opengrok.analysis;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.Charset;
-import org.apache.lucene.document.Document;
 
 public abstract class TextAnalyzer extends FileAnalyzer {
 
     public TextAnalyzer(FileAnalyzerFactory factory) {
         super(factory);
     }
 
-    @Override
-    public final void analyze(Document doc, InputStream in) throws IOException {
+    protected Reader getReader(InputStream stream) throws IOException {
+        InputStream in = stream.markSupported() ?
+                stream : new BufferedInputStream(stream);
+
         String charset = null;
 
         in.mark(3);
@@ -61,8 +63,6 @@ public final void analyze(Document doc, InputStream in) throws IOException {
             charset = Charset.defaultCharset().name();
         }
 
-        analyze(doc, new InputStreamReader(in, charset));
+        return new InputStreamReader(in, charset);
     }
-
-    protected abstract void analyze(Document doc, Reader reader) throws IOException;
 }
@@ -18,7 +18,7 @@
  */
 
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  */
 package org.opensolaris.opengrok.analysis.archive;
 
@@ -33,6 +33,7 @@
 import org.opensolaris.opengrok.analysis.AnalyzerGuru;
 import org.opensolaris.opengrok.analysis.FileAnalyzer;
 import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
+import org.opensolaris.opengrok.analysis.StreamSource;
 
 /**
  * Analyzes a BZip2 file Created on September 22, 2005
@@ -57,20 +58,16 @@ protected BZip2Analyzer(FileAnalyzerFactory factory) {
     private FileAnalyzer fa;
 
     @Override
-    public void analyze(Document doc, InputStream in) throws IOException {
-        if (in.read() != 'B') {
-            throw new IOException("Not BZIP2 format");
-        }
-        if (in.read() != 'Z') {
-            throw new IOException("Not BZIP2 format");
-        }
-        BufferedInputStream gzis = new BufferedInputStream(new CBZip2InputStream(in));
+    public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
+        StreamSource bzSrc = wrap(src);
         String path = doc.get("path");
         if (path != null
                 && (path.endsWith(".bz2") || path.endsWith(".BZ2") || path.endsWith(".bz"))) {
             String newname = path.substring(0, path.lastIndexOf('.'));
             //System.err.println("BZIPPED OF = " + newname);
-            fa = AnalyzerGuru.getAnalyzer(gzis, newname);
+            try (InputStream in = bzSrc.getStream()) {
+                fa = AnalyzerGuru.getAnalyzer(in, newname);
+            }
             if (fa instanceof BZip2Analyzer) {
                 fa = null;
             } else {
@@ -79,7 +76,7 @@ public void analyze(Document doc, InputStream in) throws IOException {
                 } else {
                     this.g = Genre.DATA;
                 }
-                fa.analyze(doc, gzis);
+                fa.analyze(doc, bzSrc, xrefOut);
                 if (doc.get("t") != null) {
                     doc.removeField("t");
                     if (g == Genre.XREFABLE) {
@@ -90,23 +87,44 @@ public void analyze(Document doc, InputStream in) throws IOException {
         }
     }
 
+    /**
+     * Wrap the raw stream source in one that returns the uncompressed stream.
+     * If a stream cannot be opened as BZip2 data, the raw stream is closed
+     * before the exception propagates, so a failed attempt doesn't leak it.
+     *
+     * @param src the source of the raw (compressed) data
+     * @return a source whose streams yield the decompressed data
+     */
+    private static StreamSource wrap(final StreamSource src) {
+        return new StreamSource() {
+            @Override
+            public InputStream getStream() throws IOException {
+                InputStream raw = src.getStream();
+                try {
+                    // A BZip2 file starts with "BZ", but CBZip2InputStream
+                    // expects the magic bytes to be stripped off first.
+                    if (raw.read() != 'B' || raw.read() != 'Z') {
+                        throw new IOException("Not BZIP2 format");
+                    }
+                    return new BufferedInputStream(new CBZip2InputStream(raw));
+                } catch (IOException | RuntimeException e) {
+                    // The caller never sees raw, so it must be closed here.
+                    try {
+                        raw.close();
+                    } catch (IOException closeFailure) {
+                        e.addSuppressed(closeFailure);
+                    }
+                    throw e;
+                }
+            }
+        };
+    }
+
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         if (fa != null) {
             return fa.createComponents(fieldName, reader);
         }
         return super.createComponents(fieldName, reader);
     }
-
-    /**
-     * Write a cross referenced HTML file.
-     *
-     * @param out Writer to store HTML cross-reference
-     */
-    @Override
-    public void writeXref(Writer out) throws IOException {
-        if ((fa != null) && (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE)) {
-            fa.writeXref(out);
-        }
-    }
 }
@@ -18,7 +18,7 @@
  */
 
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  */
 package org.opensolaris.opengrok.analysis.archive;
 
@@ -36,6 +36,7 @@
 import org.opensolaris.opengrok.analysis.FileAnalyzer;
 import org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
 import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
+import org.opensolaris.opengrok.analysis.StreamSource;
 
 /**
  * Analyzes GZip files Created on September 22, 2005
@@ -60,14 +61,16 @@ protected GZIPAnalyzer(FileAnalyzerFactory factory) {
     private FileAnalyzer fa;
 
     @Override
-    public void analyze(Document doc, InputStream in) throws IOException {
-        BufferedInputStream gzis = new BufferedInputStream(new GZIPInputStream(in));
+    public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
+        StreamSource gzSrc = wrap(src);
         String path = doc.get("path");
         if (path != null
                 && (path.endsWith(".gz") || path.endsWith(".GZ") || path.endsWith(".Gz"))) {
             String newname = path.substring(0, path.length() - 3);
             //System.err.println("GZIPPED OF = " + newname);
-            fa = AnalyzerGuru.getAnalyzer(gzis, newname);
+            try (InputStream gzis = gzSrc.getStream()) {
+                fa = AnalyzerGuru.getAnalyzer(gzis, newname);
+            }
             if (fa == null) {
                 this.g = Genre.DATA;
                 OpenGrokLogger.getLogger().log(Level.WARNING, "Did not analyze {0}, detected as data.", newname);
@@ -79,7 +82,7 @@ public void analyze(Document doc, InputStream in) throws IOException {
                 } else {
                     this.g = Genre.DATA;
                 }
-                fa.analyze(doc, gzis);
+                fa.analyze(doc, gzSrc, xrefOut);
                 if (doc.get("t") != null) {
                     doc.removeField("t");
                     if (g == Genre.XREFABLE) {
@@ -91,23 +94,40 @@ public void analyze(Document doc, InputStream in) throws IOException {
         }
     }
 
+    /**
+     * Wrap the raw stream source in one that returns the uncompressed stream.
+     * If a stream cannot be opened as GZIP data, the raw stream is closed
+     * before the exception propagates, so a failed attempt doesn't leak it.
+     *
+     * @param src the source of the raw (compressed) data
+     * @return a source whose streams yield the decompressed data
+     */
+    private static StreamSource wrap(final StreamSource src) {
+        return new StreamSource() {
+            @Override
+            public InputStream getStream() throws IOException {
+                InputStream raw = src.getStream();
+                try {
+                    // GZIPInputStream reads the GZIP header in its constructor
+                    return new BufferedInputStream(new GZIPInputStream(raw));
+                } catch (IOException | RuntimeException e) {
+                    // The caller never sees raw, so it must be closed here.
+                    try {
+                        raw.close();
+                    } catch (IOException closeFailure) {
+                        e.addSuppressed(closeFailure);
+                    }
+                    throw e;
+                }
+            }
+        };
+    }
+
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         if (fa != null) {
             return fa.createComponents(fieldName, reader);
         }
         return super.createComponents(fieldName, reader);
     }
-
-    /**
-     * Write a cross referenced HTML file.
-     *
-     * @param out Writer to store HTML cross-reference
-     */
-    @Override
-    public void writeXref(Writer out) throws IOException {
-        if ((fa != null) && (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE)) {
-            fa.writeXref(out);
-        }
-    }
 }