Make Iterable2TokenStream a Reader.

kahatlen · kahatlen · commit b5a3e3418c7a · 2013-04-03T09:07:41.000+02:00
Iterable2TokenStream splits the input into tokens in a way similar to
PlainSymbolTokenizer, but not identical. If it is a Reader instead, the
input could be passed on to PlainSymbolTokenizer to produce tokens in
a consistent and uniform way.
diff --git a/src/org/opensolaris/opengrok/analysis/FileAnalyzer.java b/src/org/opensolaris/opengrok/analysis/FileAnalyzer.java
@@ -38,6 +38,7 @@
 import org.apache.lucene.document.Document;
 import org.opensolaris.opengrok.OpenGrokLogger;
 import org.opensolaris.opengrok.analysis.plain.PlainFullTokenizer;
+import org.opensolaris.opengrok.analysis.plain.PlainSymbolTokenizer;
 import org.opensolaris.opengrok.configuration.Project;
 import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
 
@@ -151,6 +152,9 @@ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
                 return new TokenStreamComponents(new PathTokenizer(reader));
             case "hist":
                 return new HistoryAnalyzer().createComponents(fieldName, reader);
+            case "refs":
+            case "defs":
+                return new TokenStreamComponents(new PlainSymbolTokenizer(reader));
             default:
                 OpenGrokLogger.getLogger().log(
                         Level.WARNING, "Have no analyzer for: {0}", fieldName);
diff --git a/src/org/opensolaris/opengrok/analysis/Iterable2TokenStream.java b/src/org/opensolaris/opengrok/analysis/Iterable2TokenStream.java
diff --git a/src/org/opensolaris/opengrok/analysis/IteratorReader.java b/src/org/opensolaris/opengrok/analysis/IteratorReader.java
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ */
+package org.opensolaris.opengrok.analysis;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Iterator;
+
+/**
+ * Class that presents the contents of an {@code Iterator} as a {@code Reader}.
+ * Every element of the {@code Iterator} is followed by a newline character.
+ */
+public final class IteratorReader extends Reader {
+    /** Source of the remaining elements; {@code null} once closed. */
+    private Iterator<String> iterator;
+    private StringReader current;
+
+    public IteratorReader(Iterable<String> iterable) {
+        this(iterable.iterator());
+    }
+
+    public IteratorReader(Iterator<String> iterator) {
+        if (iterator == null) {
+            throw new NullPointerException();
+        }
+        this.iterator = iterator;
+    }
+
+    /**
+     * Reads from the current element, moving on to the next one when needed.
+     * @return number of characters read, or -1 when no elements remain
+     * @throws IOException if this reader has been closed
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        if (iterator == null) {
+            // Reader contract: reading after close() fails with IOException.
+            throw new IOException("Stream closed");
+        }
+        int ret = (current == null) ? -1 : current.read(cbuf, off, len);
+        if (ret < 0 && iterator.hasNext()) {
+            // Current element exhausted (or none yet): read from the next.
+            current = new StringReader(iterator.next() + '\n');
+            ret = current.read(cbuf, off, len);
+        }
+        return ret;
+    }
+
+    @Override
+    public void close() {
+        iterator = null;
+        current = null;
+    }
+}
diff --git a/src/org/opensolaris/opengrok/analysis/executables/JavaClassAnalyzer.java b/src/org/opensolaris/opengrok/analysis/executables/JavaClassAnalyzer.java
@@ -53,7 +53,7 @@
 import org.apache.lucene.document.TextField;
 import org.opensolaris.opengrok.analysis.FileAnalyzer;
 import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
-import org.opensolaris.opengrok.analysis.Iterable2TokenStream;
+import org.opensolaris.opengrok.analysis.IteratorReader;
 import org.opensolaris.opengrok.analysis.TagFilter;
 import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
 
@@ -99,8 +99,8 @@ public void analyze(Document doc, InputStream in) throws IOException {
         }
         String constants = out.toString();
 
-        doc.add(new TextField("defs", new Iterable2TokenStream(defs)));
-        doc.add(new TextField("refs", new Iterable2TokenStream(refs)));
+        doc.add(new TextField("defs", new IteratorReader(defs)));
+        doc.add(new TextField("refs", new IteratorReader(refs)));
         // TODO could be improved, lucene has xhtml parsers/readers
         doc.add(new TextField("full", new TagFilter(xref)));
         doc.add(new TextField("full", constants, Store.NO));
diff --git a/src/org/opensolaris/opengrok/analysis/plain/PlainAnalyzer.java b/src/org/opensolaris/opengrok/analysis/plain/PlainAnalyzer.java
@@ -33,7 +33,7 @@
 import org.opensolaris.opengrok.analysis.Definitions;
 import org.opensolaris.opengrok.analysis.ExpandTabsReader;
 import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
-import org.opensolaris.opengrok.analysis.Iterable2TokenStream;
+import org.opensolaris.opengrok.analysis.IteratorReader;
 import org.opensolaris.opengrok.analysis.TextAnalyzer;
 import org.opensolaris.opengrok.configuration.Project;
 import org.opensolaris.opengrok.history.Annotation;
@@ -82,22 +82,14 @@ public void analyze(Document doc, Reader in) throws IOException {
         if (fullpath != null && ctags != null) {
             defs = ctags.doCtags(fullpath + "\n");
             if (defs != null && defs.numberOfSymbols() > 0) {
-                doc.add(new TextField("defs", new Iterable2TokenStream(defs.getSymbols())));
+                doc.add(new TextField("defs", new IteratorReader(defs.getSymbols())));
                 doc.add(new TextField("refs", getContentReader()));
                 byte[] tags = defs.serialize();
                 doc.add(new StoredField("tags", tags));
             }
         }
     }
 
-    @Override
-    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        if ("refs".equals(fieldName)) {
-            return new TokenStreamComponents(new PlainSymbolTokenizer(reader));
-        }
-        return super.createComponents(fieldName, reader);
-    }
-
     /**
      * Get a reader that reads from the {@link #content} array.
      */
diff --git a/test/org/opensolaris/opengrok/analysis/IteratorReaderTest.java b/test/org/opensolaris/opengrok/analysis/IteratorReaderTest.java
@@ -22,67 +22,50 @@
  */
 package org.opensolaris.opengrok.analysis;
 
+import java.io.BufferedReader;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
 import org.junit.Test;
 import static org.junit.Assert.*;
 
 /**
- * Do basic testing of the Iterable2TokenStream class.
+ * Do basic testing of the IteratorReader class.
  *
  * @author Trond Norbye
  */
-public class Iterable2TokenStreamTest {
-
-    public Iterable2TokenStreamTest() {
-    }
-
-    @BeforeClass
-    public static void setUpClass() throws Exception {
-    }
-
-    @AfterClass
-    public static void tearDownClass() throws Exception {
-    }
-
-    @Before
-    public void setUp() {
-    }
-
-    @After
-    public void tearDown() {
-    }
+public class IteratorReaderTest {
 
     /**
      * Test that we don't get an error when the list is empty.
      */
     @Test
     public void testBug3094() throws IOException {
         List<String> empty = Collections.emptyList();
-        Iterable2TokenStream instance = new Iterable2TokenStream(empty);
-        assertNotNull(instance);
-        assertFalse(instance.incrementToken());        
-        instance.close();
+        try (IteratorReader instance = new IteratorReader(empty)) {
+            assertNotNull(instance);
+            assertEquals(-1, instance.read());
+        }
     }
 
     /**
      * Test that we get an error immediately when constructing a token stream
      * where the list is {@code null}.
      */
-    @Test
+    @Test(expected= NullPointerException.class)
     public void testFailfastOnNull() {
-        try {
-            new Iterable2TokenStream(null);
-            fail("expected an exception");
-        } catch (NullPointerException npe) {
-            // expected
-        }
+        new IteratorReader((List<String>) null);
+    }
+
+    /**
+     * Test that a {@code NullPointerException} is thrown immediately also
+     * when using the constructor that takes an {@code Iterator}.
+     */
+    @Test(expected= NullPointerException.class)
+    public void testFailfastOnNullIterator() {
+        new IteratorReader((Iterator<String>) null);
     }
 
     /**
@@ -93,15 +76,11 @@ public void testFailfastOnNull() {
      */
     @Test
     public void testReadAllTokens() throws IOException {
-        try (Iterable2TokenStream instance = new Iterable2TokenStream(
-                     Arrays.asList("abc.def", "ghi.jkl"))) {
-            int count = 0;
-            while (instance.incrementToken()) {
-                count++;
-            }
-
-            // List2TokenStream used to find only 3 tokens.
-            assertEquals(4, count);
+        try (BufferedReader instance = new BufferedReader(new IteratorReader(
+                     Arrays.asList("abc.def", "ghi.jkl")))) {
+            assertEquals("abc.def", instance.readLine());
+            assertEquals("ghi.jkl", instance.readLine());
+            assertNull(instance.readLine());
         }
     }
 }