Skip to content

Commit b5a3e34

Browse files
committed
Make Iterable2TokenStream a Reader.
Iterable2TokenStream splits the input into tokens in a way similar to PlainSymbolTokenizer, but not identical. If it is a Reader instead, the input could be passed on to PlainSymbolTokenizer to produce tokens in a consistent and uniform way.
1 parent d28b28e commit b5a3e34

File tree

6 files changed

+108
-125
lines changed

6 files changed

+108
-125
lines changed

src/org/opensolaris/opengrok/analysis/FileAnalyzer.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.apache.lucene.document.Document;
3939
import org.opensolaris.opengrok.OpenGrokLogger;
4040
import org.opensolaris.opengrok.analysis.plain.PlainFullTokenizer;
41+
import org.opensolaris.opengrok.analysis.plain.PlainSymbolTokenizer;
4142
import org.opensolaris.opengrok.configuration.Project;
4243
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
4344

@@ -151,6 +152,9 @@ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
151152
return new TokenStreamComponents(new PathTokenizer(reader));
152153
case "hist":
153154
return new HistoryAnalyzer().createComponents(fieldName, reader);
155+
case "refs":
156+
case "defs":
157+
return new TokenStreamComponents(new PlainSymbolTokenizer(reader));
154158
default:
155159
OpenGrokLogger.getLogger().log(
156160
Level.WARNING, "Have no analyzer for: {0}", fieldName);

src/org/opensolaris/opengrok/analysis/Iterable2TokenStream.java

Lines changed: 0 additions & 67 deletions
This file was deleted.
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opensolaris.opengrok.analysis;
24+
25+
import java.io.IOException;
26+
import java.io.Reader;
27+
import java.io.StringReader;
28+
import java.util.Iterator;
29+
30+
/**
 * Class that presents the contents of an {@code Iterator} as a {@code Reader}.
 * Every element in the {@code Iterator} is followed by a newline character,
 * including the last one.
 *
 * <p>Once {@link #close()} has been called, further reads throw an
 * {@code IOException}.</p>
 */
public final class IteratorReader extends Reader {
    /** Source of the remaining elements; {@code null} once the reader is closed. */
    private Iterator<String> iterator;

    /** Reader over the element currently being consumed; {@code null} before the first read. */
    private StringReader current;

    /**
     * Create a reader over the elements of an {@code Iterable}.
     *
     * @param iterable the elements to read
     * @throws NullPointerException if {@code iterable} or the iterator it
     * returns is {@code null}
     */
    public IteratorReader(Iterable<String> iterable) {
        this(iterable.iterator());
    }

    /**
     * Create a reader over the elements of an {@code Iterator}.
     *
     * @param iterator the elements to read
     * @throws NullPointerException if {@code iterator} is {@code null}
     */
    public IteratorReader(Iterator<String> iterator) {
        if (iterator == null) {
            throw new NullPointerException();
        }
        this.iterator = iterator;
    }

    /**
     * Read characters from the current element, moving on to the next
     * element when the current one is exhausted. A single call never
     * returns characters from more than one element.
     *
     * @param cbuf destination buffer
     * @param off offset in {@code cbuf} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the number of characters read, or -1 if there are no more
     * elements
     * @throws IOException if the reader has been closed
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        // close() clears the iterator. Report reads on a closed reader as
        // an IOException instead of failing with a NullPointerException
        // further down.
        if (iterator == null) {
            throw new IOException("Stream closed");
        }

        if (current != null) {
            int ret = current.read(cbuf, off, len);
            if (ret > 0 || len == 0) {
                // If some data was read, or if no data was requested,
                // we're OK. Return the number of characters read.
                return ret;
            }
        }

        // No more data was found in the current element. Read data from
        // the next element, or return -1 if there are no more elements.
        if (iterator.hasNext()) {
            current = new StringReader(iterator.next() + '\n');
            return current.read(cbuf, off, len);
        }
        return -1;
    }

    /**
     * Close the reader and drop the references to the underlying iterator
     * and the current element. Closing an already closed reader has no
     * effect.
     */
    @Override
    public void close() {
        iterator = null;
        current = null;
    }
}

src/org/opensolaris/opengrok/analysis/executables/JavaClassAnalyzer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
import org.apache.lucene.document.TextField;
5454
import org.opensolaris.opengrok.analysis.FileAnalyzer;
5555
import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
56-
import org.opensolaris.opengrok.analysis.Iterable2TokenStream;
56+
import org.opensolaris.opengrok.analysis.IteratorReader;
5757
import org.opensolaris.opengrok.analysis.TagFilter;
5858
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
5959

@@ -99,8 +99,8 @@ public void analyze(Document doc, InputStream in) throws IOException {
9999
}
100100
String constants = out.toString();
101101

102-
doc.add(new TextField("defs", new Iterable2TokenStream(defs)));
103-
doc.add(new TextField("refs", new Iterable2TokenStream(refs)));
102+
doc.add(new TextField("defs", new IteratorReader(defs)));
103+
doc.add(new TextField("refs", new IteratorReader(refs)));
104104
// TODO could be improved, lucene has xhtml parsers/readers
105105
doc.add(new TextField("full", new TagFilter(xref)));
106106
doc.add(new TextField("full", constants, Store.NO));

src/org/opensolaris/opengrok/analysis/plain/PlainAnalyzer.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
import org.opensolaris.opengrok.analysis.Definitions;
3434
import org.opensolaris.opengrok.analysis.ExpandTabsReader;
3535
import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
36-
import org.opensolaris.opengrok.analysis.Iterable2TokenStream;
36+
import org.opensolaris.opengrok.analysis.IteratorReader;
3737
import org.opensolaris.opengrok.analysis.TextAnalyzer;
3838
import org.opensolaris.opengrok.configuration.Project;
3939
import org.opensolaris.opengrok.history.Annotation;
@@ -82,22 +82,14 @@ public void analyze(Document doc, Reader in) throws IOException {
8282
if (fullpath != null && ctags != null) {
8383
defs = ctags.doCtags(fullpath + "\n");
8484
if (defs != null && defs.numberOfSymbols() > 0) {
85-
doc.add(new TextField("defs", new Iterable2TokenStream(defs.getSymbols())));
85+
doc.add(new TextField("defs", new IteratorReader(defs.getSymbols())));
8686
doc.add(new TextField("refs", getContentReader()));
8787
byte[] tags = defs.serialize();
8888
doc.add(new StoredField("tags", tags));
8989
}
9090
}
9191
}
9292

93-
@Override
94-
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
95-
if ("refs".equals(fieldName)) {
96-
return new TokenStreamComponents(new PlainSymbolTokenizer(reader));
97-
}
98-
return super.createComponents(fieldName, reader);
99-
}
100-
10193
/**
10294
* Get a reader that reads from the {@link #content} array.
10395
*/

test/org/opensolaris/opengrok/analysis/Iterable2TokenStreamTest.java renamed to test/org/opensolaris/opengrok/analysis/IteratorReaderTest.java

Lines changed: 24 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -22,67 +22,50 @@
2222
*/
2323
package org.opensolaris.opengrok.analysis;
2424

25+
import java.io.BufferedReader;
2526
import java.io.IOException;
2627
import java.util.Arrays;
2728
import java.util.Collections;
29+
import java.util.Iterator;
2830
import java.util.List;
29-
import org.junit.After;
30-
import org.junit.AfterClass;
31-
import org.junit.Before;
32-
import org.junit.BeforeClass;
3331
import org.junit.Test;
3432
import static org.junit.Assert.*;
3533

3634
/**
37-
* Do basic testing of the Iterable2TokenStream class.
35+
* Do basic testing of the IteratorReader class.
3836
*
3937
* @author Trond Norbye
4038
*/
41-
public class Iterable2TokenStreamTest {
42-
43-
public Iterable2TokenStreamTest() {
44-
}
45-
46-
@BeforeClass
47-
public static void setUpClass() throws Exception {
48-
}
49-
50-
@AfterClass
51-
public static void tearDownClass() throws Exception {
52-
}
53-
54-
@Before
55-
public void setUp() {
56-
}
57-
58-
@After
59-
public void tearDown() {
60-
}
39+
public class IteratorReaderTest {
6140

6241
/**
6342
* Test that we don't get an error when the list is empty.
6443
*/
6544
@Test
6645
public void testBug3094() throws IOException {
6746
List<String> empty = Collections.emptyList();
68-
Iterable2TokenStream instance = new Iterable2TokenStream(empty);
69-
assertNotNull(instance);
70-
assertFalse(instance.incrementToken());
71-
instance.close();
47+
try (IteratorReader instance = new IteratorReader(empty)) {
48+
assertNotNull(instance);
49+
assertEquals(-1, instance.read());
50+
}
7251
}
7352

7453
/**
7554
* Test that we get an error immediately when constructing a reader
7655
* where the list is {@code null}.
7756
*/
78-
@Test
57+
@Test(expected= NullPointerException.class)
7958
public void testFailfastOnNull() {
80-
try {
81-
new Iterable2TokenStream(null);
82-
fail("expected an exception");
83-
} catch (NullPointerException npe) {
84-
// expected
85-
}
59+
new IteratorReader((List<String>) null);
60+
}
61+
62+
/**
63+
* Test that a {@code NullPointerException} is thrown immediately also
64+
* when using the constructor that takes an {@code Iterator}.
65+
*/
66+
@Test(expected= NullPointerException.class)
67+
public void testFailfastOnNullIterator() {
68+
new IteratorReader((Iterator<String>) null);
8669
}
8770

8871
/**
@@ -93,15 +76,11 @@ public void testFailfastOnNull() {
9376
*/
9477
@Test
9578
public void testReadAllTokens() throws IOException {
96-
try (Iterable2TokenStream instance = new Iterable2TokenStream(
97-
Arrays.asList("abc.def", "ghi.jkl"))) {
98-
int count = 0;
99-
while (instance.incrementToken()) {
100-
count++;
101-
}
102-
103-
// List2TokenStream used to find only 3 tokens.
104-
assertEquals(4, count);
79+
try (BufferedReader instance = new BufferedReader(new IteratorReader(
80+
Arrays.asList("abc.def", "ghi.jkl")))) {
81+
assertEquals("abc.def", instance.readLine());
82+
assertEquals("ghi.jkl", instance.readLine());
83+
assertNull(instance.readLine());
10584
}
10685
}
10786
}

0 commit comments

Comments
 (0)