|
16 | 16 | */
|
17 | 17 | package org.apache.lucene.index;
|
18 | 18 |
|
| 19 | +import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; |
| 20 | + |
19 | 21 | import java.io.IOException;
|
20 | 22 | import org.apache.lucene.analysis.MockAnalyzer;
|
21 |
| -import org.apache.lucene.analysis.MockTokenizer; |
| 23 | +import org.apache.lucene.analysis.TokenStream; |
22 | 24 | import org.apache.lucene.document.Document;
|
23 | 25 | import org.apache.lucene.document.Field;
|
24 | 26 | import org.apache.lucene.document.FieldType;
|
25 | 27 | import org.apache.lucene.document.TextField;
|
26 | 28 | import org.apache.lucene.store.Directory;
|
27 |
| -import org.apache.lucene.util.English; |
| 29 | +import org.apache.lucene.util.BytesRef; |
28 | 30 | import org.apache.lucene.util.IOUtils;
|
29 | 31 | import org.apache.lucene.util.LuceneTestCase;
|
30 | 32 | import org.apache.lucene.util.TestUtil;
|
31 |
| -import org.junit.AfterClass; |
32 |
| -import org.junit.BeforeClass; |
33 | 33 |
|
34 | 34 | public class TestTermVectors extends LuceneTestCase {
|
35 |
| - private static IndexReader reader; |
36 |
| - private static Directory directory; |
37 |
| - |
38 |
| - @BeforeClass |
39 |
| - public static void beforeClass() throws Exception { |
40 |
| - directory = newDirectory(); |
41 |
| - RandomIndexWriter writer = |
42 |
| - new RandomIndexWriter( |
43 |
| - random(), |
44 |
| - directory, |
45 |
| - newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)) |
46 |
| - .setMergePolicy(newLogMergePolicy())); |
47 |
| - // writer.setNoCFSRatio(1.0); |
48 |
| - // writer.infoStream = System.out; |
49 |
| - for (int i = 0; i < 1000; i++) { |
50 |
| - Document doc = new Document(); |
51 |
| - FieldType ft = new FieldType(TextField.TYPE_STORED); |
52 |
| - int mod3 = i % 3; |
53 |
| - int mod2 = i % 2; |
54 |
| - if (mod2 == 0 && mod3 == 0) { |
55 |
| - ft.setStoreTermVectors(true); |
56 |
| - ft.setStoreTermVectorOffsets(true); |
57 |
| - ft.setStoreTermVectorPositions(true); |
58 |
| - } else if (mod2 == 0) { |
59 |
| - ft.setStoreTermVectors(true); |
60 |
| - ft.setStoreTermVectorPositions(true); |
61 |
| - } else if (mod3 == 0) { |
62 |
| - ft.setStoreTermVectors(true); |
63 |
| - ft.setStoreTermVectorOffsets(true); |
64 |
| - } else { |
65 |
| - ft.setStoreTermVectors(true); |
66 |
| - } |
67 |
| - doc.add(new Field("field", English.intToEnglish(i), ft)); |
68 |
| - // test no term vectors too |
69 |
| - doc.add(new TextField("noTV", English.intToEnglish(i), Field.Store.YES)); |
70 |
| - writer.addDocument(doc); |
71 |
| - } |
72 |
| - reader = writer.getReader(); |
73 |
| - writer.close(); |
74 |
| - } |
75 |
| - |
76 |
| - @AfterClass |
77 |
| - public static void afterClass() throws Exception { |
78 |
| - reader.close(); |
79 |
| - directory.close(); |
80 |
| - reader = null; |
81 |
| - directory = null; |
82 |
| - } |
83 | 35 |
|
84 | 36 | private IndexWriter createWriter(Directory dir) throws IOException {
|
85 | 37 | return new IndexWriter(
|
@@ -166,4 +118,98 @@ public void testFullMergeAddIndexesReader() throws Exception {
|
166 | 118 | verifyIndex(target);
|
167 | 119 | IOUtils.close(target, input[0], input[1]);
|
168 | 120 | }
|
| 121 | + |
| 122 | + /** |
| 123 | + * Assert that a merged segment has payloads set up in fieldInfo, if at least 1 segment has |
| 124 | + * payloads for this field. |
| 125 | + */ |
| 126 | + public void testMergeWithPayloads() throws Exception { |
| 127 | + final FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED); |
| 128 | + ft1.setStoreTermVectors(true); |
| 129 | + ft1.setStoreTermVectorOffsets(true); |
| 130 | + ft1.setStoreTermVectorPositions(true); |
| 131 | + ft1.setStoreTermVectorPayloads(true); |
| 132 | + ft1.freeze(); |
| 133 | + |
| 134 | + final int numDocsInSegment = 10; |
| 135 | + for (boolean hasPayloads : new boolean[] {false, true}) { |
| 136 | + Directory dir = newDirectory(); |
| 137 | + IndexWriterConfig indexWriterConfig = |
| 138 | + new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(numDocsInSegment); |
| 139 | + IndexWriter writer = new IndexWriter(dir, indexWriterConfig); |
| 140 | + TokenStreamGenerator tkg1 = new TokenStreamGenerator(hasPayloads); |
| 141 | + TokenStreamGenerator tkg2 = new TokenStreamGenerator(!hasPayloads); |
| 142 | + |
| 143 | + // create one segment with payloads, and another without payloads |
| 144 | + for (int i = 0; i < numDocsInSegment; i++) { |
| 145 | + Document doc = new Document(); |
| 146 | + doc.add(new Field("c", tkg1.newTokenStream(), ft1)); |
| 147 | + writer.addDocument(doc); |
| 148 | + } |
| 149 | + for (int i = 0; i < numDocsInSegment; i++) { |
| 150 | + Document doc = new Document(); |
| 151 | + doc.add(new Field("c", tkg2.newTokenStream(), ft1)); |
| 152 | + writer.addDocument(doc); |
| 153 | + } |
| 154 | + |
| 155 | + IndexReader reader1 = writer.getReader(); |
| 156 | + assertEquals(2, reader1.leaves().size()); |
| 157 | + assertEquals( |
| 158 | + hasPayloads, |
| 159 | + reader1.leaves().get(0).reader().getFieldInfos().fieldInfo("c").hasPayloads()); |
| 160 | + assertNotEquals( |
| 161 | + hasPayloads, |
| 162 | + reader1.leaves().get(1).reader().getFieldInfos().fieldInfo("c").hasPayloads()); |
| 163 | + |
| 164 | + writer.forceMerge(1); |
| 165 | + IndexReader reader2 = writer.getReader(); |
| 166 | + assertEquals(1, reader2.leaves().size()); |
| 167 | + // assert that in the merged segments payloads set up for the field |
| 168 | + assertTrue(reader2.leaves().get(0).reader().getFieldInfos().fieldInfo("c").hasPayloads()); |
| 169 | + |
| 170 | + IOUtils.close(writer, reader1, reader2, dir); |
| 171 | + } |
| 172 | + } |
| 173 | + |
| 174 | + /** A generator for token streams with optional null payloads */ |
| 175 | + private static class TokenStreamGenerator { |
| 176 | + private final String[] terms; |
| 177 | + private final BytesRef[] termBytes; |
| 178 | + private final boolean hasPayloads; |
| 179 | + |
| 180 | + public TokenStreamGenerator(boolean hasPayloads) { |
| 181 | + this.hasPayloads = hasPayloads; |
| 182 | + final int termsCount = 10; |
| 183 | + terms = new String[termsCount]; |
| 184 | + termBytes = new BytesRef[termsCount]; |
| 185 | + for (int i = 0; i < termsCount; ++i) { |
| 186 | + terms[i] = TestUtil.randomRealisticUnicodeString(random()); |
| 187 | + termBytes[i] = new BytesRef(terms[i]); |
| 188 | + } |
| 189 | + } |
| 190 | + |
| 191 | + public TokenStream newTokenStream() { |
| 192 | + return new OptionalNullPayloadTokenStream(TestUtil.nextInt(random(), 1, 5), terms, termBytes); |
| 193 | + } |
| 194 | + |
| 195 | + private class OptionalNullPayloadTokenStream |
| 196 | + extends BaseTermVectorsFormatTestCase.RandomTokenStream { |
| 197 | + public OptionalNullPayloadTokenStream( |
| 198 | + int len, String[] sampleTerms, BytesRef[] sampleTermBytes) { |
| 199 | + super(len, sampleTerms, sampleTermBytes); |
| 200 | + } |
| 201 | + |
| 202 | + @Override |
| 203 | + protected BytesRef randomPayload() { |
| 204 | + if (hasPayloads == false) { |
| 205 | + return null; |
| 206 | + } |
| 207 | + final int len = randomIntBetween(1, 5); |
| 208 | + final BytesRef payload = new BytesRef(len); |
| 209 | + random().nextBytes(payload.bytes); |
| 210 | + payload.length = len; |
| 211 | + return payload; |
| 212 | + } |
| 213 | + } |
| 214 | + } |
169 | 215 | }
|
0 commit comments