Skip to content

Commit 1320cd4

Browse files
Lubos Kosco
authored and committed
Fix duplicate documents and tests for adding/removing files to/from the index.
This fixes #7.
1 parent a39bcfe commit 1320cd4

File tree

2 files changed

+34
-25
lines changed

2 files changed

+34
-25
lines changed

src/org/opensolaris/opengrok/index/IndexDatabase.java

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
* one index database per project.
7676
*
7777
* @author Trond Norbye
78-
* @author Lubos Kosco , update for lucene 4.0.0
78+
* @author Lubos Kosco , update for lucene 4.2.0
7979
*/
8080
public class IndexDatabase {
8181

@@ -345,7 +345,7 @@ public void update() throws IOException, HistoryException {
345345
directories.add(project.getPath());
346346
}
347347
}
348-
348+
349349
for (String dir : directories) {
350350
File sourceRoot;
351351
if ("".equals(dir)) {
@@ -367,9 +367,11 @@ public void update() throws IOException, HistoryException {
367367

368368
try {
369369
if (numDocs > 0) {
370-
uidIter = terms.iterator(null);
371-
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid), true); //init uid
372-
if (stat==TermsEnum.SeekStatus.END || stat==TermsEnum.SeekStatus.NOT_FOUND) { uidIter=null; }
370+
uidIter = terms.iterator(uidIter);
371+
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
372+
if (stat==TermsEnum.SeekStatus.END) { uidIter=null;
373+
log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
374+
}
373375
}
374376
//TODO below should be optional, since it traverses the tree once more to get total count! :(
375377
int file_cnt = 0;
@@ -385,7 +387,8 @@ public void update() throws IOException, HistoryException {
385387

386388
while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
387389
removeFile();
388-
uidIter.next();
390+
BytesRef next = uidIter.next();
391+
if (next==null) {uidIter=null;}
389392
}
390393
} finally {
391394
reader.close();
@@ -802,7 +805,7 @@ private boolean isLocal(String path) {
802805
*
803806
*/
804807
private int indexDown(File dir, String parent, boolean count_only, int cur_count, int est_total) throws IOException {
805-
int lcur_count = cur_count;
808+
int lcur_count = cur_count;
806809
if (isInterrupted()) {
807810
return lcur_count;
808811
}
@@ -842,16 +845,18 @@ public int compare(File p1, File p2) {
842845
if (uidIter != null) {
843846
String uid = Util.path2uid(path, DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND)); // construct uid for doc
844847
BytesRef buid = new BytesRef(uid);
845-
while (uidIter.term() != null
848+
while (uidIter != null && uidIter.term() != null
846849
&& uidIter.term().compareTo(emptyBR) !=0
847850
&& uidIter.term().compareTo(buid) < 0) {
848851
removeFile();
849-
uidIter.next();
852+
BytesRef next = uidIter.next();
853+
if (next==null) {uidIter=null;}
850854
}
851855

852-
if (uidIter.term() != null
856+
if (uidIter != null && uidIter.term() != null
853857
&& uidIter.term().bytesEquals(buid)) {
854-
uidIter.next(); // keep matching docs
858+
BytesRef next = uidIter.next(); // keep matching docs
859+
if (next==null) {uidIter=null;}
855860
continue;
856861
}
857862
}
@@ -953,8 +958,8 @@ public static void listAllFiles(List<String> subFiles) throws IOException {
953958
*/
954959
public void listFiles() throws IOException {
955960
IndexReader ireader = null;
956-
TermsEnum iter;
957-
Terms terms = null;
961+
TermsEnum iter=null;
962+
Terms terms = null;
958963

959964
try {
960965
ireader = DirectoryReader.open(indexDirectory); // open existing index
@@ -963,10 +968,11 @@ public void listFiles() throws IOException {
963968
Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
964969
terms = uFields.terms(QueryBuilder.U);
965970
}
966-
iter = terms.iterator(null); // init uid iterator
967-
while (iter.term() != null) {
971+
iter = terms.iterator(iter); // init uid iterator
972+
while (iter != null && iter.term() != null) {
968973
log.fine(Util.uid2url(iter.term().utf8ToString()));
969-
iter.next();
974+
BytesRef next=iter.next();
975+
if (next==null) {iter=null;}
970976
}
971977
} finally {
972978

@@ -1014,7 +1020,7 @@ static void listFrequentTokens(List<String> subFiles) throws IOException {
10141020
public void listTokens(int freq) throws IOException {
10151021
IndexReader ireader = null;
10161022
TermsEnum iter = null;
1017-
Terms terms = null;
1023+
Terms terms = null;
10181024

10191025
try {
10201026
ireader = DirectoryReader.open(indexDirectory);
@@ -1023,13 +1029,14 @@ public void listTokens(int freq) throws IOException {
10231029
Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
10241030
terms = uFields.terms(QueryBuilder.DEFS);
10251031
}
1026-
iter = terms.iterator(null); // init uid iterator
1027-
while (iter.term() != null) {
1032+
iter = terms.iterator(iter); // init uid iterator
1033+
while (iter != null && iter.term() != null) {
10281034
//if (iter.term().field().startsWith("f")) {
10291035
if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
10301036
log.warning(iter.term().utf8ToString());
10311037
}
1032-
iter.next();
1038+
BytesRef next = iter.next();
1039+
if (next==null) {iter=null;}
10331040
/*} else {
10341041
break;
10351042
}*/

test/org/opensolaris/opengrok/index/IndexerTest.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,8 @@ public void testMain() throws IOException {
181181

182182
private class MyIndexChangeListener implements org.opensolaris.opengrok.index.IndexChangedListener {
183183

184-
List<String> files = new ArrayList<String>();
184+
List<String> files = new ArrayList<>();
185+
List<String> removedFiles = new ArrayList<>();
185186

186187
@Override
187188
public void fileAdd(String path, String analyzer) {
@@ -203,10 +204,11 @@ public void fileUpdate(String path) {
203204
@Override
204205
public void fileRemoved(String path) {
205206
files.remove(path);
207+
removedFiles.add(path);
206208
}
207209

208210
public void reset() {
209-
this.files = new ArrayList<String>();
211+
this.files = new ArrayList<>();
210212
}
211213
}
212214

@@ -311,11 +313,11 @@ public void testIncrementalIndexAddRemoveFile() throws Exception {
311313
listener.reset();
312314
repository.addDummyFile(ppath);
313315
idb.update();
314-
assertEquals("No new file added",2, listener.files.size());
315-
listener.reset();
316+
assertEquals("No new file added",1, listener.files.size());
316317
repository.removeDummyFile(ppath);
317318
idb.update();
318-
assertEquals("Didn't remove the dummy file",1, listener.files.size());
319+
assertEquals("Didn't remove the dummy file",0, listener.files.size());
320+
assertEquals("Didn't remove the dummy file",1, listener.removedFiles.size());
319321
} else {
320322
System.out.println("Skipping test. Could not find a ctags I could use in path.");
321323
}

0 commit comments

Comments
 (0)