|
61 | 61 | import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
62 | 62 | import org.apache.lucene.index.IndexableField;
|
63 | 63 | import org.apache.lucene.index.MultiFields;
|
| 64 | +import org.apache.lucene.index.PostingsEnum; |
64 | 65 | import org.apache.lucene.index.Term;
|
65 | 66 | import org.apache.lucene.index.Terms;
|
66 | 67 | import org.apache.lucene.index.TermsEnum;
|
67 | 68 | import org.apache.lucene.queryparser.classic.ParseException;
|
| 69 | +import org.apache.lucene.search.DocIdSetIterator; |
68 | 70 | import org.apache.lucene.search.IndexSearcher;
|
69 | 71 | import org.apache.lucene.search.Query;
|
70 | 72 | import org.apache.lucene.search.TopDocs;
|
@@ -106,13 +108,17 @@ public class IndexDatabase {
|
106 | 108 | private static final Comparator<File> FILENAME_COMPARATOR =
|
107 | 109 | (File p1, File p2) -> p1.getName().compareTo(p2.getName());
|
108 | 110 |
|
| 111 | + private static final Set<String> CHECK_FIELDS; |
| 112 | + |
109 | 113 | private final Object INSTANCE_LOCK = new Object();
|
110 | 114 |
|
111 | 115 | private Project project;
|
112 | 116 | private FSDirectory indexDirectory;
|
| 117 | + private IndexReader reader; |
113 | 118 | private IndexWriter writer;
|
114 | 119 | private PendingFileCompleter completer;
|
115 | 120 | private TermsEnum uidIter;
|
| 121 | + private PostingsEnum postsIter; |
116 | 122 | private IgnoredNames ignoredNames;
|
117 | 123 | private Filter includedNames;
|
118 | 124 | private AnalyzerGuru analyzerGuru;
|
@@ -155,6 +161,11 @@ public IndexDatabase(Project project) throws IOException {
|
155 | 161 | initialize();
|
156 | 162 | }
|
157 | 163 |
|
    /*
     * Stored fields that are read back from existing index Documents when
     * deciding whether an unmodified file can be skipped during an
     * incremental update. Currently only the stored tab-size setting is
     * compared; add further fields here as more checks are introduced.
     */
    static {
        CHECK_FIELDS = new HashSet<>();
        CHECK_FIELDS.add(QueryBuilder.TABSIZE);
    }
| 168 | + |
158 | 169 | /**
|
159 | 170 | * Update the index database for all of the projects. Print progress to
|
160 | 171 | * standard out.
|
@@ -382,6 +393,11 @@ public void update(IndexerParallelizer parallelizer)
|
382 | 393 | this.parallelizer = parallelizer;
|
383 | 394 | RuntimeEnvironment env = RuntimeEnvironment.getInstance();
|
384 | 395 |
|
| 396 | + reader = null; |
| 397 | + writer = null; |
| 398 | + uidIter = null; |
| 399 | + postsIter = null; |
| 400 | + |
385 | 401 | IOException finishingException = null;
|
386 | 402 | try {
|
387 | 403 | Analyzer analyzer = AnalyzerGuru.getAnalyzer();
|
@@ -422,7 +438,7 @@ public void update(IndexerParallelizer parallelizer)
|
422 | 438 | }
|
423 | 439 |
|
424 | 440 | String startuid = Util.path2uid(dir, "");
|
425 |
| - IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index |
| 441 | + reader = DirectoryReader.open(indexDirectory); // open existing index |
426 | 442 | Terms terms = null;
|
427 | 443 | int numDocs = reader.numDocs();
|
428 | 444 | if (numDocs > 0) {
|
@@ -976,14 +992,19 @@ private void indexDown(File dir, String parent, IndexDownArgs args)
|
976 | 992 | }
|
977 | 993 | }
|
978 | 994 |
|
979 |
| - // If the file was not modified, skip to the next one. |
980 |
| - if (uidIter != null && uidIter.term() != null |
981 |
| - && uidIter.term().bytesEquals(buid)) { |
982 |
| - BytesRef next = uidIter.next(); // keep matching docs |
983 |
| - if (next == null) { |
984 |
| - uidIter = null; |
985 |
| - } |
986 |
| - continue; |
| 995 | + /** |
| 996 | + * If the file was not modified, probably skip to the |
| 997 | + * next one. |
| 998 | + */ |
| 999 | + if (uidIter != null && uidIter.term() != null && |
| 1000 | + uidIter.term().bytesEquals(buid)) { |
| 1001 | + boolean chkres = chkFields(file, path); |
| 1002 | + if (!chkres) removeFile(false); |
| 1003 | + |
| 1004 | + BytesRef next = uidIter.next(); |
| 1005 | + if (next == null) uidIter = null; |
| 1006 | + |
| 1007 | + if (chkres) continue; // keep matching docs |
987 | 1008 | }
|
988 | 1009 | }
|
989 | 1010 |
|
@@ -1490,6 +1511,44 @@ private void finishWriting() throws IOException {
|
1490 | 1511 | }
|
1491 | 1512 | }
|
1492 | 1513 |
|
| 1514 | + private boolean chkFields(File file, String path) throws IOException { |
| 1515 | + int n = 0; |
| 1516 | + postsIter = uidIter.postings(postsIter); |
| 1517 | + while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| 1518 | + ++n; |
| 1519 | + // Read a limited-fields version of the document. |
| 1520 | + Document doc = reader.document(postsIter.docID(), CHECK_FIELDS); |
| 1521 | + if (doc == null) { |
| 1522 | + LOGGER.log(Level.FINER, "No Document: {0}", path); |
| 1523 | + continue; |
| 1524 | + } |
| 1525 | + |
| 1526 | + /** |
| 1527 | + * Verify TABSIZE, or return a value to indicate mismatch. |
| 1528 | + * For an older OpenGrok index that does not yet have TABSIZE, |
| 1529 | + * ignore the check so that no extra work is done. After a re-index, |
| 1530 | + * the TABSIZE check will be active. |
| 1531 | + */ |
| 1532 | + int reqTabSize = project != null ? project.hasTabSizeSetting() ? |
| 1533 | + project.getTabSize() : 0 : 0; |
| 1534 | + IndexableField tbsz = doc.getField(QueryBuilder.TABSIZE); |
| 1535 | + int tbszint = tbsz != null ? tbsz.numericValue().intValue(): 0; |
| 1536 | + if (tbsz != null && tbszint != reqTabSize) { |
| 1537 | + LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path); |
| 1538 | + return false; |
| 1539 | + } |
| 1540 | + |
| 1541 | + break; |
| 1542 | + } |
| 1543 | + if (n < 1) { |
| 1544 | + LOGGER.log(Level.FINER, "Missing index Documents: {0}", path); |
| 1545 | + return false; |
| 1546 | + } |
| 1547 | + |
| 1548 | + // Assume "true" if otherwise no discrepancies were observed. |
| 1549 | + return true; |
| 1550 | + } |
| 1551 | + |
1493 | 1552 | private class IndexDownArgs {
|
1494 | 1553 | boolean count_only;
|
1495 | 1554 | int cur_count;
|
|
0 commit comments