|
61 | 61 | import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
62 | 62 | import org.apache.lucene.index.IndexableField;
|
63 | 63 | import org.apache.lucene.index.MultiFields;
|
| 64 | +import org.apache.lucene.index.PostingsEnum; |
64 | 65 | import org.apache.lucene.index.Term;
|
65 | 66 | import org.apache.lucene.index.Terms;
|
66 | 67 | import org.apache.lucene.index.TermsEnum;
|
67 | 68 | import org.apache.lucene.queryparser.classic.ParseException;
|
| 69 | +import org.apache.lucene.search.DocIdSetIterator; |
68 | 70 | import org.apache.lucene.search.IndexSearcher;
|
69 | 71 | import org.apache.lucene.search.Query;
|
70 | 72 | import org.apache.lucene.search.TopDocs;
|
@@ -106,13 +108,17 @@ public class IndexDatabase {
|
106 | 108 | private static final Comparator<File> FILENAME_COMPARATOR =
|
107 | 109 | (File p1, File p2) -> p1.getName().compareTo(p2.getName());
|
108 | 110 |
|
| 111 | + private static final Set<String> CHECK_FIELDS; |
| 112 | + |
109 | 113 | private final Object INSTANCE_LOCK = new Object();
|
110 | 114 |
|
111 | 115 | private Project project;
|
112 | 116 | private FSDirectory indexDirectory;
|
| 117 | + private IndexReader reader; |
113 | 118 | private IndexWriter writer;
|
114 | 119 | private PendingFileCompleter completer;
|
115 | 120 | private TermsEnum uidIter;
|
| 121 | + private PostingsEnum postsIter; |
116 | 122 | private IgnoredNames ignoredNames;
|
117 | 123 | private Filter includedNames;
|
118 | 124 | private AnalyzerGuru analyzerGuru;
|
@@ -155,6 +161,11 @@ public IndexDatabase(Project project) throws IOException {
|
155 | 161 | initialize();
|
156 | 162 | }
|
157 | 163 |
|
    /*
     * Stored fields that are read back from existing index Documents when
     * deciding whether an unmodified file can be skipped during an
     * incremental update. Currently only the stored tab-size setting is
     * compared; add further fields here as more checks are introduced.
     */
    static {
        CHECK_FIELDS = new HashSet<>();
        CHECK_FIELDS.add(QueryBuilder.TABSIZE);
    }
| 168 | + |
158 | 169 | /**
|
159 | 170 | * Update the index database for all of the projects. Print progress to
|
160 | 171 | * standard out.
|
@@ -382,6 +393,11 @@ public void update(IndexerParallelizer parallelizer)
|
382 | 393 | this.parallelizer = parallelizer;
|
383 | 394 | RuntimeEnvironment env = RuntimeEnvironment.getInstance();
|
384 | 395 |
|
| 396 | + reader = null; |
| 397 | + writer = null; |
| 398 | + uidIter = null; |
| 399 | + postsIter = null; |
| 400 | + |
385 | 401 | IOException finishingException = null;
|
386 | 402 | try {
|
387 | 403 | Analyzer analyzer = AnalyzerGuru.getAnalyzer();
|
@@ -422,7 +438,7 @@ public void update(IndexerParallelizer parallelizer)
|
422 | 438 | }
|
423 | 439 |
|
424 | 440 | String startuid = Util.path2uid(dir, "");
|
425 |
| - IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index |
| 441 | + reader = DirectoryReader.open(indexDirectory); // open existing index |
426 | 442 | Terms terms = null;
|
427 | 443 | int numDocs = reader.numDocs();
|
428 | 444 | if (numDocs > 0) {
|
@@ -976,14 +992,19 @@ private void indexDown(File dir, String parent, IndexDownArgs args)
|
976 | 992 | }
|
977 | 993 | }
|
978 | 994 |
|
979 |
| - // If the file was not modified, skip to the next one. |
980 |
| - if (uidIter != null && uidIter.term() != null |
981 |
| - && uidIter.term().bytesEquals(buid)) { |
982 |
| - BytesRef next = uidIter.next(); // keep matching docs |
983 |
| - if (next == null) { |
984 |
| - uidIter = null; |
985 |
| - } |
986 |
| - continue; |
| 995 | + /** |
| 996 | + * If the file was not modified, probably skip to the |
| 997 | + * next one. |
| 998 | + */ |
| 999 | + if (uidIter != null && uidIter.term() != null && |
| 1000 | + uidIter.term().bytesEquals(buid)) { |
| 1001 | + boolean chkres = chkFields(file, path); |
| 1002 | + if (!chkres) removeFile(false); |
| 1003 | + |
| 1004 | + BytesRef next = uidIter.next(); |
| 1005 | + if (next == null) uidIter = null; |
| 1006 | + |
| 1007 | + if (chkres) continue; // keep matching docs |
987 | 1008 | }
|
988 | 1009 | }
|
989 | 1010 |
|
@@ -1490,6 +1511,44 @@ private void finishWriting() throws IOException {
|
1490 | 1511 | }
|
1491 | 1512 | }
|
1492 | 1513 |
|
| 1514 | + private boolean chkFields(File file, String path) throws IOException { |
| 1515 | + int n = 0; |
| 1516 | + postsIter = uidIter.postings(postsIter); |
| 1517 | + while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| 1518 | + ++n; |
| 1519 | + // Read a limited-fields version of the document. |
| 1520 | + Document doc = reader.document(postsIter.docID(), CHECK_FIELDS); |
| 1521 | + if (doc == null) { |
| 1522 | + LOGGER.log(Level.FINER, "No Document: {0}", path); |
| 1523 | + continue; |
| 1524 | + } |
| 1525 | + |
| 1526 | + /** |
| 1527 | + * Verify TABSIZE, or return a value to indicate mismatch. |
| 1528 | + * For an older OpenGrok index that does not yet have TABSIZE, |
| 1529 | + * ignore the check so that no extra work is done. After a re-index, |
| 1530 | + * the TABSIZE check will be active. |
| 1531 | + */ |
| 1532 | + int reqTabSize = project != null ? project.hasTabSizeSetting() ? |
| 1533 | + project.getTabSize() : 0 : 0; |
| 1534 | + IndexableField tbsz = doc.getField(QueryBuilder.TABSIZE); |
| 1535 | + int tbszint = tbsz != null ? tbsz.numericValue().intValue(): 0; |
| 1536 | + if (tbsz != null && tbszint != reqTabSize) { |
| 1537 | + LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path); |
| 1538 | + return false; |
| 1539 | + } |
| 1540 | + |
| 1541 | + break; |
| 1542 | + } |
| 1543 | + if (n < 1) { |
| 1544 | + LOGGER.log(Level.FINER, "Missing index Documents: {0}", path); |
| 1545 | + return false; |
| 1546 | + } |
| 1547 | + |
| 1548 | + // Assume "true" if otherwise no discrepancies were observed. |
| 1549 | + return true; |
| 1550 | + } |
| 1551 | + |
1493 | 1552 | private class IndexDownArgs {
|
1494 | 1553 | boolean count_only;
|
1495 | 1554 | int cur_count;
|
|
0 commit comments