Skip to content

Commit f96b76b

Browse files
committed
add check for definitions
1 parent 2046944 commit f96b76b

File tree

4 files changed

+257
-55
lines changed

4 files changed

+257
-55
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexCheck.java

Lines changed: 188 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,24 @@
2323
*/
2424
package org.opengrok.indexer.index;
2525

26+
import java.io.BufferedReader;
27+
import java.io.FileReader;
2628
import java.io.IOException;
29+
import java.nio.file.FileVisitResult;
30+
import java.nio.file.Files;
2731
import java.nio.file.Path;
32+
import java.nio.file.SimpleFileVisitor;
33+
import java.nio.file.attribute.BasicFileAttributes;
2834
import java.util.ArrayList;
2935
import java.util.Collection;
3036
import java.util.HashSet;
3137
import java.util.List;
3238
import java.util.Map;
3339
import java.util.Set;
3440
import java.util.concurrent.ConcurrentHashMap;
41+
import java.util.concurrent.CountDownLatch;
42+
import java.util.concurrent.ExecutorService;
43+
import java.util.concurrent.Future;
3544
import java.util.logging.Level;
3645
import java.util.logging.Logger;
3746

@@ -42,6 +51,7 @@
4251
import org.apache.lucene.index.IndexableField;
4352
import org.apache.lucene.index.MultiBits;
4453
import org.apache.lucene.index.SegmentInfos;
54+
import org.apache.lucene.queryparser.classic.ParseException;
4555
import org.apache.lucene.store.Directory;
4656
import org.apache.lucene.store.FSDirectory;
4757
import org.apache.lucene.store.LockFactory;
@@ -51,7 +61,9 @@
5161
import org.apache.lucene.util.Version;
5262
import org.jetbrains.annotations.NotNull;
5363
import org.jetbrains.annotations.Nullable;
64+
import org.opengrok.indexer.analysis.Definitions;
5465
import org.opengrok.indexer.configuration.Configuration;
66+
import org.opengrok.indexer.configuration.RuntimeEnvironment;
5567
import org.opengrok.indexer.logger.LoggerFactory;
5668
import org.opengrok.indexer.search.QueryBuilder;
5769
import org.opengrok.indexer.util.Statistics;
@@ -71,6 +83,7 @@ public class IndexCheck {
7183
public enum IndexCheckMode {
7284
NO_CHECK,
7385
VERSION,
86+
DEFINITIONS,
7487
DOCUMENTS
7588
}
7689

@@ -138,7 +151,7 @@ private IndexCheck() {
138151
* @return true on success, false on failure
139152
*/
140153
public static boolean isOkay(@NotNull Configuration configuration, IndexCheckMode mode,
141-
Collection<String> projectNames) {
154+
Collection<String> projectNames) throws IOException {
142155

143156
if (mode.equals(IndexCheckMode.NO_CHECK)) {
144157
LOGGER.log(Level.WARNING, "no index check mode selected");
@@ -151,15 +164,17 @@ public static boolean isOkay(@NotNull Configuration configuration, IndexCheckMod
151164
if (!projectNames.isEmpty()) {
152165
// Assumes projects are enabled.
153166
for (String projectName : projectNames) {
154-
ret |= checkDirNoExceptions(Path.of(indexRoot.toString(), projectName), mode);
167+
ret |= checkDirFilterExceptions(Path.of(configuration.getSourceRoot()),
168+
Path.of(indexRoot.toString(), projectName), mode);
155169
}
156170
} else {
157171
if (configuration.isProjectsEnabled()) {
158172
for (String projectName : configuration.getProjects().keySet()) {
159-
ret |= checkDirNoExceptions(Path.of(indexRoot.toString(), projectName), mode);
173+
ret |= checkDirFilterExceptions(Path.of(configuration.getSourceRoot()),
174+
Path.of(indexRoot.toString(), projectName), mode);
160175
}
161176
} else {
162-
ret |= checkDirNoExceptions(indexRoot, mode);
177+
ret |= checkDirFilterExceptions(Path.of(configuration.getSourceRoot()), indexRoot, mode);
163178
}
164179
}
165180

@@ -168,15 +183,15 @@ public static boolean isOkay(@NotNull Configuration configuration, IndexCheckMod
168183

169184
/**
170185
* @param indexPath directory with index
171-
* @return 0 on success, 1 on failure
186+
* @return 0 on success, 1 on failure (index check failed)
187+
* @throws IOException on I/O error
172188
*/
173-
private static int checkDirNoExceptions(Path indexPath, IndexCheckMode mode) {
189+
private static int checkDirFilterExceptions(Path sourcePath, Path indexPath, IndexCheckMode mode) throws IOException {
174190
try {
175-
LOGGER.log(Level.INFO, "Checking index in ''{0}''", indexPath);
176-
checkDir(indexPath, mode);
191+
LOGGER.log(Level.INFO, "Checking index in ''{0}'' (mode {1})", new Object[]{indexPath, mode});
192+
checkDir(sourcePath, indexPath, mode);
177193
} catch (IOException e) {
178-
LOGGER.log(Level.WARNING, String.format("Could not perform index check for directory '%s'", indexPath), e);
179-
return 0;
194+
throw e;
180195
} catch (Exception e) {
181196
LOGGER.log(Level.WARNING, String.format("Index check for directory '%s' failed", indexPath), e);
182197
return 1;
@@ -190,15 +205,174 @@ private static int checkDirNoExceptions(Path indexPath, IndexCheckMode mode) {
190205
* Check index in given directory. It assumes that that all commits (if any)
191206
* in the Lucene segment file were done with the same version.
192207
*
208+
* @param sourcePath path to source directory
193209
* @param indexPath directory with index to check
194210
* @param mode index check mode
195211
* @throws IOException if the directory cannot be opened
196212
* @throws IndexVersionException if the version of the index does not match Lucene index version
197213
* @throws IndexDocumentException if there are duplicate documents in the index
198214
*/
199-
public static void checkDir(Path indexPath, IndexCheckMode mode)
200-
throws IndexVersionException, IndexDocumentException, IOException {
215+
public static void checkDir(Path sourcePath, Path indexPath, IndexCheckMode mode)
216+
throws IndexVersionException, IndexDocumentException, IOException, ParseException, ClassNotFoundException {
217+
218+
switch (mode) {
219+
case VERSION:
220+
checkVersion(indexPath);
221+
break;
222+
case DOCUMENTS:
223+
checkDuplicateDocuments(indexPath);
224+
break;
225+
case DEFINITIONS:
226+
checkDefinitions(sourcePath, indexPath);
227+
}
228+
}
229+
230+
private static List<String> getLines(Path path) throws IOException {
231+
List<String> lines = new ArrayList<>();
232+
try (BufferedReader bufferedReader = new BufferedReader(new FileReader(path.toFile()))) {
233+
String line;
234+
while ((line = bufferedReader.readLine()) != null) {
235+
lines.add(line);
236+
}
237+
}
238+
239+
return lines;
240+
}
241+
242+
/**
243+
* Crosscheck definitions found in the index for given file w.r.t. actual file contents.
244+
* There is a number of cases this check can fail even for legitimate cases. This is why
245+
* certain patterns are skipped.
246+
* @param path path to the file being checked
247+
* @return okay indication
248+
*/
249+
private static boolean checkDefinitionsForFile(Path path) throws ParseException, IOException, ClassNotFoundException {
250+
251+
// Avoid paths with certain suffixes. These exhibit some behavior that cannot be handled
252+
// For example, '1;' in Perl code is interpreted by Universal Ctags as 'STDOUT'.
253+
Set<String> suffixesToAvoid = Set.of(".sh", ".SH", ".pod", ".pl", ".pm", ".js", ".json", ".css");
254+
if (suffixesToAvoid.stream().anyMatch(s -> path.toString().endsWith(s))) {
255+
return true;
256+
}
257+
258+
boolean okay = true;
259+
Definitions defs = IndexDatabase.getDefinitions(path.toFile());
260+
if (defs != null) {
261+
LOGGER.log(Level.FINE, "checking definitions for ''{0}''", path);
262+
List<String> lines = getLines(path);
263+
264+
for (Definitions.Tag tag : defs.getTags()) {
265+
// These symbols are sometimes produced by Universal Ctags even though they are not
266+
// actually present in the file.
267+
if (tag.symbol.startsWith("__anon")) {
268+
continue;
269+
}
270+
271+
// Needed for some TeX definitions.
272+
String symbol = tag.symbol;
273+
if (symbol.contains("\\")) {
274+
symbol = symbol.replace("\\", "");
275+
}
276+
277+
// C++ operator overload symbol contains extra space, ignore them for now.
278+
if (symbol.startsWith("operator ")) {
279+
continue;
280+
}
281+
282+
// These could be e.g. C structure members, having their line number equal to
283+
// where the structure definition starts, ignore.
284+
if (tag.type.equals("argument")) {
285+
continue;
286+
}
287+
288+
if (!lines.get(tag.line - 1).contains(symbol)) {
289+
// Line wrap, skip it.
290+
if (lines.get(tag.line - 1).endsWith("\\")) {
291+
continue;
292+
}
293+
294+
// Line wraps cause the symbol to be reported on different line than it resides on.
295+
// Perform more thorough/expensive check.
296+
final String str = symbol;
297+
if (lines.stream().noneMatch(l -> l.contains(str))) {
298+
LOGGER.log(Level.WARNING, String.format("'%s' does not contain '%s' (should be on line %d)",
299+
path, symbol, tag.line));
300+
okay = false;
301+
}
302+
}
303+
}
304+
}
305+
306+
return okay;
307+
}
308+
309+
private static class GetFiles extends SimpleFileVisitor<Path> {
310+
Set<Path> files = new HashSet<>();
311+
312+
@Override
313+
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
314+
if (RuntimeEnvironment.getInstance().getIgnoredNames().ignore(dir.toFile())) {
315+
return FileVisitResult.SKIP_SUBTREE;
316+
}
317+
318+
return FileVisitResult.CONTINUE;
319+
}
320+
321+
@Override
322+
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
323+
if (file.toFile().isFile()) {
324+
files.add(file);
325+
}
326+
327+
return FileVisitResult.CONTINUE;
328+
}
329+
}
201330

331+
private static void checkDefinitions(Path sourcePath, Path indexPath) throws IOException, IndexDocumentException {
332+
333+
Statistics statistics = new Statistics();
334+
GetFiles getFiles = new GetFiles();
335+
Files.walkFileTree(sourcePath, getFiles);
336+
Set<Path> paths = getFiles.files;
337+
LOGGER.log(Level.FINE, "Checking definitions in ''{0}'' ({1} paths)",
338+
new Object[]{indexPath, paths.size()});
339+
340+
long errors = 0;
341+
ExecutorService executorService = RuntimeEnvironment.getInstance().getIndexerParallelizer().getFixedExecutor();
342+
final CountDownLatch latch = new CountDownLatch(paths.size());
343+
List<Future<Boolean>> futures = new ArrayList<>();
344+
for (Path path : paths) {
345+
futures.add(executorService.submit(() -> {
346+
try {
347+
return checkDefinitionsForFile(path);
348+
} finally {
349+
latch.countDown();
350+
}
351+
}));
352+
}
353+
try {
354+
latch.await();
355+
} catch (InterruptedException e) {
356+
LOGGER.log(Level.WARNING, "failed to await", e);
357+
}
358+
for (Future<Boolean> future : futures) {
359+
try {
360+
if (!future.get()) {
361+
errors++;
362+
}
363+
} catch (Exception e) {
364+
LOGGER.log(Level.WARNING, "failure when checking definitions", e);
365+
}
366+
}
367+
statistics.report(LOGGER, Level.FINE, String.format("checked %d files", paths.size()));
368+
369+
if (errors > 0) {
370+
throw new IndexDocumentException(String.format("document check failed for (%d documents out of %d)",
371+
errors, paths.size()));
372+
}
373+
}
374+
375+
private static boolean checkVersion(Path indexPath) throws IOException, IndexVersionException {
202376
LockFactory lockFactory = NativeFSLockFactory.INSTANCE;
203377
int segVersion;
204378

@@ -208,7 +382,7 @@ public static void checkDir(Path indexPath, IndexCheckMode mode)
208382
segVersion = segInfos.getIndexCreatedVersionMajor();
209383
} catch (IndexNotFoundException e) {
210384
LOGGER.log(Level.WARNING, "no index found in ''{0}''", indexDirectory);
211-
return;
385+
return true;
212386
}
213387
}
214388

@@ -219,9 +393,7 @@ public static void checkDir(Path indexPath, IndexCheckMode mode)
219393
Version.LATEST.major, segVersion);
220394
}
221395

222-
if (mode.ordinal() >= IndexCheckMode.DOCUMENTS.ordinal()) {
223-
checkDuplicateDocuments(indexPath);
224-
}
396+
return false;
225397
}
226398

227399
public static IndexReader getIndexReader(Path indexPath) throws IOException {

0 commit comments

Comments
 (0)