23
23
*/
24
24
package org .opengrok .indexer .index ;
25
25
26
+ import java .io .BufferedReader ;
27
+ import java .io .FileReader ;
26
28
import java .io .IOException ;
29
+ import java .nio .file .FileVisitResult ;
30
+ import java .nio .file .Files ;
27
31
import java .nio .file .Path ;
32
+ import java .nio .file .SimpleFileVisitor ;
33
+ import java .nio .file .attribute .BasicFileAttributes ;
28
34
import java .util .ArrayList ;
29
35
import java .util .Collection ;
30
36
import java .util .HashSet ;
31
37
import java .util .List ;
32
38
import java .util .Map ;
33
39
import java .util .Set ;
34
40
import java .util .concurrent .ConcurrentHashMap ;
41
+ import java .util .concurrent .CountDownLatch ;
42
+ import java .util .concurrent .ExecutorService ;
43
+ import java .util .concurrent .Future ;
35
44
import java .util .logging .Level ;
36
45
import java .util .logging .Logger ;
37
46
42
51
import org .apache .lucene .index .IndexableField ;
43
52
import org .apache .lucene .index .MultiBits ;
44
53
import org .apache .lucene .index .SegmentInfos ;
54
+ import org .apache .lucene .queryparser .classic .ParseException ;
45
55
import org .apache .lucene .store .Directory ;
46
56
import org .apache .lucene .store .FSDirectory ;
47
57
import org .apache .lucene .store .LockFactory ;
51
61
import org .apache .lucene .util .Version ;
52
62
import org .jetbrains .annotations .NotNull ;
53
63
import org .jetbrains .annotations .Nullable ;
64
+ import org .opengrok .indexer .analysis .Definitions ;
54
65
import org .opengrok .indexer .configuration .Configuration ;
66
+ import org .opengrok .indexer .configuration .RuntimeEnvironment ;
55
67
import org .opengrok .indexer .logger .LoggerFactory ;
56
68
import org .opengrok .indexer .search .QueryBuilder ;
57
69
import org .opengrok .indexer .util .Statistics ;
@@ -71,6 +83,7 @@ public class IndexCheck {
71
83
/**
 * Selects which kind of index check is performed.
 */
public enum IndexCheckMode {
    /** No check selected. */
    NO_CHECK,
    /** Check the version of the index. */
    VERSION,
    /** Cross-check the definitions against the contents of the source files. */
    DEFINITIONS,
    /** Check the index for duplicate documents. */
    DOCUMENTS
}
76
89
@@ -138,7 +151,7 @@ private IndexCheck() {
138
151
* @return true on success, false on failure
139
152
*/
140
153
public static boolean isOkay (@ NotNull Configuration configuration , IndexCheckMode mode ,
141
- Collection <String > projectNames ) {
154
+ Collection <String > projectNames ) throws IOException {
142
155
143
156
if (mode .equals (IndexCheckMode .NO_CHECK )) {
144
157
LOGGER .log (Level .WARNING , "no index check mode selected" );
@@ -151,15 +164,17 @@ public static boolean isOkay(@NotNull Configuration configuration, IndexCheckMod
151
164
if (!projectNames .isEmpty ()) {
152
165
// Assumes projects are enabled.
153
166
for (String projectName : projectNames ) {
154
- ret |= checkDirNoExceptions (Path .of (indexRoot .toString (), projectName ), mode );
167
+ ret |= checkDirFilterExceptions (Path .of (configuration .getSourceRoot ()),
168
+ Path .of (indexRoot .toString (), projectName ), mode );
155
169
}
156
170
} else {
157
171
if (configuration .isProjectsEnabled ()) {
158
172
for (String projectName : configuration .getProjects ().keySet ()) {
159
- ret |= checkDirNoExceptions (Path .of (indexRoot .toString (), projectName ), mode );
173
+ ret |= checkDirFilterExceptions (Path .of (configuration .getSourceRoot ()),
174
+ Path .of (indexRoot .toString (), projectName ), mode );
160
175
}
161
176
} else {
162
- ret |= checkDirNoExceptions ( indexRoot , mode );
177
+ ret |= checkDirFilterExceptions ( Path . of ( configuration . getSourceRoot ()), indexRoot , mode );
163
178
}
164
179
}
165
180
@@ -168,15 +183,15 @@ public static boolean isOkay(@NotNull Configuration configuration, IndexCheckMod
168
183
169
184
/**
170
185
* @param indexPath directory with index
171
- * @return 0 on success, 1 on failure
186
+ * @return 0 on success, 1 on failure (index check failed)
187
+ * @throws IOException on I/O error
172
188
*/
173
- private static int checkDirNoExceptions (Path indexPath , IndexCheckMode mode ) {
189
+ private static int checkDirFilterExceptions (Path sourcePath , Path indexPath , IndexCheckMode mode ) throws IOException {
174
190
try {
175
- LOGGER .log (Level .INFO , "Checking index in ''{0}''" , indexPath );
176
- checkDir (indexPath , mode );
191
+ LOGGER .log (Level .INFO , "Checking index in ''{0}'' (mode {1}) " , new Object []{ indexPath , mode } );
192
+ checkDir (sourcePath , indexPath , mode );
177
193
} catch (IOException e ) {
178
- LOGGER .log (Level .WARNING , String .format ("Could not perform index check for directory '%s'" , indexPath ), e );
179
- return 0 ;
194
+ throw e ;
180
195
} catch (Exception e ) {
181
196
LOGGER .log (Level .WARNING , String .format ("Index check for directory '%s' failed" , indexPath ), e );
182
197
return 1 ;
@@ -190,15 +205,174 @@ private static int checkDirNoExceptions(Path indexPath, IndexCheckMode mode) {
190
205
* Check index in given directory. It assumes that all commits (if any)
191
206
* in the Lucene segment file were done with the same version.
192
207
*
208
+ * @param sourcePath path to source directory
193
209
* @param indexPath directory with index to check
194
210
* @param mode index check mode
195
211
* @throws IOException if the directory cannot be opened
196
212
* @throws IndexVersionException if the version of the index does not match Lucene index version
197
213
* @throws IndexDocumentException if there are duplicate documents in the index
198
214
*/
199
- public static void checkDir (Path indexPath , IndexCheckMode mode )
200
- throws IndexVersionException , IndexDocumentException , IOException {
215
+ public static void checkDir (Path sourcePath , Path indexPath , IndexCheckMode mode )
216
+ throws IndexVersionException , IndexDocumentException , IOException , ParseException , ClassNotFoundException {
217
+
218
+ switch (mode ) {
219
+ case VERSION :
220
+ checkVersion (indexPath );
221
+ break ;
222
+ case DOCUMENTS :
223
+ checkDuplicateDocuments (indexPath );
224
+ break ;
225
+ case DEFINITIONS :
226
+ checkDefinitions (sourcePath , indexPath );
227
+ }
228
+ }
229
+
230
+ private static List <String > getLines (Path path ) throws IOException {
231
+ List <String > lines = new ArrayList <>();
232
+ try (BufferedReader bufferedReader = new BufferedReader (new FileReader (path .toFile ()))) {
233
+ String line ;
234
+ while ((line = bufferedReader .readLine ()) != null ) {
235
+ lines .add (line );
236
+ }
237
+ }
238
+
239
+ return lines ;
240
+ }
241
+
242
+ /**
243
+ * Crosscheck definitions found in the index for given file w.r.t. actual file contents.
244
+ * There is a number of cases this check can fail even for legitimate cases. This is why
245
+ * certain patterns are skipped.
246
+ * @param path path to the file being checked
247
+ * @return okay indication
248
+ */
249
+ private static boolean checkDefinitionsForFile (Path path ) throws ParseException , IOException , ClassNotFoundException {
250
+
251
+ // Avoid paths with certain suffixes. These exhibit some behavior that cannot be handled
252
+ // For example, '1;' in Perl code is interpreted by Universal Ctags as 'STDOUT'.
253
+ Set <String > suffixesToAvoid = Set .of (".sh" , ".SH" , ".pod" , ".pl" , ".pm" , ".js" , ".json" , ".css" );
254
+ if (suffixesToAvoid .stream ().anyMatch (s -> path .toString ().endsWith (s ))) {
255
+ return true ;
256
+ }
257
+
258
+ boolean okay = true ;
259
+ Definitions defs = IndexDatabase .getDefinitions (path .toFile ());
260
+ if (defs != null ) {
261
+ LOGGER .log (Level .FINE , "checking definitions for ''{0}''" , path );
262
+ List <String > lines = getLines (path );
263
+
264
+ for (Definitions .Tag tag : defs .getTags ()) {
265
+ // These symbols are sometimes produced by Universal Ctags even though they are not
266
+ // actually present in the file.
267
+ if (tag .symbol .startsWith ("__anon" )) {
268
+ continue ;
269
+ }
270
+
271
+ // Needed for some TeX definitions.
272
+ String symbol = tag .symbol ;
273
+ if (symbol .contains ("\\ " )) {
274
+ symbol = symbol .replace ("\\ " , "" );
275
+ }
276
+
277
+ // C++ operator overload symbol contains extra space, ignore them for now.
278
+ if (symbol .startsWith ("operator " )) {
279
+ continue ;
280
+ }
281
+
282
+ // These could be e.g. C structure members, having their line number equal to
283
+ // where the structure definition starts, ignore.
284
+ if (tag .type .equals ("argument" )) {
285
+ continue ;
286
+ }
287
+
288
+ if (!lines .get (tag .line - 1 ).contains (symbol )) {
289
+ // Line wrap, skip it.
290
+ if (lines .get (tag .line - 1 ).endsWith ("\\ " )) {
291
+ continue ;
292
+ }
293
+
294
+ // Line wraps cause the symbol to be reported on different line than it resides on.
295
+ // Perform more thorough/expensive check.
296
+ final String str = symbol ;
297
+ if (lines .stream ().noneMatch (l -> l .contains (str ))) {
298
+ LOGGER .log (Level .WARNING , String .format ("'%s' does not contain '%s' (should be on line %d)" ,
299
+ path , symbol , tag .line ));
300
+ okay = false ;
301
+ }
302
+ }
303
+ }
304
+ }
305
+
306
+ return okay ;
307
+ }
308
+
309
+ private static class GetFiles extends SimpleFileVisitor <Path > {
310
+ Set <Path > files = new HashSet <>();
311
+
312
+ @ Override
313
+ public FileVisitResult preVisitDirectory (Path dir , BasicFileAttributes attrs ) throws IOException {
314
+ if (RuntimeEnvironment .getInstance ().getIgnoredNames ().ignore (dir .toFile ())) {
315
+ return FileVisitResult .SKIP_SUBTREE ;
316
+ }
317
+
318
+ return FileVisitResult .CONTINUE ;
319
+ }
320
+
321
+ @ Override
322
+ public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
323
+ if (file .toFile ().isFile ()) {
324
+ files .add (file );
325
+ }
326
+
327
+ return FileVisitResult .CONTINUE ;
328
+ }
329
+ }
201
330
331
+ private static void checkDefinitions (Path sourcePath , Path indexPath ) throws IOException , IndexDocumentException {
332
+
333
+ Statistics statistics = new Statistics ();
334
+ GetFiles getFiles = new GetFiles ();
335
+ Files .walkFileTree (sourcePath , getFiles );
336
+ Set <Path > paths = getFiles .files ;
337
+ LOGGER .log (Level .FINE , "Checking definitions in ''{0}'' ({1} paths)" ,
338
+ new Object []{indexPath , paths .size ()});
339
+
340
+ long errors = 0 ;
341
+ ExecutorService executorService = RuntimeEnvironment .getInstance ().getIndexerParallelizer ().getFixedExecutor ();
342
+ final CountDownLatch latch = new CountDownLatch (paths .size ());
343
+ List <Future <Boolean >> futures = new ArrayList <>();
344
+ for (Path path : paths ) {
345
+ futures .add (executorService .submit (() -> {
346
+ try {
347
+ return checkDefinitionsForFile (path );
348
+ } finally {
349
+ latch .countDown ();
350
+ }
351
+ }));
352
+ }
353
+ try {
354
+ latch .await ();
355
+ } catch (InterruptedException e ) {
356
+ LOGGER .log (Level .WARNING , "failed to await" , e );
357
+ }
358
+ for (Future <Boolean > future : futures ) {
359
+ try {
360
+ if (!future .get ()) {
361
+ errors ++;
362
+ }
363
+ } catch (Exception e ) {
364
+ LOGGER .log (Level .WARNING , "failure when checking definitions" , e );
365
+ }
366
+ }
367
+ statistics .report (LOGGER , Level .FINE , String .format ("checked %d files" , paths .size ()));
368
+
369
+ if (errors > 0 ) {
370
+ throw new IndexDocumentException (String .format ("document check failed for (%d documents out of %d)" ,
371
+ errors , paths .size ()));
372
+ }
373
+ }
374
+
375
+ private static boolean checkVersion (Path indexPath ) throws IOException , IndexVersionException {
202
376
LockFactory lockFactory = NativeFSLockFactory .INSTANCE ;
203
377
int segVersion ;
204
378
@@ -208,7 +382,7 @@ public static void checkDir(Path indexPath, IndexCheckMode mode)
208
382
segVersion = segInfos .getIndexCreatedVersionMajor ();
209
383
} catch (IndexNotFoundException e ) {
210
384
LOGGER .log (Level .WARNING , "no index found in ''{0}''" , indexDirectory );
211
- return ;
385
+ return true ;
212
386
}
213
387
}
214
388
@@ -219,9 +393,7 @@ public static void checkDir(Path indexPath, IndexCheckMode mode)
219
393
Version .LATEST .major , segVersion );
220
394
}
221
395
222
- if (mode .ordinal () >= IndexCheckMode .DOCUMENTS .ordinal ()) {
223
- checkDuplicateDocuments (indexPath );
224
- }
396
+ return false ;
225
397
}
226
398
227
399
public static IndexReader getIndexReader (Path indexPath ) throws IOException {
0 commit comments