18
18
*/
19
19
20
20
/*
21
- * Copyright (c) 2005, 2019 , Oracle and/or its affiliates. All rights reserved.
21
+ * Copyright (c) 2005, 2021 , Oracle and/or its affiliates. All rights reserved.
22
22
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected] >.
23
23
*/
24
24
package org .opengrok .indexer .analysis ;
@@ -40,24 +40,23 @@ public class CtagsReader {
40
40
/**
41
41
* Matches the Unicode word that occurs last in a string, ignoring any
42
42
* trailing whitespace or non-word characters, and makes it accessible as
43
- * the first capture, {@code mtch.groups (1)}.
43
+ * the first capture, {@code match.group (1)}.
44
44
*/
45
- private static final Pattern LAST_UWORD = Pattern .compile ("(?U)(\\ w+)[\\ W\\ s]*$" );
45
+ private static final Pattern LAST_UNICODE_WORD = Pattern .compile ("(?U)(\\ w+)[\\ W\\ s]*$" );
46
46
47
47
/**
48
48
* Matches a Unicode word character.
49
49
*/
50
50
private static final Pattern WORD_CHAR = Pattern .compile ("(?U)\\ w" );
51
51
52
- private static final Logger LOGGER = LoggerFactory .getLogger (
53
- CtagsReader .class );
52
+ private static final Logger LOGGER = LoggerFactory .getLogger (CtagsReader .class );
54
53
55
54
/** A value indicating empty method body in tags, so skip it. */
56
55
private static final int MIN_METHOD_LINE_LENGTH = 6 ;
57
56
58
57
/**
59
58
* 96 is used by universal ctags for some lines, but that is too low;
60
- * OpenGrok can theoretically handle 50000 with 8G heap. Also this might
59
+ * OpenGrok can theoretically handle 50000 with 8G heap. Also, this might
61
60
* break scopes functionality, if set too low.
62
61
*/
63
62
private static final int MAX_METHOD_LINE_LENGTH = 1030 ;
@@ -198,7 +197,6 @@ public void readLine(String tagLine) {
198
197
199
198
int p = tagLine .indexOf ('\t' );
200
199
if (p <= 0 ) {
201
- //log.fine("SKIPPING LINE - NO TAB");
202
200
return ;
203
201
}
204
202
String def = tagLine .substring (0 , p );
@@ -208,9 +206,7 @@ public void readLine(String tagLine) {
208
206
209
207
int lp = tagLine .length ();
210
208
while ((p = tagLine .lastIndexOf ('\t' , lp - 1 )) > 0 ) {
211
- //log.fine(" p = " + p + " lp = " + lp);
212
209
String fld = tagLine .substring (p + 1 , lp );
213
- //log.fine("FIELD===" + fld);
214
210
lp = p ;
215
211
216
212
int sep = fld .indexOf (':' );
@@ -243,26 +239,24 @@ public void readLine(String tagLine) {
243
239
LOGGER .log (Level .FINEST , "Ctags: stripping method" +
244
240
" body for def {0} line {1}(scopes/highlight" +
245
241
" might break)" , new Object []{def , lnum });
246
- match = whole .substring (0 , MAX_METHOD_LINE_LENGTH ).replaceAll (
247
- "[ \t ]+" , " " );
242
+ match = whole .substring (0 , MAX_METHOD_LINE_LENGTH ).replaceAll ("[ \t ]+" , " " );
248
243
}
249
- } else { //tag is wrong format; cannot extract tagaddress from it; skip
244
+ } else { // tag is in wrong format; cannot extract tagaddress from it; skip
250
245
return ;
251
246
}
252
247
253
248
// Bug #809: Keep track of which symbols have already been
254
249
// seen to prevent duplicating them in memory.
255
250
256
- final String type = classInher == null ? kind : kind + " in " +
257
- classInher ;
251
+ final String type = classInher == null ? kind : kind + " in " + classInher ;
258
252
259
253
int lineno ;
260
254
try {
261
255
lineno = Integer .parseUnsignedInt (lnum );
262
256
} catch (NumberFormatException e ) {
263
257
lineno = 0 ;
264
258
LOGGER .log (Level .WARNING , "CTags line number parsing problem(but" +
265
- " I will continue with line # 0) for symbol {0}" , def );
259
+ " will continue with line # 0) for symbol {0}" , def );
266
260
}
267
261
268
262
CpatIndex cidx = bestIndexOfTag (lineno , whole , def );
@@ -305,7 +299,7 @@ public void readLine(String tagLine) {
305
299
cidx = bestIndexOfArg (lineno , whole , arg );
306
300
307
301
String name = null ;
308
- Matcher mname = LAST_UWORD .matcher (arg );
302
+ Matcher mname = LAST_UNICODE_WORD .matcher (arg );
309
303
if (mname .find ()) {
310
304
name = mname .group (1 );
311
305
} else if (arg .equals ("..." )) {
@@ -324,8 +318,7 @@ public void readLine(String tagLine) {
324
318
}
325
319
}
326
320
}
327
- // log.fine("Read = " + def + " : " + lnum + " = " + kind + " IS " +
328
- // inher + " M " + match);
321
+
329
322
fields .clear ();
330
323
}
331
324
@@ -339,7 +332,7 @@ private static String cutPattern(String tagLine, int startTab, int endTab) {
339
332
// The three leading characters represent "\t/^".
340
333
String cut = tagLine .substring (startTab + 3 , endTab );
341
334
342
- /**
335
+ /*
343
336
* Formerly this class cut four characters from the end, but my testing
344
337
* revealed a bug for short lines in files with macOS endings (e.g.
345
338
* cyrus-sasl mac/libdes/src/des_enc.c) where the pattern-ending $ is
@@ -351,7 +344,7 @@ private static String cutPattern(String tagLine, int startTab, int endTab) {
351
344
} else if (cut .endsWith ("/;\" " )) {
352
345
cut = cut .substring (0 , cut .length () - 3 );
353
346
} else {
354
- /**
347
+ /*
355
348
* The former logic did the following without the inspections above.
356
349
* Leaving this here as a fallback.
357
350
*/
@@ -412,7 +405,7 @@ private CpatIndex bestIndexOfTag(int lineno, String whole, String str) {
412
405
}
413
406
414
407
if (woff < 0 ) {
415
- /** At this point, do a lax search of the substring. */
408
+ /* At this point, do a lax search of the substring. */
416
409
woff = whole .indexOf (str );
417
410
}
418
411
}
@@ -422,7 +415,7 @@ private CpatIndex bestIndexOfTag(int lineno, String whole, String str) {
422
415
e = ExpandTabsReader .translate (whole , woff + str .length (), t );
423
416
return new CpatIndex (lineno , s , e );
424
417
}
425
- /**
418
+ /*
426
419
* When ctags has truncated a pattern, or when it spans multiple lines,
427
420
* then `str' might not be found in `whole'. In that case, return an
428
421
* imprecise index for the last character as the best we can do.
@@ -496,7 +489,7 @@ private CpatIndex bestIndexOfArg(int lineno, String whole, String arg) {
496
489
LOGGER .log (Level .FINE , "Odd arg:{0}|versus:{1}|line {2}" ,
497
490
new Object []{arg , whole , lineno });
498
491
}
499
- /**
492
+ /*
500
493
* When no fuzzy match can be generated, return an imprecise index
501
494
* for the first character as the best we can do.
502
495
*/
@@ -529,7 +522,7 @@ private CpatIndex bestIndexOfArg(int lineno, String whole, String arg) {
529
522
}
530
523
}
531
524
532
- /**
525
+ /*
533
526
* When no match is found, return an imprecise index for the last
534
527
* character as the best we can do.
535
528
*/
@@ -575,7 +568,7 @@ private int strictIndexOf(String whole, String substr) {
575
568
576
569
spos = woff + 1 ;
577
570
String onechar ;
578
- /**
571
+ /*
579
572
* Reject if the previous character is a word character, as that
580
573
* would not accord with a clean symbol break
581
574
*/
@@ -585,7 +578,7 @@ private int strictIndexOf(String whole, String substr) {
585
578
continue ;
586
579
}
587
580
}
588
- /**
581
+ /*
589
582
* Reject if the following character is a word character, as that
590
583
* would not accord with a clean symbol break
591
584
*/
@@ -613,7 +606,7 @@ private PatResult strictMatch(String whole, String substr, Pattern pat) {
613
606
Matcher m = pat .matcher (whole );
614
607
while (m .find ()) {
615
608
String onechar ;
616
- /**
609
+ /*
617
610
* Reject if the previous character is a word character, as that
618
611
* would not accord with a clean symbol break
619
612
*/
@@ -623,7 +616,7 @@ private PatResult strictMatch(String whole, String substr, Pattern pat) {
623
616
continue ;
624
617
}
625
618
}
626
- /**
619
+ /*
627
620
* Reject if the following character is a word character, as that
628
621
* would not accord with a clean symbol break
629
622
*/
@@ -656,7 +649,7 @@ private CpatIndex bestLineOfMatch(int lineno, PatResult pr, String cut) {
656
649
int t = tabSize ;
657
650
int resIndex = mIndex ;
658
651
int contentLength = 0 ;
659
- /**
652
+ /*
660
653
* Initialize the following just to silence warnings but with values
661
654
* that will be detected as "bad fuzzy" later.
662
655
*/
@@ -690,7 +683,7 @@ private CpatIndex bestLineOfMatch(int lineno, PatResult pr, String cut) {
690
683
return new CpatIndex (resIndex + 1 , s , e );
691
684
}
692
685
693
- /**
686
+ /*
694
687
* This should not happen -- but if it does, log it and return an
695
688
* imprecise index for the first character as the best we can do.
696
689
*/
0 commit comments