Skip to content

Commit 8f6d1b9

Browse files
committed
Store and verify AnalyzerGuru and Analyzer versions
1 parent 417a89a commit 8f6d1b9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+752
-59
lines changed

OpenGrok

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,14 @@ Supported Environment Variables for configuring the default setup:
120120
OPENGROK_IGNORE_PATTERNS="-i d:dummy"
121121
Multiple entries can be joined together:
122122
"-i dummy -i d:tmp -i f:dummy"
123+
- OPENGROK_ASSIGNMENTS Assign/Unassign specialized analyzers using
124+
(.ext|prefix.):(-|Analyzer) syntax. E.g.,
125+
.lib:Ignorant,Makefile.:- to set files ending
126+
with .LIB (case-insensitive) to use the
127+
IgnorantAnalyzerFactory (case-sensitive due to
128+
Java limitation), and to clear special
129+
handling for files starting with MAKEFILE
130+
(case-insensitive and no full-stop)
123131
- OPENGROK_SCAN_REPOS Disable Scan for repositories (*)
124132
- OPENGROK_SCAN_DEPTH how deep should scanning for repos go
125133
(by default 3 directories from SRC_ROOT)
@@ -445,6 +453,11 @@ ${BZR:+-Dorg.opensolaris.opengrok.history.Bazaar=$BZR} \
445453
then
446454
PROGRESS="--progress"
447455
fi
456+
457+
if [ "$OPENGROK_ASSIGNMENTS" != "" ]; then
458+
# Replace every run of whitespace in OPENGROK_ASSIGNMENTS with a single "_".
# A POSIX character class is only valid inside a bracket expression
# ([[:space:]]), and "+" is not a repetition operator in basic regular
# expressions, so "one or more" is spelled out as X followed by X*.
ASSIGNMENTS="`echo $OPENGROK_ASSIGNMENTS | sed 's/[[:space:]][[:space:]]*/_/g'`"
459+
ASSIGNMENTS="-A `echo $ASSIGNMENTS | sed 's/,/ -A /g'`"
460+
fi
448461
}
449462

450463
#
@@ -878,6 +891,7 @@ CommonInvocation()
878891
${CTAGS_OPTIONS_FILE:+-o} ${CTAGS_OPTIONS_FILE} \
879892
${OPENGROK_FLUSH_RAM_BUFFER_SIZE} ${SKIN} ${LEADING_WILDCARD} \
880893
${OPENGROK_PARALLELISM:+--threads} ${OPENGROK_PARALLELISM} \
894+
${ASSIGNMENTS} \
881895
${READ_XML_CONF} \
882896
${WEBAPP_CONFIG} \
883897
${WEBAPP_CONTEXT} \

src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java

Lines changed: 107 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@
4141
import java.util.List;
4242
import java.util.Locale;
4343
import java.util.Map;
44+
import java.util.Objects;
4445
import java.util.SortedMap;
4546
import java.util.TreeMap;
47+
import java.util.TreeSet;
4648
import java.util.logging.Level;
4749
import java.util.logging.Logger;
4850

@@ -52,6 +54,7 @@
5254
import org.apache.lucene.document.Field.Store;
5355
import org.apache.lucene.document.FieldType;
5456
import org.apache.lucene.document.SortedDocValuesField;
57+
import org.apache.lucene.document.StoredField;
5558
import org.apache.lucene.document.StringField;
5659
import org.apache.lucene.document.TextField;
5760
import org.apache.lucene.util.BytesRef;
@@ -167,6 +170,17 @@ public class AnalyzerGuru {
167170
*/
168171
private static final Map<String, FileAnalyzerFactory> pre = new HashMap<>();
169172

173+
/**
174+
* Appended when
175+
* {@link #addExtension(java.lang.String, org.opensolaris.opengrok.analysis.FileAnalyzerFactory)}
176+
* or
177+
* {@link #addPrefix(java.lang.String, org.opensolaris.opengrok.analysis.FileAnalyzerFactory)}
178+
* are called to augment the value in {@link #getVersionNo()}.
179+
*/
180+
private static final TreeSet<String> CUSTOMIZATION_KEYS = new TreeSet<>();
181+
182+
private static int customizationHashCode;
183+
170184
/**
171185
* Descending string length comparator for magics
172186
*/
@@ -220,6 +234,12 @@ public class AnalyzerGuru {
220234
private static final Map<String, FileAnalyzerFactory> FILETYPE_FACTORIES =
221235
new HashMap<>();
222236

237+
/**
238+
* Maps from {@link FileAnalyzer#getFileTypeName()} to
239+
* {@link FileAnalyzer#getVersionNo()}
240+
*/
241+
private static final Map<String, Long> ANALYZER_VERSIONS = new HashMap<>();
242+
223243
/*
224244
* If you write your own analyzer please register it here. The order is
225245
* important for any factory that uses a FileAnalyzerFactory.Matcher
@@ -295,6 +315,45 @@ public class AnalyzerGuru {
295315
}
296316
}
297317

318+
/**
319+
* Gets a version number to be used to tag documents examined by the guru so
320+
* that analysis can be re-done later if a stored version number is
321+
* different from the current implementation or if customization has been
322+
* done by the user to change the {@link AnalyzerGuru} operation.
323+
* <p>
324+
* The static part of the version is bumped in a release when e.g. new
325+
* {@link FileAnalyzerFactory} subclasses are registered or when existing
326+
* {@link FileAnalyzerFactory} subclasses are revised to target more or
327+
* different files.
328+
* @return a value whose lower 32-bits are a static value
329+
* 20171230_00
330+
* for the current implementation and whose higher-32 bits are non-zero if
331+
* {@link #addExtension(java.lang.String, org.opensolaris.opengrok.analysis.FileAnalyzerFactory)}
332+
* or
333+
* {@link #addPrefix(java.lang.String, org.opensolaris.opengrok.analysis.FileAnalyzerFactory)}
334+
* has been called.
335+
*/
336+
public static long getVersionNo() {
337+
final int ver32 = 20171230_00; // Edit comment above too!
338+
long ver = ver32;
339+
if (customizationHashCode != 0) {
340+
ver |= (long)customizationHashCode << 32;
341+
}
342+
return ver;
343+
}
344+
345+
/**
346+
* Gets a version number according to a registered
347+
* {@link FileAnalyzer#getVersionNo()} for a {@code fileTypeName} according
348+
* to {@link FileAnalyzer#getFileTypeName()}.
349+
* @param fileTypeName a defined instance
350+
* @return a registered value or {@link Long#MIN_VALUE} if
351+
* {@code fileTypeName} is unknown
352+
*/
353+
public static long getAnalyzerVersionNo(String fileTypeName) {
354+
return ANALYZER_VERSIONS.getOrDefault(fileTypeName, Long.MIN_VALUE);
355+
}
356+
298357
public static Map<String, FileAnalyzerFactory> getExtensionsMap() {
299358
return Collections.unmodifiableMap(ext);
300359
}
@@ -347,7 +406,9 @@ private static void registerAnalyzer(FileAnalyzerFactory factory) {
347406
factories.add(factory);
348407

349408
FileAnalyzer fa = factory.getAnalyzer();
350-
FILETYPE_FACTORIES.put(fa.getFileTypeName(), factory);
409+
String fileTypeName = fa.getFileTypeName();
410+
FILETYPE_FACTORIES.put(fileTypeName, factory);
411+
ANALYZER_VERSIONS.put(fileTypeName, fa.getVersionNo());
351412
}
352413

353414
/**
@@ -359,12 +420,16 @@ private static void registerAnalyzer(FileAnalyzerFactory factory) {
359420
* extension (if you pass null as the analyzer, you will disable the
360421
* analyzer used for that extension)
361422
*/
362-
public static void addPrefix(String prefix,
363-
FileAnalyzerFactory factory) {
423+
public static void addPrefix(String prefix, FileAnalyzerFactory factory) {
424+
FileAnalyzerFactory oldFactory;
364425
if (factory == null) {
365-
pre.remove(prefix);
426+
oldFactory = pre.remove(prefix);
366427
} else {
367-
pre.put(prefix, factory);
428+
oldFactory = pre.put(prefix, factory);
429+
}
430+
431+
if (factoriesDifferent(factory, oldFactory)) {
432+
addCustomizationKey("p:" + prefix);
368433
}
369434
}
370435

@@ -379,10 +444,15 @@ public static void addPrefix(String prefix,
379444
*/
380445
public static void addExtension(String extension,
381446
FileAnalyzerFactory factory) {
447+
FileAnalyzerFactory oldFactory;
382448
if (factory == null) {
383-
ext.remove(extension);
449+
oldFactory = ext.remove(extension);
384450
} else {
385-
ext.put(extension, factory);
451+
oldFactory = ext.put(extension, factory);
452+
}
453+
454+
if (factoriesDifferent(factory, oldFactory)) {
455+
addCustomizationKey("e:" + extension);
386456
}
387457
}
388458

@@ -501,6 +571,8 @@ public void populateDocument(Document doc, File file, String path,
501571

502572
String type = fa.getFileTypeName();
503573
doc.add(new StringField(QueryBuilder.TYPE, type, Store.YES));
574+
575+
doc.add(new StoredField(QueryBuilder.ZVER, fa.getVersionNo()));
504576
}
505577
}
506578

@@ -1003,4 +1075,32 @@ private static String readOpening(InputStream in, byte[] sig)
10031075
in.reset();
10041076
return opening.toString();
10051077
}
1078+
1079+
private static void addCustomizationKey(String k) {
1080+
CUSTOMIZATION_KEYS.add(k);
1081+
Object[] keys = CUSTOMIZATION_KEYS.toArray();
1082+
customizationHashCode = Objects.hash(keys);
1083+
}
1084+
1085+
private static boolean factoriesDifferent(FileAnalyzerFactory a,
1086+
FileAnalyzerFactory b) {
1087+
String a_name = null;
1088+
if (a != null) {
1089+
a_name = a.getName();
1090+
if (a_name == null) {
1091+
a_name = a.getClass().getSimpleName();
1092+
}
1093+
}
1094+
String b_name = null;
1095+
if (b != null) {
1096+
b_name = b.getName();
1097+
if (b_name == null) {
1098+
b_name = b.getClass().getSimpleName();
1099+
}
1100+
}
1101+
if (a_name == null && b_name == null) {
1102+
return false;
1103+
}
1104+
return a_name == null || b_name == null || !a_name.equals(b_name);
1105+
}
10061106
}

src/org/opensolaris/opengrok/analysis/FileAnalyzer.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public class FileAnalyzer extends Analyzer {
6363
private static final Logger LOGGER = LoggerFactory.getLogger(FileAnalyzer.class);
6464

6565
protected Project project;
66+
protected Ctags ctags;
6667
protected boolean scopesEnabled;
6768
protected boolean foldingEnabled;
6869
private final FileAnalyzerFactory factory;
@@ -126,7 +127,31 @@ public static Genre get(String typeName) {
126127
return null;
127128
}
128129
}
129-
protected Ctags ctags;
130+
131+
/**
132+
* Gets a version number to be used to tag processed documents so that
133+
* re-analysis can be re-done later if a stored version number is different
134+
* from the current implementation.
135+
* <p>
136+
* The value is the union of a {@link FileAnalyzer} root version and the
137+
* value from {@link #getSpecializedVersionNo()}. Changing the root version
138+
* affects all analyzers simultaneously; while subclasses can override
139+
* {@link #getSpecializedVersionNo()} to allow changes that affect a few.
140+
* @return (20061115_01 &lt;&lt; 32) | {@link #getSpecializedVersionNo()}
141+
*/
142+
public final long getVersionNo() {
143+
final int rootVersionNo = 20061115_01; // Edit comment above too!
144+
return ((long)rootVersionNo << 32) | getSpecializedVersionNo();
145+
}
146+
147+
/**
148+
* Subclasses should override to produce a value relevant for the evolution
149+
* of their analysis in each release.
150+
* @return 0
151+
*/
152+
protected int getSpecializedVersionNo() {
153+
return 0; // FileAnalyzer is not specialized.
154+
}
130155

131156
public void setCtags(Ctags ctags) {
132157
this.ctags = ctags;

src/org/opensolaris/opengrok/analysis/TextAnalyzer.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2018, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opensolaris.opengrok.analysis;
2525

@@ -49,6 +49,17 @@ protected TextAnalyzer(FileAnalyzerFactory factory,
4949
super(factory, symbolTokenizer);
5050
}
5151

52+
/**
53+
* Gets a version number to be used to tag processed documents so that
54+
* re-analysis can be re-done later if a stored version number is different
55+
* from the current implementation.
56+
* @return 20171223_00
57+
*/
58+
@Override
59+
protected int getSpecializedVersionNo() {
60+
return 20171223_00; // Edit comment above too!
61+
}
62+
5263
/**
5364
* Write a cross referenced HTML file reads the source from in
5465
* @param args a defined instance

src/org/opensolaris/opengrok/analysis/ada/AdaAnalyzer.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2018, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opensolaris.opengrok.analysis.ada;
2525

@@ -46,6 +46,17 @@ protected AdaAnalyzer(FileAnalyzerFactory factory) {
4646
super(factory, new JFlexTokenizer(new AdaSymbolTokenizer(
4747
FileAnalyzer.dummyReader)));
4848
}
49+
50+
/**
51+
* Gets a version number to be used to tag processed documents so that
52+
* re-analysis can be re-done later if a stored version number is different
53+
* from the current implementation.
54+
* @return 20171218_00
55+
*/
56+
@Override
57+
protected int getSpecializedVersionNo() {
58+
return 20171218_00; // Edit comment above too!
59+
}
4960

5061
/**
5162
* Creates a wrapped {@link AdaXref} instance.

src/org/opensolaris/opengrok/analysis/archive/BZip2Analyzer.java

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2018, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opensolaris.opengrok.analysis.archive;
2525

@@ -55,11 +55,23 @@ public Genre getGenre() {
5555
protected BZip2Analyzer(FileAnalyzerFactory factory) {
5656
super(factory);
5757
}
58-
private FileAnalyzer fa;
58+
59+
/**
60+
* Gets a version number to be used to tag processed documents so that
61+
* re-analysis can be re-done later if a stored version number is different
62+
* from the current implementation.
63+
* @return 20180111_00
64+
*/
65+
@Override
66+
protected int getSpecializedVersionNo() {
67+
return 20180111_00; // Edit comment above too!
68+
}
5969

6070
@Override
6171
public void analyze(Document doc, StreamSource src, Writer xrefOut)
6272
throws IOException, InterruptedException {
73+
FileAnalyzer fa;
74+
6375
StreamSource bzSrc = wrap(src);
6476
String path = doc.get("path");
6577
if (path != null
@@ -69,9 +81,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut)
6981
try (InputStream in = bzSrc.getStream()) {
7082
fa = AnalyzerGuru.getAnalyzer(in, newname);
7183
}
72-
if (fa instanceof BZip2Analyzer) {
73-
fa = null;
74-
} else {
84+
if (!(fa instanceof BZip2Analyzer)) {
7585
if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) {
7686
this.g = Genre.XREFABLE;
7787
} else {

src/org/opensolaris/opengrok/analysis/archive/GZIPAnalyzer.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2018, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opensolaris.opengrok.analysis.archive;
2525

@@ -61,11 +61,23 @@ public Genre getGenre() {
6161
protected GZIPAnalyzer(FileAnalyzerFactory factory) {
6262
super(factory);
6363
}
64-
private FileAnalyzer fa;
64+
65+
/**
66+
* Gets a version number to be used to tag processed documents so that
67+
* re-analysis can be re-done later if a stored version number is different
68+
* from the current implementation.
69+
* @return 20180111_00
70+
*/
71+
@Override
72+
protected int getSpecializedVersionNo() {
73+
return 20180111_00; // Edit comment above too!
74+
}
6575

6676
@Override
6777
public void analyze(Document doc, StreamSource src, Writer xrefOut)
6878
throws IOException, InterruptedException {
79+
FileAnalyzer fa;
80+
6981
StreamSource gzSrc = wrap(src);
7082
String path = doc.get("path");
7183
if (path != null

0 commit comments

Comments
 (0)