Skip to content

Commit 0595269

Browse files
committed
introduce prefix matching
fixes #641 fixes #604
1 parent 83439b4 commit 0595269

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+190
-79
lines changed

src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ public class AnalyzerGuru {
9898
private static final Map<String, FileAnalyzerFactory>
9999
ext = new HashMap<>();
100100

101+
/** Map from file prefixes to analyzer factories. */
102+
private static final Map<String, FileAnalyzerFactory>
103+
pre = new HashMap<>();
104+
101105
// @TODO: have a comparator
102106
/** Map from magic strings to analyzer factories. */
103107
private static final SortedMap<String, FileAnalyzerFactory>
@@ -141,7 +145,7 @@ public class AnalyzerGuru {
141145
new CxxAnalyzerFactory(),
142146
new ShAnalyzerFactory(),
143147
PlainAnalyzerFactory.DEFAULT_INSTANCE,
144-
new UuencodeAnalyzerFactory(),
148+
new UuencodeAnalyzerFactory(),
145149
new GZIPAnalyzerFactory(),
146150
new JavaAnalyzerFactory(),
147151
new JavaScriptAnalyzerFactory(),
@@ -177,6 +181,11 @@ private static void registerAnalyzer(FileAnalyzerFactory factory) {
177181
assert old == null :
178182
"name '" + name + "' used in multiple analyzers";
179183
}
184+
for (String prefix : factory.getPrefixes()) {
185+
FileAnalyzerFactory old = pre.put(prefix, factory);
186+
assert old == null :
187+
"prefix '" + prefix + "' used in multiple analyzers";
188+
}
180189
for (String suffix : factory.getSuffixes()) {
181190
FileAnalyzerFactory old = ext.put(suffix, factory);
182191
assert old == null :
@@ -191,6 +200,24 @@ private static void registerAnalyzer(FileAnalyzerFactory factory) {
191200
factories.add(factory);
192201
}
193202

203+
/**
204+
* Instruct the AnalyzerGuru to use a given analyzer for a given
205+
* file prefix.
206+
* @param prefix the file prefix to add
207+
* @param factory a factory which creates
208+
* the analyzer to use for the given prefix
209+
* (if you pass null as the analyzer, you will disable
210+
* the analyzer used for that prefix)
211+
*/
212+
public static void addPrefix(String prefix,
213+
FileAnalyzerFactory factory) {
214+
if (factory == null) {
215+
pre.remove(prefix);
216+
} else {
217+
pre.put(prefix, factory);
218+
}
219+
}
220+
194221
/**
195222
* Instruct the AnalyzerGuru to use a given analyzer for a given
196223
* file extension.
@@ -445,19 +472,37 @@ public static FileAnalyzerFactory find(InputStream in, String file)
445472
public static FileAnalyzerFactory find(String file) {
446473
String path = file;
447474
int i;
448-
if (((i = path.lastIndexOf('/')) > 0 || (i = path.lastIndexOf('\\')) > 0)
475+
476+
// Get basename of the file first.
477+
if (((i = path.lastIndexOf(File.separatorChar)) > 0)
449478
&& (i + 1 < path.length())) {
450479
path = path.substring(i + 1);
451480
}
481+
452482
int dotpos = path.lastIndexOf('.');
453483
if (dotpos >= 0) {
454-
FileAnalyzerFactory factory =
484+
FileAnalyzerFactory factory;
485+
486+
// Try matching the prefix.
487+
if (dotpos > 0) {
488+
factory =
489+
pre.get(path.substring(0, dotpos).toUpperCase(Locale.getDefault()));
490+
if (factory != null) {
491+
return factory;
492+
}
493+
}
494+
495+
// Now try matching the suffix. Checking the prefix first and then the
496+
// suffix works for most file names, although there can certainly be
497+
// cases where this order picks the wrong analyzer.
498+
factory =
455499
ext.get(path.substring(dotpos + 1).toUpperCase(Locale.getDefault()));
456500
if (factory != null) {
457501
return factory;
458502
}
459503
}
460-
// file doesn't have any of the extensions we know, try full match
504+
505+
// file doesn't have any of the prefixes or extensions we know, try full match
461506
return FILE_NAMES.get(path.toUpperCase(Locale.getDefault()));
462507
}
463508

src/org/opensolaris/opengrok/analysis/FileAnalyzerFactory.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ public class FileAnalyzerFactory {
4444
private final ThreadLocal<FileAnalyzer> cachedAnalyzer;
4545
/** List of file names on which this kind of analyzer should be used. */
4646
private final List<String> names;
47+
/** List of file prefixes on which this kind of analyzer should be
48+
* used. */
49+
private final List<String> prefixes;
4750
/** List of file extensions on which this kind of analyzer should be
4851
* used. */
4952
private final List<String> suffixes;
@@ -62,14 +65,15 @@ public class FileAnalyzerFactory {
6265
* Create an instance of {@code FileAnalyzerFactory}.
6366
*/
6467
FileAnalyzerFactory() {
65-
this(null, null, null, null, null, null);
68+
this(null, null, null, null, null, null, null);
6669
}
6770

6871
/**
6972
* Construct an instance of {@code FileAnalyzerFactory}. This constructor
7073
* should be used by subclasses to override default values.
7174
*
7275
* @param names list of file names to recognize (possibly {@code null})
76+
* @param prefixes list of prefixes to recognize (possibly {@code null})
7377
* @param suffixes list of suffixes to recognize (possibly {@code null})
7478
* @param magics list of magic strings to recognize (possibly {@code null})
7579
* @param matcher a matcher for this analyzer (possibly {@code null})
@@ -78,11 +82,12 @@ public class FileAnalyzerFactory {
7882
* Genre.DATA} is used)
7983
*/
8084
protected FileAnalyzerFactory(
81-
String[] names, String[] suffixes, String[] magics,
82-
Matcher matcher, String contentType,
85+
String[] names, String[] prefixes, String[] suffixes,
86+
String[] magics, Matcher matcher, String contentType,
8387
Genre genre) {
8488
cachedAnalyzer = new ThreadLocal<FileAnalyzer>();
8589
this.names = asList(names);
90+
this.prefixes = asList(prefixes);
8691
this.suffixes = asList(suffixes);
8792
this.magics = asList(magics);
8893
if (matcher == null) {
@@ -117,6 +122,14 @@ final List<String> getFileNames() {
117122
return names;
118123
}
119124

125+
/**
126+
* Get the list of file prefixes recognized by this analyzer.
127+
* @return list of prefixes
128+
*/
129+
final List<String> getPrefixes() {
130+
return prefixes;
131+
}
132+
120133
/**
121134
* Get the list of file extensions recognized by this analyzer.
122135
* @return list of suffixes

src/org/opensolaris/opengrok/analysis/PathTokenizer.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import org.apache.lucene.analysis.Tokenizer;
2929
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
3030
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
31-
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
3231

3332
/**
3433
* Tokenizer for paths, filenames and extensions. Input:

src/org/opensolaris/opengrok/analysis/archive/BZip2AnalyzerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ public class BZip2AnalyzerFactory extends FileAnalyzerFactory {
3636
};
3737

3838
public BZip2AnalyzerFactory() {
39-
super(null, SUFFIXES, MAGICS, null, null, null);
39+
super(null, null, SUFFIXES, MAGICS, null, null, null);
4040
}
4141

4242
@Override

src/org/opensolaris/opengrok/analysis/archive/GZIPAnalyzerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class GZIPAnalyzerFactory extends FileAnalyzerFactory {
3737
};
3838

3939
public GZIPAnalyzerFactory() {
40-
super(null, SUFFIXES, MAGICS, null, null, null);
40+
super(null, null, SUFFIXES, MAGICS, null, null, null);
4141
}
4242

4343
@Override

src/org/opensolaris/opengrok/analysis/archive/TarAnalyzerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public class TarAnalyzerFactory extends FileAnalyzerFactory {
3434
};
3535

3636
public TarAnalyzerFactory() {
37-
super(null, SUFFIXES, null, null, null, Genre.XREFABLE);
37+
super(null, null, SUFFIXES, null, null, null, Genre.XREFABLE);
3838
}
3939

4040
@Override

src/org/opensolaris/opengrok/analysis/archive/ZipAnalyzerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ public FileAnalyzerFactory isMagic(byte[] contents, InputStream in)
9999
new ZipAnalyzerFactory();
100100

101101
private ZipAnalyzerFactory() {
102-
super(null, SUFFIXES, null, MATCHER, null, Genre.XREFABLE);
102+
super(null, null, SUFFIXES, null, MATCHER, null, Genre.XREFABLE);
103103
}
104104

105105
@Override

src/org/opensolaris/opengrok/analysis/c/CAnalyzerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public class CAnalyzerFactory extends FileAnalyzerFactory {
4949
};
5050

5151
public CAnalyzerFactory() {
52-
super(null, SUFFIXES, null, null, "text/plain", Genre.PLAIN);
52+
super(null, null, SUFFIXES, null, null, "text/plain", Genre.PLAIN);
5353
}
5454

5555
@Override

src/org/opensolaris/opengrok/analysis/c/CxxAnalyzerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public class CxxAnalyzerFactory extends FileAnalyzerFactory {
4545
};
4646

4747
public CxxAnalyzerFactory() {
48-
super(null, SUFFIXES, null, null, "text/plain", Genre.PLAIN);
48+
super(null, null, SUFFIXES, null, null, "text/plain", Genre.PLAIN);
4949
}
5050

5151
@Override

src/org/opensolaris/opengrok/analysis/csharp/CSharpAnalyzerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public class CSharpAnalyzerFactory extends FileAnalyzerFactory {
4343
};
4444

4545
public CSharpAnalyzerFactory() {
46-
super(null, SUFFIXES, MAGICS, null, "text/plain", Genre.PLAIN);
46+
super(null, null, SUFFIXES, MAGICS, null, "text/plain", Genre.PLAIN);
4747
}
4848

4949
@Override

0 commit comments

Comments
 (0)