Skip to content

Commit b296203

Browse files
Lubos Kosco
authored and committed
fixes for FilePath searches being broken (in QueryBuilder),
cleanups, path tokenizer fixes, and test fixes; fixes issue #9
1 parent 743ae7d commit b296203

File tree

8 files changed

+240
-64
lines changed

8 files changed

+240
-64
lines changed

src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,12 @@
7171
import org.opensolaris.opengrok.history.HistoryException;
7272
import org.opensolaris.opengrok.history.HistoryGuru;
7373
import org.opensolaris.opengrok.history.HistoryReader;
74+
import org.opensolaris.opengrok.search.QueryBuilder;
7475
import org.opensolaris.opengrok.web.Util;
7576

7677
/**
7778
* Manages and provides Analyzers as needed. Please see
78-
* <a href="http://www.opensolaris.org/os/project/opengrok/manual/internals/">
79+
* <a href="https://github.com/OpenGrok/OpenGrok/wiki/OpenGrok-Internals">
7980
* this</a> page for a great description of the purpose of the AnalyzerGuru.
8081
*
8182
* Created on September 22, 2005
@@ -89,27 +90,27 @@ public class AnalyzerGuru {
8990

9091
/** Map from file names to analyzer factories. */
9192
private static final Map<String, FileAnalyzerFactory>
92-
FILE_NAMES = new HashMap<String, FileAnalyzerFactory>();
93+
FILE_NAMES = new HashMap<>();
9394

9495
/** Map from file extensions to analyzer factories. */
9596
private static final Map<String, FileAnalyzerFactory>
96-
ext = new HashMap<String, FileAnalyzerFactory>();
97+
ext = new HashMap<>();
9798

9899
// @TODO: have a comparator
99100
/** Map from magic strings to analyzer factories. */
100101
private static final SortedMap<String, FileAnalyzerFactory>
101-
magics = new TreeMap<String, FileAnalyzerFactory>();
102+
magics = new TreeMap<>();
102103

103104
/**
104105
* List of matcher objects which can be used to determine which analyzer
105106
* factory to use.
106107
*/
107108
private static final List<FileAnalyzerFactory.Matcher>
108-
matchers = new ArrayList<FileAnalyzerFactory.Matcher>();
109+
matchers = new ArrayList<>();
109110

110111
/** List of all registered {@code FileAnalyzerFactory} instances. */
111112
private static final List<FileAnalyzerFactory>
112-
factories = new ArrayList<FileAnalyzerFactory>();
113+
factories = new ArrayList<>();
113114

114115
public static final Reader dummyR = new StringReader("");
115116
public static final String dummyS = "";
@@ -237,41 +238,41 @@ public static FileAnalyzer getAnalyzer(InputStream in, String file) throws IOExc
237238
* @param xrefOut Where to write the xref (possibly {@code null})
238239
* @return The Lucene document to add to the index database
239240
* @throws java.io.IOException If an exception occurs while collecting the
240-
* datas
241+
* data
241242
*/
242243
public Document getDocument(File file, String path,
243244
FileAnalyzer fa, Writer xrefOut)
244245
throws IOException {
245246
Document doc = new Document();
246247
String date = DateTools.timeToString(file.lastModified(),
247248
DateTools.Resolution.MILLISECOND);
248-
doc.add(new Field("u", Util.path2uid(path, date),
249+
doc.add(new Field(QueryBuilder.U, Util.path2uid(path, date),
249250
string_ft_stored_nanalyzed_norms));
250-
doc.add(new Field("fullpath", file.getAbsolutePath(),
251+
doc.add(new Field(QueryBuilder.FULLPATH, file.getAbsolutePath(),
251252
string_ft_nstored_nanalyzed_norms));
252253

253254
try {
254255
HistoryReader hr = HistoryGuru.getInstance().getHistoryReader(file);
255256
if (hr != null) {
256-
doc.add(new TextField("hist", hr));
257+
doc.add(new TextField(QueryBuilder.HIST, hr));
257258
// date = hr.getLastCommentDate() //RFE
258259
}
259260
} catch (HistoryException e) {
260261
OpenGrokLogger.getLogger().log(Level.WARNING, "An error occurred while reading history: ", e);
261262
}
262-
doc.add(new Field("date", date, string_ft_stored_nanalyzed_norms));
263+
doc.add(new Field(QueryBuilder.DATE, date, string_ft_stored_nanalyzed_norms));
263264
if (path != null) {
264-
doc.add(new TextField("path", path, Store.YES));
265+
doc.add(new TextField(QueryBuilder.PATH, path, Store.YES));
265266
Project project = Project.getProject(path);
266267
if (project != null) {
267-
doc.add(new TextField("project", project.getPath(), Store.YES));
268+
doc.add(new TextField(QueryBuilder.PROJECT, project.getPath(), Store.YES));
268269
}
269270
}
270271

271272
if (fa != null) {
272273
Genre g = fa.getGenre();
273274
if (g == Genre.PLAIN || g == Genre.XREFABLE || g == Genre.HTML) {
274-
doc.add(new Field("t", g.typeName(), string_ft_stored_nanalyzed_norms
275+
doc.add(new Field(QueryBuilder.T, g.typeName(), string_ft_stored_nanalyzed_norms
275276
));
276277
}
277278
fa.analyze(doc, StreamSource.fromFile(file), xrefOut);
@@ -301,9 +302,9 @@ public static String getContentType(InputStream in, String file) throws IOExcept
301302
}
302303

303304
/**
304-
* Write a browsable version of the file
305+
* Write a browse-able version of the file
305306
*
306-
* @param factory The analyzer factory for this filetype
307+
* @param factory The analyzer factory for this file type
307308
* @param in The input stream containing the data
308309
* @param out Where to write the result
309310
* @param defs definitions for the source file, if available
@@ -329,7 +330,7 @@ public static void writeXref(FileAnalyzerFactory factory, Reader in,
329330
/**
330331
* Get the genre of a file
331332
*
332-
* @param file The file to inpect
333+
* @param file The file to inspect
333334
* @return The genre suitable to decide how to display the file
334335
*/
335336
public static Genre getGenre(String file) {
@@ -440,7 +441,7 @@ public static FileAnalyzerFactory find(InputStream in, String file)
440441
*/
441442
public static FileAnalyzerFactory find(String file) {
442443
String path = file;
443-
int i = 0;
444+
int i;
444445
if (((i = path.lastIndexOf('/')) > 0 || (i = path.lastIndexOf('\\')) > 0)
445446
&& (i + 1 < path.length())) {
446447
path = path.substring(i + 1);
@@ -458,7 +459,7 @@ public static FileAnalyzerFactory find(String file) {
458459
}
459460

460461
/**
461-
* Finds a suitable analyser class for the data in this stream
462+
* Finds a suitable analyzer class for the data in this stream
462463
*
463464
* @param in The stream containing the data to analyze
464465
* @return the analyzer factory to use
@@ -497,7 +498,7 @@ public static FileAnalyzerFactory find(InputStream in) throws IOException {
497498
}
498499

499500
/**
500-
* Finds a suitable analyser class for a magic signature
501+
* Finds a suitable analyzer class for a magic signature
501502
*
502503
* @param signature the magic signature look up
503504
* @return the analyzer factory to use
@@ -536,7 +537,7 @@ private static FileAnalyzerFactory find(byte[] signature)
536537

537538
/** Byte-order markers. */
538539
private static final Map<String, byte[]> BOMS =
539-
new HashMap<String, byte[]>();
540+
new HashMap<>();
540541
static {
541542
BOMS.put("UTF-8", new byte[] {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF});
542543
BOMS.put("UTF-16BE", new byte[] {(byte) 0xFE, (byte) 0xFF});

src/org/opensolaris/opengrok/analysis/CompatibleAnalyser.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.lucene.analysis.Analyzer;
2727
import org.opensolaris.opengrok.analysis.plain.PlainFullTokenizer;
2828
import org.opensolaris.opengrok.analysis.plain.PlainSymbolTokenizer;
29+
import org.opensolaris.opengrok.search.QueryBuilder;
2930

3031
public class CompatibleAnalyser extends Analyzer {
3132

@@ -36,16 +37,16 @@ public CompatibleAnalyser() {
3637
@Override
3738
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
3839
switch (fieldName) {
39-
case "full":
40+
case QueryBuilder.FULL:
4041
return new TokenStreamComponents(new PlainFullTokenizer(reader));
41-
case "refs":
42+
case QueryBuilder.REFS:
4243
return new TokenStreamComponents(new PlainSymbolTokenizer(reader));
43-
case "defs":
44+
case QueryBuilder.DEFS:
4445
return new TokenStreamComponents(new PlainSymbolTokenizer(reader));
45-
case "path":
46-
case "project":
46+
case QueryBuilder.PATH:
47+
case QueryBuilder.PROJECT:
4748
return new TokenStreamComponents(new PathTokenizer(reader));
48-
case "hist":
49+
case QueryBuilder.HIST:
4950
return new HistoryAnalyzer().createComponents(fieldName, reader);
5051
default:
5152
return new TokenStreamComponents(new PlainFullTokenizer(reader));

src/org/opensolaris/opengrok/analysis/PathTokenizer.java

Lines changed: 60 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,31 +27,54 @@
2727
import java.util.Arrays;
2828
import org.apache.lucene.analysis.Tokenizer;
2929
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
30+
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
31+
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
3032

33+
/**
34+
* Tokenizer for paths filenames and extensions Input:
35+
*
36+
* <pre>
37+
* /topdir/subdir/filename.ext
38+
* </pre>
39+
*
40+
* Output:
41+
*
42+
* <pre>
43+
* topdir
44+
* subdir
45+
* filename
46+
* .
47+
* ext
48+
* </pre>
49+
*/
3150
public class PathTokenizer extends Tokenizer {
3251

33-
// below should be '/' since we try to convert even windows file separators to unix ones
34-
private static final char dirSep = '/';
35-
private boolean dot = false;
36-
private static final char ADOT[]={'.'};
52+
// below should be '/' since we try to convert even windows file separators
53+
// to unix ones
54+
public static final char DEFAULT_DELIMITER = '/';
3755
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
56+
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
57+
private int startPosition = 0;
58+
private final char delimiter;
59+
private int charsRead = 0;
60+
private boolean dot = false;
61+
private static final char cdot = '.';
3862

3963
public PathTokenizer(Reader input) {
40-
super(input);
41-
}
42-
43-
@Override
44-
public void reset() throws IOException {
45-
super.reset();
46-
dot = false;
64+
super(input);
65+
this.delimiter = DEFAULT_DELIMITER;
4766
}
4867

4968
@Override
5069
public final boolean incrementToken() throws IOException {
5170
clearAttributes();
5271
if (dot) {
53-
dot = false;
54-
termAtt.copyBuffer(ADOT,0,1);
72+
dot = false;
73+
termAtt.setEmpty();
74+
termAtt.append(cdot);
75+
termAtt.setLength(1);
76+
offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + 1));
77+
startPosition++;
5578
return true;
5679
}
5780

@@ -60,22 +83,42 @@ public final boolean incrementToken() throws IOException {
6083
int i = 0;
6184
do {
6285
c = input.read();
86+
charsRead++;
6387
if (c == -1) {
6488
return false;
6589
}
66-
} while (c == dirSep);
90+
} while (c == delimiter);
6791

6892
do {
6993
if (i >= buf.length) {
7094
buf = Arrays.copyOf(buf, buf.length * 2);
7195
}
7296
buf[i++] = Character.toLowerCase((char) c);
7397
c = input.read();
74-
} while (c != dirSep && c != '.' && !Character.isWhitespace(c) && c != -1);
75-
if (c == '.') {
98+
charsRead++;
99+
} while ( c != delimiter && c != cdot && !Character.isWhitespace(c) && c != -1);
100+
if (c == cdot) {
76101
dot = true;
77-
}
102+
}
78103
termAtt.copyBuffer(buf, 0, i);
104+
termAtt.setLength(i);
105+
offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + i));
106+
startPosition = startPosition + i + 1;
79107
return true;
80108
}
109+
110+
@Override
111+
public final void end() {
112+
// set final offset
113+
int finalOffset = correctOffset(charsRead);
114+
offsetAtt.setOffset(finalOffset, finalOffset);
115+
}
116+
117+
@Override
118+
public void reset() throws IOException {
119+
super.reset();
120+
dot=false;
121+
charsRead = 0;
122+
startPosition = 0;
123+
}
81124
}

src/org/opensolaris/opengrok/index/IndexDatabase.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ public class IndexDatabase {
9999
private Ctags ctags;
100100
private LockFactory lockfact;
101101
private final BytesRef emptyBR = new BytesRef("");
102+
103+
//Directory where we store indexes
104+
private static final String INDEX_DIR="index";
102105

103106
/**
104107
* Create a new instance of the Index Database. Use this constructor if you
@@ -143,7 +146,7 @@ public static void updateAll(ExecutorService executor) throws IOException {
143146
*/
144147
static void updateAll(ExecutorService executor, IndexChangedListener listener) throws IOException {
145148
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
146-
List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();
149+
List<IndexDatabase> dbs = new ArrayList<>();
147150

148151
if (env.hasProjects()) {
149152
for (Project project : env.getProjects()) {
@@ -182,7 +185,7 @@ public void run() {
182185
*/
183186
public static void update(ExecutorService executor, IndexChangedListener listener, List<String> paths) throws IOException {
184187
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
185-
List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();
188+
List<IndexDatabase> dbs = new ArrayList<>();
186189

187190
for (String path : paths) {
188191
Project project = Project.getProject(path);
@@ -236,7 +239,7 @@ public void run() {
236239
private void initialize() throws IOException {
237240
synchronized (this) {
238241
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
239-
File indexDir = new File(env.getDataRootFile(), "index");
242+
File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
240243
File spellDir = new File(env.getDataRootFile(), "spellIndex");
241244
if (project != null) {
242245
indexDir = new File(indexDir, project.getPath());
@@ -267,10 +270,10 @@ private void initialize() throws IOException {
267270
if (env.isGenerateHtml()) {
268271
xrefDir = new File(env.getDataRootFile(), "xref");
269272
}
270-
listeners = new ArrayList<IndexChangedListener>();
273+
listeners = new ArrayList<>();
271274
dirtyFile = new File(indexDir, "dirty");
272275
dirty = dirtyFile.exists();
273-
directories = new ArrayList<String>();
276+
directories = new ArrayList<>();
274277
}
275278
}
276279

@@ -363,7 +366,7 @@ public void update() throws IOException, HistoryException {
363366
if (numDocs > 0) {
364367
Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0);
365368
terms = uFields.terms(QueryBuilder.U);
366-
}
369+
}
367370

368371
try {
369372
if (numDocs > 0) {
@@ -444,7 +447,7 @@ public void update() throws IOException, HistoryException {
444447
* @throws IOException if an error occurs
445448
*/
446449
static void optimizeAll(ExecutorService executor) throws IOException {
447-
List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();
450+
List<IndexDatabase> dbs = new ArrayList<>();
448451
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
449452
if (env.hasProjects()) {
450453
for (Project project : env.getProjects()) {
@@ -805,7 +808,7 @@ private boolean isLocal(String path) {
805808
*
806809
*/
807810
private int indexDown(File dir, String parent, boolean count_only, int cur_count, int est_total) throws IOException {
808-
int lcur_count = cur_count;
811+
int lcur_count = cur_count;
809812
if (isInterrupted()) {
810813
return lcur_count;
811814
}
@@ -959,7 +962,7 @@ public static void listAllFiles(List<String> subFiles) throws IOException {
959962
public void listFiles() throws IOException {
960963
IndexReader ireader = null;
961964
TermsEnum iter=null;
962-
Terms terms = null;
965+
Terms terms = null;
963966

964967
try {
965968
ireader = DirectoryReader.open(indexDirectory); // open existing index
@@ -1064,7 +1067,7 @@ public static IndexReader getIndexReader(String path) {
10641067
IndexReader ret = null;
10651068

10661069
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1067-
File indexDir = new File(env.getDataRootFile(), "index");
1070+
File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
10681071

10691072
if (env.hasProjects()) {
10701073
Project p = Project.getProject(path);

0 commit comments

Comments (0)