Skip to content

Commit 0a0e955

Browse files
committed
Use DirectoryExtraReader to supplement filesystem metadata
1 parent 3a8a81e commit 0a0e955

File tree

14 files changed

+575
-20
lines changed

14 files changed

+575
-20
lines changed

src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,20 @@ public void populateDocument(Document doc, File file, String path,
429429
doc.add(new TextField(QueryBuilder.PROJECT, project.getPath(), Store.YES));
430430
}
431431

432+
/*
433+
* Use the parent of the path -- not the absolute file as is done for
434+
* FULLPATH -- so that DIRPATH is the same convention as for PATH
435+
* above. A StringField, however, is used instead of a TextField.
436+
*/
437+
File fpath = new File(path);
438+
String fileParent = fpath.getParent();
439+
if (fileParent != null && fileParent.length() > 0) {
440+
String normalizedPath = QueryBuilder.normalizeDirPath(fileParent);
441+
StringField npstring = new StringField(QueryBuilder.DIRPATH,
442+
normalizedPath, Store.NO);
443+
doc.add(npstring);
444+
}
445+
432446
if (fa != null) {
433447
Genre g = fa.getGenre();
434448
if (g == Genre.PLAIN || g == Genre.XREFABLE || g == Genre.HTML) {
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
23+
*/
24+
25+
package org.opensolaris.opengrok.search;
26+
27+
import java.io.File;
28+
29+
/**
30+
* Represents a pairing of {@link File} along with supplemental
31+
* {@link FileExtra}.
32+
*/
33+
public class DirectoryEntry {
34+
35+
private final File file;
36+
private final FileExtra extra;
37+
38+
/**
39+
* Initializes an instance with a specified, required {@link File}.
40+
* @param file a defined instance
41+
*/
42+
public DirectoryEntry(File file) {
43+
this(file, null);
44+
}
45+
46+
/**
47+
* Initializes an instance with a specified, required {@link File} and
48+
* a possible {@link FileExtra}.
49+
* @param file a defined instance
50+
* @param extra an optional instance
51+
*/
52+
public DirectoryEntry(File file, FileExtra extra) {
53+
if (file == null) {
54+
throw new IllegalArgumentException("`file' is null");
55+
}
56+
this.file = file;
57+
this.extra = extra;
58+
}
59+
60+
/**
61+
* @return the file
62+
*/
63+
public File getFile() {
64+
return file;
65+
}
66+
67+
/**
68+
* @return the (optional) extra file data
69+
*/
70+
public FileExtra getExtra() {
71+
return extra;
72+
}
73+
}
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
*/
23+
24+
package org.opensolaris.opengrok.search;
25+
26+
import java.io.IOException;
27+
import java.util.ArrayList;
28+
import java.util.List;
29+
import java.util.logging.Level;
30+
import java.util.logging.Logger;
31+
import org.apache.lucene.document.Document;
32+
import org.apache.lucene.queryparser.classic.ParseException;
33+
import org.apache.lucene.search.IndexSearcher;
34+
import org.apache.lucene.search.Query;
35+
import org.apache.lucene.search.ScoreDoc;
36+
import org.apache.lucene.search.TopDocs;
37+
import org.opensolaris.opengrok.logger.LoggerFactory;
38+
39+
/**
40+
* Represents a searcher to supplement metadata from the file-system with
41+
* per-file, OpenGrok-analyzed data.
42+
*/
43+
public class DirectoryExtraReader {
44+
45+
// N.b.: update #search() comment when changing
46+
private final int DIR_LIMIT_NUM = 2000;
47+
48+
private static final Logger LOGGER = LoggerFactory.getLogger(
49+
DirectoryExtraReader.class);
50+
51+
/**
52+
* Search for supplemental file information in the specified {@code path}.
53+
* @param searcher a defined instance
54+
* @param path a defined path to qualify the search
55+
* @return a list of results, limited to 2000 values
56+
* @throws IOException if an error occurs searching the index
57+
*/
58+
public List<FileExtra> search(IndexSearcher searcher, String path)
59+
throws IOException {
60+
if (searcher == null) {
61+
throw new IllegalArgumentException("`searcher' is null");
62+
}
63+
if (path == null) {
64+
throw new IllegalArgumentException("`path' is null");
65+
}
66+
67+
QueryBuilder qbuild = new QueryBuilder();
68+
qbuild.setDirPath(path);
69+
Query query;
70+
try {
71+
query = qbuild.build();
72+
} catch (ParseException e) {
73+
final String PARSE_ERROR =
74+
"An error occured while parsing dirpath query";
75+
LOGGER.log(Level.WARNING, PARSE_ERROR, e);
76+
throw new IOException(PARSE_ERROR);
77+
}
78+
79+
TopDocs hits = searcher.search(query, DIR_LIMIT_NUM);
80+
List<FileExtra> results = processHits(searcher, hits);
81+
return results;
82+
}
83+
84+
private List<FileExtra> processHits(IndexSearcher searcher, TopDocs hits)
85+
throws IOException {
86+
87+
List<FileExtra> results = new ArrayList<>();
88+
89+
for (ScoreDoc sd : hits.scoreDocs) {
90+
Document d = searcher.doc(sd.doc);
91+
String filepath = d.get(QueryBuilder.PATH);
92+
Integer numlines = tryParseInt(d.get(QueryBuilder.NUML));
93+
Integer loc = tryParseInt(d.get(QueryBuilder.LOC));
94+
FileExtra extra = new FileExtra(filepath, numlines, loc);
95+
results.add(extra);
96+
}
97+
98+
return results;
99+
}
100+
101+
private static Integer tryParseInt(String value) {
102+
if (value == null) return null;
103+
try {
104+
return Integer.parseInt(value);
105+
} catch (NumberFormatException e) {
106+
return null;
107+
}
108+
}
109+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
*/
23+
24+
package org.opensolaris.opengrok.search;
25+
26+
/**
 * Holds supplemental, per-file data stored after OpenGrok analysis: the
 * file path plus its optional line counts.
 */
public class FileExtra {

    private final String path;
    private final Integer lineCount;
    private final Integer linesOfCode;

    /**
     * Creates an instance from a file path, a number of lines, and a
     * lines-of-code count.
     * @param filepath the file path
     * @param numlines the number of lines (null if unknown)
     * @param loc the lines-of-code (null if unknown)
     */
    public FileExtra(String filepath, Integer numlines, Integer loc) {
        path = filepath;
        lineCount = numlines;
        linesOfCode = loc;
    }

    /**
     * @return the file path as given at construction
     */
    public String getFilepath() {
        return path;
    }

    /**
     * @return the number of lines (null if unknown)
     */
    public Integer getNumlines() {
        return lineCount;
    }

    /**
     * @return the lines-of-code (null if unknown)
     */
    public Integer getLoc() {
        return linesOfCode;
    }
}

src/org/opensolaris/opengrok/search/QueryBuilder.java

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
*/
2525
package org.opensolaris.opengrok.search;
2626

27+
import java.io.File;
28+
import java.nio.charset.StandardCharsets;
29+
import java.security.MessageDigest;
30+
import java.security.NoSuchAlgorithmException;
2731
import java.util.ArrayList;
2832
import java.util.Collections;
2933
import java.util.Map;
@@ -41,7 +45,7 @@
4145
public class QueryBuilder {
4246

4347
/**
44-
* Fields we use in lucene public ones
48+
* Fields we use in lucene: public ones
4549
*/
4650
public static final String FULL = "full";
4751
public static final String DEFS = "defs";
@@ -53,14 +57,19 @@ public class QueryBuilder {
5357
public static final String NUML = "numl";
5458
public static final String LOC = "loc";
5559
/**
56-
* Fields we use in lucene internal ones
60+
* Fields we use in lucene: internal ones
5761
*/
5862
public static final String U = "u";
5963
public static final String TAGS = "tags";
6064
public static final String T = "t";
6165
public static final String FULLPATH = "fullpath";
66+
public static final String DIRPATH = "dirpath";
6267
public static final String PROJECT = "project";
6368
public static final String DATE = "date";
69+
70+
/** Used for paths, so SHA-1 is completely sufficient */
71+
private static final String DIRPATH_HASH_ALGORITHM = "SHA-1";
72+
6473
/**
6574
* A map containing the query text for each field. (We use a sorted map here
6675
* only because we have tests that check the generated query string. If we
@@ -145,6 +154,55 @@ public String getPath() {
145154
return getQueryText(PATH);
146155
}
147156

157+
/**
158+
* Set search string for the "dirpath" field.
159+
* @param path query string to set
160+
* @return this instance
161+
*/
162+
public QueryBuilder setDirPath(String path) {
163+
String normalizedPath = normalizeDirPath(path);
164+
return addQueryText(DIRPATH, normalizedPath);
165+
}
166+
167+
/**
168+
* Get search string for the "dirpath" field.
169+
* @return {@code null} if not set; the query string otherwise.
170+
*/
171+
public String getDirPath() {
172+
return getQueryText(DIRPATH);
173+
}
174+
175+
/**
176+
* Transform {@code path} to ensure any {@link File#separatorChar} is
177+
* represented as '/', that there is a trailing '/', and then to hash using
178+
* SHA-1 and formatted in a private encoding using only letters [g-u].
179+
* @param path a defined value
180+
* @return a defined, transformed value
181+
*/
182+
public static String normalizeDirPath(String path) {
183+
String norm1 = path.replace(File.separatorChar, '/');
184+
String norm2 = norm1.endsWith("/") ? norm1 : norm1 + "/";
185+
186+
MessageDigest digest;
187+
try {
188+
digest = MessageDigest.getInstance(DIRPATH_HASH_ALGORITHM);
189+
} catch (NoSuchAlgorithmException e) {
190+
return norm2;
191+
}
192+
byte[] hash = digest.digest(norm2.getBytes(StandardCharsets.UTF_8));
193+
194+
StringBuilder encodedString = new StringBuilder();
195+
for (int i = 0; i < hash.length; ++i) {
196+
int v0 = (0xF0 & hash[i]) >> 4;
197+
int v1 = 0xF & hash[i];
198+
char c0 = (char)('g' + v0);
199+
char c1 = (char)('g' + v1);
200+
encodedString.append(c0);
201+
encodedString.append(c1);
202+
}
203+
return encodedString.toString();
204+
}
205+
148206
/**
149207
* Set search string for the "hist" field.
150208
*
@@ -213,6 +271,7 @@ public boolean isDefSearch() {
213271
&& (getQueryText(REFS) == null)
214272
&& (getQueryText(PATH) == null)
215273
&& (getQueryText(HIST) == null)
274+
&& (getQueryText(DIRPATH) == null)
216275
&& (getQueryText(DEFS) != null));
217276
}
218277

0 commit comments

Comments
 (0)