Skip to content

Commit 4da26a1

Browse files
idodeclare authored and Vladimir Kotal committed
Fix #2604, add TandemFilename and TandemPath
Also: - Do case-insensitive comparison for .gz in GZIPAnalyzer. - Add a serialVersionUID to silence lint.
1 parent 0d4f4d3 commit 4da26a1

File tree

15 files changed

+522
-28
lines changed

15 files changed

+522
-28
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/archive/GZIPAnalyzer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.io.IOException;
2828
import java.io.InputStream;
2929
import java.io.Writer;
30+
import java.util.Locale;
3031
import java.util.logging.Level;
3132
import java.util.logging.Logger;
3233
import java.util.zip.GZIPInputStream;
@@ -79,8 +80,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut)
7980

8081
StreamSource gzSrc = wrap(src);
8182
String path = doc.get("path");
82-
if (path != null
83-
&& (path.endsWith(".gz") || path.endsWith(".GZ") || path.endsWith(".Gz"))) {
83+
if (path != null && path.toLowerCase(Locale.ROOT).endsWith(".gz")) {
8484
String newname = path.substring(0, path.length() - 3);
8585
//System.err.println("GZIPPED OF = " + newname);
8686
try (InputStream gzis = gzSrc.getStream()) {

opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
import org.opengrok.indexer.logger.LoggerFactory;
6060
import org.opengrok.indexer.util.ForbiddenSymlinkException;
6161
import org.opengrok.indexer.util.IOUtils;
62+
import org.opengrok.indexer.util.TandemPath;
6263

6364
/*
6465
* Class representing file based storage of per source file history.
@@ -213,13 +214,12 @@ private static File getCachedFile(File file) throws HistoryException,
213214
sb.append(File.separator);
214215
sb.append(DIRECTORY_FILE_PREFIX);
215216
}
216-
sb.append(".gz");
217217
} catch (IOException e) {
218218
throw new HistoryException("Failed to get path relative to " +
219219
"source root for " + file, e);
220220
}
221221

222-
return new File(sb.toString());
222+
return new File(TandemPath.join(sb.toString(), ".gz"));
223223
}
224224

225225
/**

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
import org.opengrok.indexer.util.IOUtils;
9999
import org.opengrok.indexer.util.ObjectPool;
100100
import org.opengrok.indexer.util.Statistics;
101+
import org.opengrok.indexer.util.TandemPath;
101102
import org.opengrok.indexer.web.Util;
102103

103104
import javax.ws.rs.client.ClientBuilder;
@@ -685,7 +686,8 @@ private void setDirty() {
685686
}
686687

687688
private File whatXrefFile(String path, boolean compress) {
688-
return new File(xrefDir, path + (compress ? ".gz" : ""));
689+
String xrefPath = compress ? TandemPath.join(path, ".gz") : path;
690+
return new File(xrefDir, xrefPath);
689691
}
690692

691693
/**
@@ -1612,8 +1614,8 @@ private Writer newXrefWriter(FileAnalyzer fa, String path)
16121614

16131615
// Write to a pending file for later renaming.
16141616
String xrefAbs = xrefFile.getAbsolutePath();
1615-
File transientXref = new File(xrefAbs +
1616-
PendingFileCompleter.PENDING_EXTENSION);
1617+
File transientXref = new File(TandemPath.join(xrefAbs,
1618+
PendingFileCompleter.PENDING_EXTENSION));
16171619
PendingFileRenaming ren = new PendingFileRenaming(xrefAbs,
16181620
transientXref.getAbsolutePath());
16191621
completer.add(ren);

opengrok-indexer/src/main/java/org/opengrok/indexer/index/PendingFileCompleter.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import java.util.logging.Logger;
4545
import java.util.stream.Collectors;
4646
import org.opengrok.indexer.logger.LoggerFactory;
47+
import org.opengrok.indexer.util.TandemPath;
4748

4849
/**
4950
* Represents a tracker of pending file deletions and renamings that can later
@@ -319,7 +320,7 @@ private int completeLinkages() throws IOException {
319320
}
320321

321322
private void doDelete(PendingFileDeletionExec del) throws IOException {
322-
File f = new File(del.absolutePath + PENDING_EXTENSION);
323+
File f = new File(TandemPath.join(del.absolutePath, PENDING_EXTENSION));
323324
File parent = f.getParentFile();
324325
del.absoluteParent = parent;
325326

opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.opengrok.indexer.history.HistoryException;
5757
import org.opengrok.indexer.logger.LoggerFactory;
5858
import org.opengrok.indexer.util.IOUtils;
59+
import org.opengrok.indexer.util.TandemPath;
5960
import org.opengrok.indexer.web.Prefix;
6061
import org.opengrok.indexer.web.SearchHelper;
6162
import org.opengrok.indexer.web.Util;
@@ -116,9 +117,9 @@ private static String getTags(File basedir, String path, boolean compressed) {
116117
int len = r.read(content);
117118
return new String(content, 0, len);
118119
} catch (Exception e) {
119-
LOGGER.log(
120-
Level.WARNING, "An error reading tags from " + basedir + path
121-
+ (compressed ? ".gz" : ""), e);
120+
String fnm = compressed ? TandemPath.join(basedir + path, ".gz") :
121+
basedir + path;
122+
LOGGER.log(Level.WARNING, "An error reading tags from " + fnm, e);
122123
}
123124
return "";
124125
}
@@ -127,13 +128,14 @@ private static String getTags(File basedir, String path, boolean compressed) {
127128
private static Reader getXrefReader(
128129
File basedir, String path, boolean compressed)
129130
throws IOException {
130-
/**
131+
/*
131132
* For backward compatibility, read the OpenGrok-produced document
132133
* using the system default charset.
133134
*/
134135
if (compressed) {
135136
return new BufferedReader(IOUtils.createBOMStrippedReader(
136-
new GZIPInputStream(new FileInputStream(new File(basedir, path + ".gz")))));
137+
new GZIPInputStream(new FileInputStream(new File(basedir,
138+
TandemPath.join(path, ".gz"))))));
137139
} else {
138140
return new BufferedReader(IOUtils.createBOMStrippedReader(
139141
new FileInputStream(new File(basedir, path))));

opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import org.opengrok.indexer.search.Summary.Fragment;
6868
import org.opengrok.indexer.search.context.Context;
6969
import org.opengrok.indexer.search.context.HistoryContext;
70+
import org.opengrok.indexer.util.TandemPath;
7071
import org.opengrok.indexer.web.PageConfig;
7172
import org.opengrok.indexer.web.Prefix;
7273
import org.opengrok.indexer.web.ProjectHelper;
@@ -521,7 +522,8 @@ public void results(int start, int end, List<Hit> ret) {
521522
* default charset.
522523
*/
523524
try (Reader r = RuntimeEnvironment.getInstance().isCompressXref()
524-
? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(data + Prefix.XREF_P + filename + ".gz")))))
525+
? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(
526+
TandemPath.join(data + Prefix.XREF_P + filename, ".gz"))))))
525527
: new HTMLStripCharFilter(new BufferedReader(new FileReader(data + Prefix.XREF_P + filename)))) {
526528
l = r.read(content);
527529
}
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2018, Chris Fraire <[email protected]>.
22+
*/
23+
24+
package org.opengrok.indexer.util;
25+
26+
import java.io.File;
27+
import java.nio.charset.StandardCharsets;
28+
import java.security.MessageDigest;
29+
import java.security.NoSuchAlgorithmException;
30+
import java.util.Arrays;
31+
import java.util.Base64;
32+
33+
/**
 * Represents a utility class for creating a filename to operate in tandem with
 * an original filename by adding a new file extension but limiting the length
 * of the new filename to 255 UTF-8 encoded bytes if necessary by truncating
 * and packing in a Base64-encoded SHA-256 hash of the original file name
 * (less any short alphanumeric extension, which is preserved verbatim in
 * front of the new extension).
 */
public class TandemFilename {

    /** Upper bound, in UTF-8 encoded bytes, of any filename produced here. */
    private static final int MAX_BYTES = 255;

    /**
     * One fewer than {@link #MAX_BYTES} as a cap for simple concatenation to
     * avoid the possibility of easily fabricating a collision against this
     * algorithm. I.e., a 255 byte tandem filename will always include a
     * computed hash and not just be the concatenation of original filename
     * plus new extension.
     */
    private static final int MAX_CAT_BYTES = MAX_BYTES - 1;

    /**
     * "Instances of Base64.Encoder class are safe for use by multiple
     * concurrent threads." --Oracle.
     */
    private static final Base64.Encoder encoder = Base64.getUrlEncoder();

    /** private to enforce static */
    private TandemFilename() {
    }

    /**
     * Appends an ASCII extension to the specified {@code filename}, truncating
     * and packing in a SHA-256 hash if the UTF-8 encoding would exceed 254
     * bytes and arriving at a final size of 255 bytes in that special case.
     * @param filename a defined instance
     * @param asciiExtension a defined instance that is expected to be only
     * ASCII so that its UTF-8 form is the same length
     * @return a transformed filename whose UTF-8 encoding is not more than 255
     * bytes.
     * @throws IllegalArgumentException thrown if {@code filename} has a
     * parent or if {@code asciiExtension} is too long to allow packing a
     * SHA-256 hash in the transformation.
     */
    public static String join(String filename, String asciiExtension) {

        File file = new File(filename);
        if (file.getParent() != null) {
            throw new IllegalArgumentException("filename can't have parent");
        }

        /*
         * If the original filename length * 4 (for longest possible UTF-8
         * encoding) plus asciiExtension length is not greater than one less
         * than 255, then quickly return the concatenation without having to
         * encode anything.
         */
        if (filename.length() * 4 + asciiExtension.length() <= MAX_CAT_BYTES) {
            return filename + asciiExtension;
        }
        return maybePackSha(filename, asciiExtension);
    }

    /**
     * Measures the real UTF-8 size of {@code filename} and either returns the
     * plain concatenation (when it fits within {@link #MAX_CAT_BYTES}) or
     * builds a name of exactly {@link #MAX_BYTES} bytes consisting of a
     * truncated stem, optional {@code '_'} padding, the Base64 SHA-256 of the
     * stem, any shifted original extension, and {@code asciiExtension}.
     * @throws IllegalArgumentException if nothing of the stem can be kept
     * because the hash plus extensions leave no room for it
     */
    private static String maybePackSha(String filename, String asciiExtension) {

        byte[] uFilename = filename.getBytes(StandardCharsets.UTF_8);
        int nBytes = uFilename.length;
        if (nBytes + asciiExtension.length() <= MAX_CAT_BYTES) {
            // Here the UTF-8 encoding already allows for the new extension.
            return filename + asciiExtension;
        }

        /*
         * If filename has an ASCII extension already (of a reasonable length),
         * shift it to the new asciiExtension so that it won't be overwritten
         * by the packed hash. extLength counts the dot, so "> 1" rejects a
         * bare trailing dot.
         */
        int pos = filename.lastIndexOf('.');
        int extLength = filename.length() - pos;
        if (pos >= 0 && extLength < 30 && extLength > 1) {
            int i;
            for (i = pos + 1; i < filename.length(); ++i) {
                char ch = filename.charAt(i);
                // Only letters/digits no higher than 'z' qualify.
                if (!Character.isLetterOrDigit(ch) || ch > 'z') {
                    break;
                }
            }
            if (i >= filename.length()) {
                // By this point, we affirmed a letters/numbers extension.
                asciiExtension = filename.substring(pos) + asciiExtension;
                filename = filename.substring(0, pos);
                uFilename = filename.getBytes(StandardCharsets.UTF_8);
                nBytes = uFilename.length;
            }
        }

        // Pack the hash (of the stem) just before the file extension.
        asciiExtension = sha256base64(filename) + asciiExtension;

        /*
         * Now trim the filename by code points until the full UTF-8 encoding
         * fits within MAX_BYTES.
         */
        int newLength = filename.length();
        while (nBytes + asciiExtension.length() > MAX_BYTES) {
            /*
             * The stem can already be empty here (an empty filename, or a
             * dotfile such as ".c" whose whole name was shifted into the
             * extension). codePointBefore(0) would raise an
             * IndexOutOfBoundsException, so report the documented
             * IllegalArgumentException instead.
             */
            if (newLength <= 0) {
                throw new IllegalArgumentException("asciiExtension too long");
            }

            int cp = filename.codePointBefore(newLength);
            int nChars = Character.charCount(cp);
            String c = filename.substring(newLength - nChars, newLength);
            nBytes -= c.getBytes(StandardCharsets.UTF_8).length;
            newLength -= nChars;

            // At least one character of the stem must survive the trimming.
            if (newLength <= 0) {
                throw new IllegalArgumentException("asciiExtension too long");
            }
        }

        /*
         * Pad if necessary to exactly MAX_BYTES. Dropping a multi-byte code
         * point can undershoot the limit by a few bytes.
         */
        if (nBytes + asciiExtension.length() != MAX_BYTES) {
            char[] pad = new char[MAX_BYTES - nBytes - asciiExtension.length()];
            Arrays.fill(pad, '_');
            asciiExtension = new String(pad) + asciiExtension;
        }

        return filename.substring(0, newLength) + asciiExtension;
    }

    /**
     * Computes the SHA-256 digest of the UTF-8 encoding of {@code value} and
     * returns it in URL-safe Base64 form.
     */
    private static String sha256base64(String value) {

        MessageDigest hasher;
        try {
            hasher = MessageDigest.getInstance("SHA-256");
        } catch (NoSuchAlgorithmException e) {
            /*
             * This will not happen since "Every implementation of the Java
             * platform is required to support the following standard
             * MessageDigest algorithms: MD5, SHA-1, SHA-256."
             */
            throw new RuntimeException(e);
        }

        byte[] digest = hasher.digest(value.getBytes(StandardCharsets.UTF_8));
        return encoder.encodeToString(digest);
    }
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2018, Chris Fraire <[email protected]>.
22+
*/
23+
24+
package org.opengrok.indexer.util;
25+
26+
import java.io.File;
27+
28+
/**
29+
* Represents a utility class for creating a path to operate in tandem with
30+
* an original path by adding a new file extension but limiting the length
31+
* of the filename component of the new path to 255 UTF-8 encoded bytes if
32+
* necessary by truncating and packing in a Base64-encoded SHA-256 hash of the
33+
* original file name component.
34+
*/
35+
public class TandemPath {
36+
37+
/** private to enforce static */
38+
private TandemPath() {
39+
}
40+
41+
/**
42+
* Appends an ASCII extension to the specified {@code filePath}, truncating
43+
* and packing in a SHA-256 hash if the UTF-8 encoding of the filename
44+
* component of the path would exceed 254 bytes and arriving at a final
45+
* size of 255 bytes in that special case.
46+
* @param filePath a defined instance
47+
* @param asciiExtension a defined instance that is expected to be only
48+
* ASCII so that its UTF-8 form is the same length
49+
* @return a transformed path whose filename component's UTF-8 encoding is
50+
* not more than 255 bytes.
51+
* @throws IllegalArgumentException {@code asciiExtension} is too long to
52+
* allow packing a SHA-256 hash in the transformation.
53+
*/
54+
public static String join(String filePath, String asciiExtension) {
55+
56+
File file = new File(filePath);
57+
String newName = TandemFilename.join(file.getName(), asciiExtension);
58+
File newFile = new File(file.getParent(), newName);
59+
return newFile.getPath();
60+
}
61+
}

0 commit comments

Comments
 (0)