oracle
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/archive/GZIPAnalyzer.java
Lines changed: 2 additions & 2 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/archive/GZIPAnalyzer.java
Lines changed: 2 additions & 2 deletions
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java
Lines changed: 2 additions & 2 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java
Lines changed: 2 additions & 2 deletions
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java
Lines changed: 5 additions & 3 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java
Lines changed: 5 additions & 3 deletions
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/index/PendingFileCompleter.java
Lines changed: 2 additions & 1 deletion b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/index/PendingFileCompleter.java
Lines changed: 2 additions & 1 deletion
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java
Lines changed: 7 additions & 5 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java
Lines changed: 7 additions & 5 deletions
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java
Lines changed: 3 additions & 1 deletion b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java
Lines changed: 3 additions & 1 deletion
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/util/TandemFilename.java
Lines changed: 173 additions & 0 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/util/TandemFilename.java
Lines changed: 173 additions & 0 deletions
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/util/TandemPath.java
Lines changed: 61 additions & 0 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/util/TandemPath.java
Lines changed: 61 additions & 0 deletions
@@ -27,6 +27,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Writer;
+import java.util.Locale;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.zip.GZIPInputStream;
@@ -79,8 +80,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut)
 
         StreamSource gzSrc = wrap(src);
         String path = doc.get("path");
-        if (path != null
-                && (path.endsWith(".gz") || path.endsWith(".GZ") || path.endsWith(".Gz"))) {
+        if (path != null && path.toLowerCase(Locale.ROOT).endsWith(".gz")) {
             String newname = path.substring(0, path.length() - 3);
             //System.err.println("GZIPPED OF = " + newname);
             try (InputStream gzis = gzSrc.getStream()) {
 
@@ -59,6 +59,7 @@
 import org.opengrok.indexer.logger.LoggerFactory;
 import org.opengrok.indexer.util.ForbiddenSymlinkException;
 import org.opengrok.indexer.util.IOUtils;
+import org.opengrok.indexer.util.TandemPath;
 
 /*
  * Class representing file based storage of per source file history.
@@ -213,13 +214,12 @@ private static File getCachedFile(File file) throws HistoryException,
                 sb.append(File.separator);
                 sb.append(DIRECTORY_FILE_PREFIX);
             }
-            sb.append(".gz");
         } catch (IOException e) {
             throw new HistoryException("Failed to get path relative to " +
                     "source root for " + file, e);
         }
 
-        return new File(sb.toString());
+        return new File(TandemPath.join(sb.toString(), ".gz"));
     }
 
     /**
 
@@ -98,6 +98,7 @@
 import org.opengrok.indexer.util.IOUtils;
 import org.opengrok.indexer.util.ObjectPool;
 import org.opengrok.indexer.util.Statistics;
+import org.opengrok.indexer.util.TandemPath;
 import org.opengrok.indexer.web.Util;
 
 import javax.ws.rs.client.ClientBuilder;
@@ -685,7 +686,8 @@ private void setDirty() {
     }
 
     private File whatXrefFile(String path, boolean compress) {
-        return new File(xrefDir, path + (compress ? ".gz" : ""));
+        String xrefPath = compress ? TandemPath.join(path, ".gz") : path;
+        return new File(xrefDir, xrefPath);
     }
 
     /**
@@ -1612,8 +1614,8 @@ private Writer newXrefWriter(FileAnalyzer fa, String path)
 
             // Write to a pending file for later renaming.
             String xrefAbs = xrefFile.getAbsolutePath();
-            File transientXref = new File(xrefAbs +
-                PendingFileCompleter.PENDING_EXTENSION);
+            File transientXref = new File(TandemPath.join(xrefAbs,
+                PendingFileCompleter.PENDING_EXTENSION));
             PendingFileRenaming ren = new PendingFileRenaming(xrefAbs,
                 transientXref.getAbsolutePath());
             completer.add(ren);
 
@@ -44,6 +44,7 @@
 import java.util.logging.Logger;
 import java.util.stream.Collectors;
 import org.opengrok.indexer.logger.LoggerFactory;
+import org.opengrok.indexer.util.TandemPath;
 
 /**
  * Represents a tracker of pending file deletions and renamings that can later
@@ -319,7 +320,7 @@ private int completeLinkages() throws IOException {
     }
 
     private void doDelete(PendingFileDeletionExec del) throws IOException {
-        File f = new File(del.absolutePath + PENDING_EXTENSION);
+        File f = new File(TandemPath.join(del.absolutePath, PENDING_EXTENSION));
         File parent = f.getParentFile();
         del.absoluteParent = parent;
 
 
@@ -56,6 +56,7 @@
 import org.opengrok.indexer.history.HistoryException;
 import org.opengrok.indexer.logger.LoggerFactory;
 import org.opengrok.indexer.util.IOUtils;
+import org.opengrok.indexer.util.TandemPath;
 import org.opengrok.indexer.web.Prefix;
 import org.opengrok.indexer.web.SearchHelper;
 import org.opengrok.indexer.web.Util;
@@ -116,9 +117,9 @@ private static String getTags(File basedir, String path, boolean compressed) {
             int len = r.read(content);
             return new String(content, 0, len);
         } catch (Exception e) {
-            LOGGER.log(
-                    Level.WARNING, "An error reading tags from " + basedir + path
-                    + (compressed ? ".gz" : ""), e);
+            String fnm = compressed ? TandemPath.join(basedir + path, ".gz") :
+                    basedir + path;
+            LOGGER.log(Level.WARNING, "An error reading tags from " + fnm, e);
         }
         return "";
     }
@@ -127,13 +128,14 @@ private static String getTags(File basedir, String path, boolean compressed) {
     private static Reader getXrefReader(
                     File basedir, String path, boolean compressed)
             throws IOException {
-        /**
+        /*
          * For backward compatibility, read the OpenGrok-produced document
          * using the system default charset.
          */
         if (compressed) {
             return new BufferedReader(IOUtils.createBOMStrippedReader(
-                    new GZIPInputStream(new FileInputStream(new File(basedir, path + ".gz")))));
+                    new GZIPInputStream(new FileInputStream(new File(basedir,
+                            TandemPath.join(path, ".gz"))))));
         } else {
             return new BufferedReader(IOUtils.createBOMStrippedReader(
                     new FileInputStream(new File(basedir, path))));
 
@@ -67,6 +67,7 @@
 import org.opengrok.indexer.search.Summary.Fragment;
 import org.opengrok.indexer.search.context.Context;
 import org.opengrok.indexer.search.context.HistoryContext;
+import org.opengrok.indexer.util.TandemPath;
 import org.opengrok.indexer.web.PageConfig;
 import org.opengrok.indexer.web.Prefix;
 import org.opengrok.indexer.web.ProjectHelper;
@@ -521,7 +522,8 @@ public void results(int start, int end, List<Hit> ret) {
                              * default charset.
                              */
                             try (Reader r = RuntimeEnvironment.getInstance().isCompressXref()
-                                    ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(data + Prefix.XREF_P + filename + ".gz")))))
+                                    ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(
+                                            TandemPath.join(data + Prefix.XREF_P + filename, ".gz"))))))
                                     : new HTMLStripCharFilter(new BufferedReader(new FileReader(data + Prefix.XREF_P + filename)))) {
                                 l = r.read(content);
                             }
 
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2018, Chris Fraire <[email protected]>.
+ */
+
+package org.opengrok.indexer.util;
+
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Arrays;
+import java.util.Base64;
+
+/**
+ * Represents a utility class for creating a filename to operate in tandem with
+ * an original filename by adding a new file extension but limiting the length
+ * of the new filename to 255 UTF-8 encoded bytes if necessary by truncating
+ * and packing in a Base64-encoded SHA-256 hash of the (possibly
+ * extension-stripped) original file name.
+ */
+public class TandemFilename {
+
+    /** Upper bound, in UTF-8 encoded bytes, of any filename produced here. */
+    private static final int MAX_BYTES = 255;
+
+    /**
+     * One fewer than {@link #MAX_BYTES} as a cap for simple concatenation to
+     * avoid the possibility of easily fabricating a collision against this
+     * algorithm. I.e., a 255 byte tandem filename will always include a
+     * computed hash and not just be the concatenation of original filename
+     * plus new extension.
+     */
+    private static final int MAX_CAT_BYTES = MAX_BYTES - 1;
+
+    /**
+     * The URL-safe Base64 variant is used for the packed hash.
+     * "Instances of Base64.Encoder class are safe for use by multiple
+     * concurrent threads." --Oracle.
+     */
+    private static final Base64.Encoder encoder = Base64.getUrlEncoder();
+
+    /** Private to enforce static-only use. */
+    private TandemFilename() {
+    }
+
+    /**
+     * Appends an ASCII extension to the specified {@code filename}, truncating
+     * and packing in a SHA-256 hash if the UTF-8 encoding would exceed 254
+     * bytes and arriving at a final size of 255 bytes in that special case.
+     * @param filename a defined instance
+     * @param asciiExtension a defined instance that is expected to be only
+     *                       ASCII so that its UTF-8 form is the same length
+     * @return a transformed filename whose UTF-8 encoding is not more than 255
+     * bytes.
+     * @throws IllegalArgumentException thrown if {@code filename} has a
+     * parent or if {@code asciiExtension} is too long to allow packing a
+     * SHA-256 hash in the transformation.
+     */
+    public static String join(String filename, String asciiExtension) {
+
+        File file = new File(filename);
+        if (file.getParent() != null) {
+            throw new IllegalArgumentException("filename can't have parent");
+        }
+
+        /*
+         * If the original filename length * 4 (for longest possible UTF-8
+         * encoding) plus asciiExtension length is not greater than one less
+         * than 255, then quickly return the concatenation without having to
+         * encode anything.
+         */
+        if (filename.length() * 4 + asciiExtension.length() <= MAX_CAT_BYTES) {
+            return filename + asciiExtension;
+        }
+        return maybePackSha(filename, asciiExtension);
+    }
+
+    /**
+     * Slow path of {@link #join(String, String)}: measures the real UTF-8
+     * size and, only if simple concatenation would exceed
+     * {@link #MAX_CAT_BYTES}, builds a name of exactly {@link #MAX_BYTES}
+     * bytes made of a truncated filename, optional {@code '_'} padding, the
+     * Base64 hash, any pre-existing short extension, and the new extension.
+     * @throws IllegalArgumentException if not even one code point of the
+     * filename can be kept alongside the hash and extension(s)
+     */
+    private static String maybePackSha(String filename, String asciiExtension) {
+
+        byte[] uFilename = filename.getBytes(StandardCharsets.UTF_8);
+        int nBytes = uFilename.length;
+        if (nBytes + asciiExtension.length() <= MAX_CAT_BYTES) {
+            // Here the UTF-8 encoding already allows for the new extension.
+            return filename + asciiExtension;
+        }
+
+        /*
+         * If filename has an ASCII extension already (of a reasonable length),
+         * shift it to the new asciiExtension so that it won't be overwritten
+         * by the packed hash.
+         */
+        int pos = filename.lastIndexOf('.');
+        int extLength = filename.length() - pos;
+        if (pos >= 0 && extLength < 30 && extLength > 1) {
+            int i;
+            for (i = pos + 1; i < filename.length(); ++i) {
+                char ch = filename.charAt(i);
+                // isLetterOrDigit() also accepts non-ASCII letters, so the
+                // second test rejects anything beyond 'z'.
+                if (!Character.isLetterOrDigit(ch) || ch > 'z') {
+                    break;
+                }
+            }
+            if (i >= filename.length()) {
+                // By this point, we affirmed a letters/numbers extension.
+                asciiExtension = filename.substring(pos) + asciiExtension;
+                filename = filename.substring(0, pos);
+                uFilename = filename.getBytes(StandardCharsets.UTF_8);
+                nBytes = uFilename.length;
+            }
+        }
+
+        // Pack the hash just before the file extension.
+        asciiExtension = sha256base64(filename) + asciiExtension;
+
+        /*
+         * Now trim the filename by code points until the full UTF-8 encoding
+         * fits within MAX_BYTES.
+         */
+        int newLength = filename.length();
+        while (nBytes + asciiExtension.length() > MAX_BYTES) {
+            /*
+             * Guard before reading: the filename may be empty from the start
+             * or may have become empty after its extension was shifted above
+             * (e.g. ".gz"). Without this check codePointBefore(0) would raise
+             * an IndexOutOfBoundsException instead of the documented
+             * IllegalArgumentException.
+             */
+            if (newLength <= 0) {
+                throw new IllegalArgumentException("asciiExtension too long");
+            }
+
+            int cp = filename.codePointBefore(newLength);
+            int nChars = Character.charCount(cp);
+            String c = filename.substring(newLength - nChars, newLength);
+            nBytes -= c.getBytes(StandardCharsets.UTF_8).length;
+            newLength -= nChars;
+
+            // At least one code point of the filename must survive.
+            if (newLength <= 0) {
+                throw new IllegalArgumentException("asciiExtension too long");
+            }
+        }
+
+        /*
+         * Pad if necessary to exactly MAX_BYTES. A gap arises when the last
+         * trimmed code point was encoded in more than one byte.
+         */
+        if (nBytes + asciiExtension.length() != MAX_BYTES) {
+            char[] pad = new char[MAX_BYTES - nBytes - asciiExtension.length()];
+            Arrays.fill(pad, '_');
+            asciiExtension = new String(pad) + asciiExtension;
+        }
+
+        return filename.substring(0, newLength) + asciiExtension;
+    }
+
+    /**
+     * Computes the SHA-256 digest of the UTF-8 encoding of {@code value} and
+     * returns it Base64-encoded with the URL-safe encoder.
+     */
+    private static String sha256base64(String value) {
+
+        MessageDigest hasher;
+        try {
+            hasher = MessageDigest.getInstance("SHA-256");
+        } catch (NoSuchAlgorithmException e) {
+            /*
+             * This will not happen since "Every implementation of the Java
+             * platform is required to support the following standard
+             * MessageDigest algorithms: MD5, SHA-1, SHA-256."
+             */
+            throw new RuntimeException(e);
+        }
+
+        byte[] digest = hasher.digest(value.getBytes(StandardCharsets.UTF_8));
+        return encoder.encodeToString(digest);
+    }
+}
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2018, Chris Fraire <[email protected]>.
+ */
+
+package org.opengrok.indexer.util;
+
+import java.io.File;
+
+/**
+ * Utility for deriving a companion ("tandem") path from an existing path by
+ * appending a new file extension to its last component. The filename
+ * component is produced by {@link TandemFilename#join(String, String)}, which
+ * limits it to 255 UTF-8 encoded bytes, truncating and packing in a
+ * Base64-encoded SHA-256 hash of the original file name component when
+ * necessary.
+ */
+public class TandemPath {
+
+    /** Not instantiable: only static helpers are offered. */
+    private TandemPath() {
+    }
+
+    /**
+     * Builds the tandem path for {@code filePath} by applying
+     * {@link TandemFilename#join(String, String)} to the filename component
+     * only and then re-attaching the parent portion of the path, if any.
+     * @param filePath a defined instance
+     * @param asciiExtension a defined instance that is expected to be only
+     *                       ASCII so that its UTF-8 form is the same length
+     * @return a transformed path whose filename component's UTF-8 encoding is
+     * not more than 255 bytes.
+     * @throws IllegalArgumentException thrown if {@code asciiExtension} is
+     * too long to allow packing a SHA-256 hash in the transformation.
+     */
+    public static String join(String filePath, String asciiExtension) {
+
+        File original = new File(filePath);
+        // May be null for a bare filename; File(String, String) accepts that.
+        String parentDir = original.getParent();
+        String tandemName = TandemFilename.join(original.getName(),
+            asciiExtension);
+        return new File(parentDir, tandemName).getPath();
+    }
+}
Original file line number	Diff line number	Diff line change
`@@ -59,6 +59,7 @@`
`59`	`59`	`import org.opengrok.indexer.logger.LoggerFactory;`
`60`	`60`	`import org.opengrok.indexer.util.ForbiddenSymlinkException;`
`61`	`61`	`import org.opengrok.indexer.util.IOUtils;`
	`62`	`+import org.opengrok.indexer.util.TandemPath;`
`62`	`63`
`63`	`64`	`/*`
`64`	`65`	`* Class representing file based storage of per source file history.`
`@@ -213,13 +214,12 @@ private static File getCachedFile(File file) throws HistoryException,`
`213`	`214`	`sb.append(File.separator);`
`214`	`215`	`sb.append(DIRECTORY_FILE_PREFIX);`
`215`	`216`	`}`
`216`		`- sb.append(".gz");`
`217`	`217`	`} catch (IOException e) {`
`218`	`218`	`throw new HistoryException("Failed to get path relative to " +`
`219`	`219`	`"source root for " + file, e);`
`220`	`220`	`}`
`221`	`221`
`222`		`- return new File(sb.toString());`
	`222`	`+ return new File(TandemPath.join(sb.toString(), ".gz"));`
`223`	`223`	`}`
`224`	`224`
`225`	`225`	`/**`