Skip to content

Commit 2edc70d

Browse files
authored
Merge pull request github#14390 from igfoo/igfoo/compr
Kotlin: Improve support for TRAP compression options
2 parents fe57cd0 + ed9502f commit 2edc70d

File tree

4 files changed

+78
-50
lines changed

4 files changed

+78
-50
lines changed

java/kotlin-extractor/src/main/java/com/semmle/extractor/java/OdasaOutput.java

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
import com.semmle.util.trap.dependencies.TrapSet;
5050
import com.semmle.util.trap.pathtransformers.PathTransformer;
5151

52+
import com.github.codeql.Compression;
53+
5254
public class OdasaOutput {
5355
private final File trapFolder;
5456
private final File sourceArchiveFolder;
@@ -63,16 +65,18 @@ public class OdasaOutput {
6365
private final boolean trackClassOrigins;
6466

6567
private final Logger log;
68+
private final Compression compression;
6669

6770
/** DEBUG only: just use the given file as the root for TRAP, source archive etc */
68-
OdasaOutput(File outputRoot, Logger log) {
71+
OdasaOutput(File outputRoot, Compression compression, Logger log) {
    // Collaborators are stored exactly as supplied by the caller.
    this.log = log;
    this.compression = compression;
    // This entry point never tracks class origins.
    this.trackClassOrigins = false;
    // The source archive and TRAP trees both sit directly under the given root.
    this.sourceArchiveFolder = new File(outputRoot, "src_archive");
    this.trapFolder = new File(outputRoot, "trap");
}
7478

75-
public OdasaOutput(boolean trackClassOrigins, Logger log) {
79+
public OdasaOutput(boolean trackClassOrigins, Compression compression, Logger log) {
7680
String trapFolderVar = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_TRAP_DIR", Var.TRAP_FOLDER.name());
7781
if (trapFolderVar == null) {
7882
throw new ResourceError("CODEQL_EXTRACTOR_JAVA_TRAP_DIR was not set");
@@ -85,6 +89,7 @@ public OdasaOutput(boolean trackClassOrigins, Logger log) {
8589
this.sourceArchiveFolder = new File(sourceArchiveVar);
8690
this.trackClassOrigins = trackClassOrigins;
8791
this.log = log;
92+
this.compression = compression;
8893
}
8994

9095
public File getTrapFolder() {
@@ -180,18 +185,18 @@ private File getTrapFileForJarFile(File jarFile) {
180185
return null;
181186
return FileUtil.appendAbsolutePath(
182187
currentSpecFileEntry.getTrapFolder(),
183-
JARS_DIR + "/" + PathTransformer.std().fileAsDatabaseString(jarFile) + ".trap.gz");
188+
JARS_DIR + "/" + PathTransformer.std().fileAsDatabaseString(jarFile) + ".trap" + compression.getExtension());
184189
}
185190

186191
private File getTrapFileForModule(String moduleName) {
187192
return FileUtil.appendAbsolutePath(
188193
currentSpecFileEntry.getTrapFolder(),
189-
MODULES_DIR + "/" + moduleName + ".trap.gz");
194+
MODULES_DIR + "/" + moduleName + ".trap" + compression.getExtension());
190195
}
191196

192197
private File trapFileFor(File file) {
193198
return FileUtil.appendAbsolutePath(currentSpecFileEntry.getTrapFolder(),
194-
PathTransformer.std().fileAsDatabaseString(file) + ".trap.gz");
199+
PathTransformer.std().fileAsDatabaseString(file) + ".trap" + compression.getExtension());
195200
}
196201

197202
private File getTrapFileForDecl(IrElement sym, String signature) {
@@ -214,7 +219,7 @@ private String trapFilePathForDecl(IrElement sym, String signature) {
214219
binaryName.replace('.', '/') +
215220
signature +
216221
".members" +
217-
".trap.gz";
222+
".trap" + compression.getExtension();
218223
return result;
219224
}
220225

@@ -245,7 +250,7 @@ private TrapFileManager getMembersWriterForDecl(File trap, File trapFileBase, Tr
245250
// don't need to rewrite it only to rename it
246251
// again.
247252
File trapFileDir = trap.getParentFile();
248-
File trapOld = new File(trapFileDir, trap.getName().replace(".trap.gz", ".trap-old.gz"));
253+
File trapOld = new File(trapFileDir, trap.getName().replace(".trap" + compression.getExtension(), ".trap-old" + compression.getExtension()));
249254
if (trapOld.exists()) {
250255
log.trace("Not rewriting trap file for " + trap.toString() + " as the trap-old exists");
251256
return null;
@@ -272,7 +277,7 @@ private TrapFileManager getMembersWriterForDecl(File trap, File trapFileBase, Tr
272277
}
273278

274279
private TrapFileManager trapWriter(File trapFile, IrElement sym, String signature) {
275-
if (!trapFile.getName().endsWith(".trap.gz"))
280+
if (!trapFile.getName().endsWith(".trap" + compression.getExtension()))
276281
throw new CatastrophicError("OdasaOutput only supports writing to compressed trap files");
277282
String relative = FileUtil.relativePath(trapFile, currentSpecFileEntry.getTrapFolder());
278283
trapFile.getParentFile().mkdirs();
@@ -321,7 +326,7 @@ public void close() {
321326
writeTrapDependencies(trapDependenciesForClass);
322327
}
323328
private void writeTrapDependencies(TrapDependencies trapDependencies) {
324-
String dep = trapDependencies.trapFile().replace(".trap.gz", ".dep");
329+
String dep = trapDependencies.trapFile().replace(".trap" + compression.getExtension(), ".dep");
325330
trapDependencies.save(
326331
currentSpecFileEntry.getTrapFolder().toPath().resolve(dep));
327332
}
@@ -335,7 +340,7 @@ public void setHasError() {
335340
* Trap file locking.
336341
*/
337342

338-
private final Pattern selectClassVersionComponents = Pattern.compile("(.*)#(-?[0-9]+)\\.(-?[0-9]+)-(-?[0-9]+)-(.*)\\.trap\\.gz");
343+
private final Pattern selectClassVersionComponents = Pattern.compile("(.*)#(-?[0-9]+)\\.(-?[0-9]+)-(-?[0-9]+)-(.*)\\.trap.*");
339344

340345
/**
341346
* <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
@@ -412,12 +417,12 @@ private TrapLocker(IrElement decl, String signature, boolean fromSource) {
412417
trapFileVersion = new TrapClassVersion(0, 0, 0, "kotlin");
413418
else
414419
trapFileVersion = TrapClassVersion.fromSymbol(sym, log);
415-
String baseName = normalTrapFile.getName().replace(".trap.gz", "");
420+
String baseName = normalTrapFile.getName().replace(".trap" + compression.getExtension(), "");
416421
// If a class has lots of inner classes, then we get lots of files
417422
// in a single directory. This makes our directory listings later slow.
418423
// To avoid this, rather than using files named .../Foo*, we use .../Foo/Foo*.
419424
trapFileBase = new File(new File(normalTrapFile.getParentFile(), baseName), baseName);
420-
trapFile = new File(trapFileBase.getPath() + '#' + trapFileVersion.toString() + ".trap.gz");
425+
trapFile = new File(trapFileBase.getPath() + '#' + trapFileVersion.toString() + ".trap" + compression.getExtension());
421426
}
422427
}
423428
private TrapLocker(File jarFile) {
@@ -488,7 +493,7 @@ public int compare(Pair<File, TrapClassVersion> p1, Pair<File, TrapClassVersion>
488493
for (Pair<File, TrapClassVersion> p: pairs) {
489494
if (!latestVersion.equals(p.snd())) {
490495
File f = p.fst();
491-
File fOld = new File(f.getParentFile(), f.getName().replace(".trap.gz", ".trap-old.gz"));
496+
File fOld = new File(f.getParentFile(), f.getName().replace(".trap" + compression.getExtension(), ".trap-old" + compression.getExtension()));
492497
// We aren't interested in whether or not this succeeds;
493498
// it may fail because a concurrent extractor has already
494499
// renamed it.

java/kotlin-extractor/src/main/kotlin/ExternalDeclExtractor.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import java.util.ArrayList
1414
import java.util.HashSet
1515
import java.util.zip.GZIPOutputStream
1616

17-
class ExternalDeclExtractor(val logger: FileLogger, val invocationTrapFile: String, val sourceFilePath: String, val primitiveTypeMapping: PrimitiveTypeMapping, val pluginContext: IrPluginContext, val globalExtensionState: KotlinExtractorGlobalState, val diagnosticTrapWriter: DiagnosticTrapWriter) {
17+
class ExternalDeclExtractor(val logger: FileLogger, val compression: Compression, val invocationTrapFile: String, val sourceFilePath: String, val primitiveTypeMapping: PrimitiveTypeMapping, val pluginContext: IrPluginContext, val globalExtensionState: KotlinExtractorGlobalState, val diagnosticTrapWriter: DiagnosticTrapWriter) {
1818

1919
val declBinaryNames = HashMap<IrDeclaration, String>()
2020
val externalDeclsDone = HashSet<Pair<String, String>>()
@@ -23,7 +23,7 @@ class ExternalDeclExtractor(val logger: FileLogger, val invocationTrapFile: Stri
2323
val propertySignature = ";property"
2424
val fieldSignature = ";field"
2525

26-
val output = OdasaOutput(false, logger).also {
26+
val output = OdasaOutput(false, compression, logger).also {
2727
it.setCurrentSourceFile(File(sourceFilePath))
2828
}
2929

@@ -65,7 +65,7 @@ class ExternalDeclExtractor(val logger: FileLogger, val invocationTrapFile: Stri
6565
val trapFile = manager.file
6666
val trapTmpFile = File.createTempFile("${trapFile.nameWithoutExtension}.", ".${trapFile.extension}.tmp", trapFile.parentFile)
6767
try {
68-
GZIPOutputStream(trapTmpFile.outputStream()).bufferedWriter().use {
68+
compression.bufferedWriter(trapTmpFile).use {
6969
extractorFn(it, signature, manager)
7070
}
7171

java/kotlin-extractor/src/main/kotlin/KotlinExtractorExtension.kt

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ private fun doFile(
334334
// Now elevate to a SourceFileTrapWriter, and populate the
335335
// file information
336336
val sftw = tw.makeSourceFileTrapWriter(srcFile, true)
337-
val externalDeclExtractor = ExternalDeclExtractor(logger, invocationTrapFile, srcFilePath, primitiveTypeMapping, pluginContext, globalExtensionState, fileTrapWriter.getDiagnosticTrapWriter())
337+
val externalDeclExtractor = ExternalDeclExtractor(logger, compression, invocationTrapFile, srcFilePath, primitiveTypeMapping, pluginContext, globalExtensionState, fileTrapWriter.getDiagnosticTrapWriter())
338338
val linesOfCode = LinesOfCode(logger, sftw, srcFile)
339339
val fileExtractor = KotlinFileExtractor(logger, sftw, linesOfCode, srcFilePath, null, externalDeclExtractor, primitiveTypeMapping, pluginContext, KotlinFileExtractor.DeclarationStack(), globalExtensionState)
340340

@@ -362,7 +362,19 @@ private fun doFile(
362362
}
363363
}
364364

365-
enum class Compression { NONE, GZIP }
365+
/**
 * The compression schemes available for TRAP output.
 *
 * @property extension the suffix that follows `.trap` in the names of files
 *   written with this scheme (empty when there is none).
 */
enum class Compression(val extension: String) {
    /** Plain, uncompressed output. */
    NONE("") {
        override fun bufferedWriter(file: File): BufferedWriter {
            return file.bufferedWriter()
        }
    },

    /** GZip-compressed output. */
    GZIP(".gz") {
        override fun bufferedWriter(file: File): BufferedWriter {
            val fileStream = file.outputStream()
            try {
                return GZIPOutputStream(fileStream).bufferedWriter()
            } catch (e: Throwable) {
                // GZIPOutputStream writes its header while being constructed,
                // so construction can fail; release the file handle rather
                // than leaking it.
                try {
                    fileStream.close()
                } catch (closeError: Throwable) {
                    e.addSuppressed(closeError)
                }
                throw e
            }
        }
    };

    /** Opens a buffered writer on [file] that encodes its output with this scheme. */
    abstract fun bufferedWriter(file: File): BufferedWriter
}
366378

367379
private fun getTrapFileWriter(compression: Compression, logger: FileLogger, trapFileName: String): TrapFileWriter {
368380
return when (compression) {
Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,66 @@
11
from create_database_utils import *
22

3-
def check_extension(directory, expected_extension):
3+
def check_extensions(directory, counts):
    """Tally the TRAP files under `directory` into `counts`, then verify
    both tallies against the expectations stored on `counts`.

    Raises Exception (from the helpers) on a malformed TRAP file or a
    count that does not meet its expectation.
    """
    prefix = "\\\\?\\"
    on_windows = platform.system() == 'Windows'
    if on_windows:
        # It's important that the path is a Unicode path on Windows, so
        # that the right system calls get used.
        directory = u'' + directory
        if not directory.startswith(prefix):
            directory = prefix + os.path.abspath(directory)

    check_extensions_worker(counts, directory)
    tallies = (
        ('non-compressed', counts.expected_none, counts.count_none),
        ('gzipped', counts.expected_gzip, counts.count_gzip),
    )
    for label, expected, found in tallies:
        check_counts(label, expected, found)
2214

23-
def check_extension_worker(directory, expected_extension, expected_start):
24-
count = 0
15+
def check_counts(name, expected, count):
    """Validate one file tally.

    An `expected` of -1 means "lots": at least 10 files are required.
    Any other `expected` must equal `count` exactly. `name` only labels
    the error message. Raises Exception when the check fails.
    """
    wants_many = expected == -1
    if wants_many and count < 10:
        raise Exception('Expected lots of ' + name + ' files, but got ' + str(count))
    if not wants_many and expected != count:
        raise Exception('Expected ' + str(expected) + ' ' + name + ' files, but got ' + str(count))
21+
22+
class Counts:
    """Expected and observed numbers of uncompressed and gzipped TRAP files."""

    def __init__(self, expected_none, expected_gzip):
        # Expectations are stored as given.
        self.expected_none, self.expected_gzip = expected_none, expected_gzip
        # Observed tallies start at zero.
        self.count_none = self.count_gzip = 0
28+
29+
def check_extensions_worker(counts, directory):
    """Recursively walk `directory`, tallying `.trap` and `.trap.gz` files
    into `counts` and checking each one's leading bytes.

    Raises Exception when a file's content does not start as its
    extension requires. Files with other extensions are ignored.
    """
    for entry in os.listdir(directory):
        path = os.path.join(directory, entry)
        if os.path.isdir(path):
            check_extensions_worker(counts, path)
            continue
        if entry.endswith('.trap'):
            counts.count_none += 1
            # We start TRAP files with a comment
            if not startsWith(path, b'//'):
                raise Exception("TRAP file that doesn't start with a comment: " + entry)
            continue
        if entry.endswith('.trap.gz'):
            counts.count_gzip += 1
            # The GZip magic numbers
            if not startsWith(path, b'\x1f\x8b'):
                raise Exception("GZipped TRAP file that doesn't start with GZip magic numbers: " + entry)
42+
43+
def startsWith(f, b):
    """Return True if the file at path `f` begins with the bytes `b`."""
    with open(f, 'rb') as f_in:
        # Only the first len(b) bytes can affect the answer, so read just
        # those instead of loading the whole file.
        return f_in.read(len(b)) == b
4147

48+
# In the counts, we expect lots of files of the compression type chosen
49+
# (so expected count is -1), but the diagnostic TRAP files will always
50+
# be uncompressed (so expected_none is always 1 or -1) and the
51+
# sourceLocationPrefix TRAP file is always gzipped (so expected_gzip is
52+
# always 1 or -1).
4253
run_codeql_database_create(['kotlinc test.kt'], test_db="default-db", db=None, lang="java")
43-
check_extension('default-db/trap', '.trap.gz')
54+
check_extensions('default-db/trap', Counts(1, -1))
4455
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "nOnE"
4556
run_codeql_database_create(['kotlinc test.kt'], test_db="none-db", db=None, lang="java")
46-
check_extension('none-db/trap', '.trap')
57+
check_extensions('none-db/trap', Counts(-1, 1))
4758
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "gzip"
4859
run_codeql_database_create(['kotlinc test.kt'], test_db="gzip-db", db=None, lang="java")
49-
check_extension('gzip-db/trap', '.trap.gz')
60+
check_extensions('gzip-db/trap', Counts(1, -1))
5061
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "brotli"
5162
run_codeql_database_create(['kotlinc test.kt'], test_db="brotli-db", db=None, lang="java")
52-
check_extension('brotli-db/trap', '.trap.gz')
63+
check_extensions('brotli-db/trap', Counts(1, -1))
5364
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "invalidValue"
5465
run_codeql_database_create(['kotlinc test.kt'], test_db="invalid-db", db=None, lang="java")
55-
check_extension('invalid-db/trap', '.trap.gz')
66+
check_extensions('invalid-db/trap', Counts(1, -1))

0 commit comments

Comments
 (0)