Skip to content

Commit c460eae

Browse files
committed
implement diagnostics
1 parent a1a2d7c commit c460eae

17 files changed

+254
-97
lines changed

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 138 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.semmle.js.extractor;
22

33
import java.io.File;
4+
import java.io.FileNotFoundException;
45
import java.io.IOException;
56
import java.io.Reader;
67
import java.lang.ProcessBuilder.Redirect;
@@ -17,6 +18,7 @@
1718
import java.nio.file.attribute.BasicFileAttributes;
1819
import java.util.ArrayList;
1920
import java.util.Arrays;
21+
import java.util.Collections;
2022
import java.util.Comparator;
2123
import java.util.LinkedHashMap;
2224
import java.util.LinkedHashSet;
@@ -27,6 +29,7 @@
2729
import java.util.concurrent.ExecutorService;
2830
import java.util.concurrent.Executors;
2931
import java.util.concurrent.TimeUnit;
32+
import java.util.concurrent.atomic.AtomicInteger;
3033
import java.util.function.Predicate;
3134
import java.util.stream.Collectors;
3235
import java.util.stream.Stream;
@@ -41,11 +44,15 @@
4144
import com.semmle.js.extractor.trapcache.DefaultTrapCache;
4245
import com.semmle.js.extractor.trapcache.DummyTrapCache;
4346
import com.semmle.js.extractor.trapcache.ITrapCache;
47+
import com.semmle.js.parser.ParseError;
4448
import com.semmle.js.parser.ParsedProject;
4549
import com.semmle.ts.extractor.TypeExtractor;
4650
import com.semmle.ts.extractor.TypeScriptParser;
51+
import com.semmle.ts.extractor.TypeScriptWrapperOOMError;
4752
import com.semmle.ts.extractor.TypeTable;
4853
import com.semmle.util.data.StringUtil;
54+
import com.semmle.util.diagnostics.DiagnosticLevel;
55+
import com.semmle.util.diagnostics.DiagnosticWriter;
4956
import com.semmle.util.exception.CatastrophicError;
5057
import com.semmle.util.exception.Exceptions;
5158
import com.semmle.util.exception.ResourceError;
@@ -444,33 +451,127 @@ protected boolean hasSeenCode() {
444451

445452
/** Perform extraction. */
446453
public int run() throws IOException {
447-
startThreadPool();
448-
try {
449-
CompletableFuture<?> sourceFuture = extractSource();
450-
sourceFuture.join(); // wait for source extraction to complete
451-
if (hasSeenCode()) { // don't bother with the externs if no code was seen
452-
extractExterns();
454+
startThreadPool();
455+
try {
456+
CompletableFuture<?> sourceFuture = extractSource();
457+
sourceFuture.join(); // wait for source extraction to complete
458+
if (hasSeenCode()) { // don't bother with the externs if no code was seen
459+
extractExterns();
460+
}
461+
extractXml();
462+
} catch (OutOfMemoryError oom) {
463+
System.err.println("Out of memory while extracting the project.");
464+
return 137; // the CodeQL CLI will interpret this as an out-of-memory error
465+
// purpusely not doing anything else (printing stack, etc.), as the JVM
466+
// basically guarantees nothing after an OOM
467+
} catch (TypeScriptWrapperOOMError oom) {
468+
System.err.println("Out of memory while extracting the project.");
469+
System.err.println(oom.getMessage());
470+
oom.printStackTrace(System.err);
471+
return 137;
472+
} catch (RuntimeException | IOException e) {
473+
writeDiagnostics("Internal error: " + e, JSDiagnosticKind.INTERNAL_ERROR);
474+
e.printStackTrace(System.err);
475+
return 1;
476+
} finally {
477+
shutdownThreadPool();
478+
diagnosticsToClose.forEach(DiagnosticWriter::close);
453479
}
454-
extractXml();
455-
} finally {
456-
shutdownThreadPool();
480+
481+
if (!hasSeenCode()) {
482+
if (seenFiles) {
483+
warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors.");
484+
} else {
485+
warn("No JavaScript or TypeScript code found.");
486+
}
487+
// ensuring that the finalize steps detects that no code was seen.
488+
Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src");
489+
// check that the srcFolder is empty
490+
if (Files.list(srcFolder).count() == 0) {
491+
// Non-recursive delete because "src/" should be empty.
492+
FileUtil8.delete(srcFolder);
493+
}
494+
return 0;
495+
}
496+
return 0;
497+
}
498+
499+
/**
500+
* A kind of error that can happen during extraction of JavaScript or TypeScript
501+
* code.
502+
* For use with the {@link #writeDiagnostics(String, JSDiagnosticKind)} method.
503+
*/
504+
public static enum JSDiagnosticKind {
505+
PARSE_ERROR("parse-error", "Parse error", DiagnosticLevel.Warning),
506+
INTERNAL_ERROR("internal-error", "Internal error", DiagnosticLevel.Debug);
507+
508+
private final String id;
509+
private final String name;
510+
private final DiagnosticLevel level;
511+
512+
private JSDiagnosticKind(String id, String name, DiagnosticLevel level) {
513+
this.id = id;
514+
this.name = name;
515+
this.level = level;
516+
}
517+
518+
public String getId() {
519+
return id;
520+
}
521+
522+
public String getName() {
523+
return name;
524+
}
525+
526+
public DiagnosticLevel getLevel() {
527+
return level;
457528
}
458-
if (!hasSeenCode()) {
459-
if (seenFiles) {
460-
warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors.");
529+
}
530+
531+
private AtomicInteger diagnosticCount = new AtomicInteger(0);
532+
private List<DiagnosticWriter> diagnosticsToClose = Collections.synchronizedList(new ArrayList<>());
533+
private ThreadLocal<DiagnosticWriter> diagnostics = new ThreadLocal<DiagnosticWriter>(){
534+
@Override protected DiagnosticWriter initialValue() {
535+
DiagnosticWriter result = initDiagnosticsWriter(diagnosticCount.incrementAndGet());
536+
diagnosticsToClose.add(result);
537+
return result;
538+
}
539+
};
540+
541+
/**
542+
* Persist a diagnostic message to a file in the diagnostics directory.
543+
* See {@link JSDiagnosticKind} for the kinds of errors that can be reported,
544+
* and see
545+
* {@link DiagnosticWriter} for more details.
546+
*/
547+
public void writeDiagnostics(String message, JSDiagnosticKind error) throws IOException {
548+
if (diagnostics.get() == null) {
549+
warn("No diagnostics directory, so not writing diagnostic: " + message);
550+
return;
551+
}
552+
553+
// DiagnosticLevel level, String extractorName, String sourceId, String sourceName, String markdown
554+
diagnostics.get().writeMarkdown(error.getLevel(), "javascript", "javascript/" + error.getId(), error.getName(),
555+
message);
556+
}
557+
558+
private DiagnosticWriter initDiagnosticsWriter(int count) {
559+
String diagnosticsDir = System.getenv("CODEQL_EXTRACTOR_JAVASCRIPT_DIAGNOSTIC_DIR");
560+
561+
if (diagnosticsDir != null) {
562+
File diagnosticsDirFile = new File(diagnosticsDir);
563+
if (!diagnosticsDirFile.isDirectory()) {
564+
warn("Diagnostics directory " + diagnosticsDir + " does not exist");
461565
} else {
462-
warn("No JavaScript or TypeScript code found.");
463-
}
464-
// ensuring that the finalize steps detects that no code was seen.
465-
Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src");
466-
// check that the srcFolder is empty
467-
if (Files.list(srcFolder).count() == 0) {
468-
// Non-recursive delete because "src/" should be empty.
469-
FileUtil8.delete(srcFolder);
566+
File diagnosticsFile = new File(diagnosticsDirFile, "autobuilder-" + count + ".jsonl");
567+
try {
568+
return new DiagnosticWriter(diagnosticsFile);
569+
} catch (FileNotFoundException e) {
570+
warn("Failed to open diagnostics file " + diagnosticsFile);
571+
}
470572
}
471-
return 0;
472573
}
473-
return 0;
574+
return null;
474575
}
475576

476577
private void startThreadPool() {
@@ -1113,13 +1214,26 @@ private void doExtract(FileExtractor extractor, Path file, ExtractorState state)
11131214

11141215
try {
11151216
long start = logBeginProcess("Extracting " + file);
1116-
Integer loc = extractor.extract(f, state);
1117-
if (!extractor.getConfig().isExterns() && (loc == null || loc != 0)) seenCode = true;
1217+
ParseResultInfo loc = extractor.extract(f, state);
1218+
if (!extractor.getConfig().isExterns() && (loc == null || loc.getLinesOfCode() != 0)) seenCode = true;
11181219
if (!extractor.getConfig().isExterns()) seenFiles = true;
1220+
for (ParseError err : loc.getParseErrors()) {
1221+
String msg = "A parse error occurred: " + err.getMessage() + ". Check the syntax of the file. If the file is invalid, correct the error or exclude the file from analysis.";
1222+
writeDiagnostics(msg, JSDiagnosticKind.PARSE_ERROR);
1223+
}
11191224
logEndProcess(start, "Done extracting " + file);
1225+
} catch (OutOfMemoryError oom) {
1226+
System.err.println("Out of memory while extracting " + file + ".");
1227+
oom.printStackTrace(System.err);
1228+
System.exit(137); // caught by the CodeQL CLI
11201229
} catch (Throwable t) {
11211230
System.err.println("Exception while extracting " + file + ".");
11221231
t.printStackTrace(System.err);
1232+
try {
1233+
writeDiagnostics("Internal error: " + t, JSDiagnosticKind.INTERNAL_ERROR);
1234+
} catch (IOException ignored) {
1235+
// ignore - we are already crashing
1236+
}
11231237
System.exit(1);
11241238
}
11251239
}

javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import java.io.FileInputStream;
66
import java.io.FileReader;
77
import java.io.IOException;
8-
import java.nio.charset.Charset;
98
import java.nio.charset.StandardCharsets;
109
import java.nio.file.Path;
1110
import java.util.LinkedHashSet;
@@ -434,7 +433,7 @@ public boolean supports(File f) {
434433
}
435434

436435
/** @return the number of lines of code extracted, or {@code null} if the file was cached */
437-
public Integer extract(File f, ExtractorState state) throws IOException {
436+
public ParseResultInfo extract(File f, ExtractorState state) throws IOException {
438437
FileSnippet snippet = state.getSnippets().get(f.toPath());
439438
if (snippet != null) {
440439
return this.extractSnippet(f.toPath(), snippet, state);
@@ -461,7 +460,7 @@ public Integer extract(File f, ExtractorState state) throws IOException {
461460
* <p>A trap file will be derived from the snippet file, but its file label, source locations, and
462461
* source archive entry are based on the original file.
463462
*/
464-
private Integer extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException {
463+
private ParseResultInfo extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException {
465464
TrapWriter trapwriter = outputConfig.getTrapWriterFactory().mkTrapWriter(file.toFile());
466465

467466
File originalFile = origin.getOriginalFile().toFile();
@@ -495,7 +494,7 @@ private Integer extractSnippet(Path file, FileSnippet origin, ExtractorState sta
495494
* <p>Also note that we support extraction with TRAP writer factories that are not file-backed;
496495
* obviously, no caching is done in that scenario.
497496
*/
498-
private Integer extractContents(
497+
private ParseResultInfo extractContents(
499498
File extractedFile, Label fileLabel, String source, LocationManager locationManager, ExtractorState state)
500499
throws IOException {
501500
ExtractionMetrics metrics = new ExtractionMetrics();
@@ -545,15 +544,15 @@ private Integer extractContents(
545544
TextualExtractor textualExtractor =
546545
new TextualExtractor(
547546
trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile);
548-
LoCInfo loc = extractor.extract(textualExtractor);
547+
ParseResultInfo loc = extractor.extract(textualExtractor);
549548
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
550549
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
551550
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
552551
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
553552
metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
554553
metrics.writeTimingsToTrap(trapwriter);
555554
successful = true;
556-
return linesOfCode;
555+
return loc;
557556
} finally {
558557
if (!successful && trapwriter instanceof CachingTrapWriter)
559558
((CachingTrapWriter) trapwriter).discard();

javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.File;
44
import java.io.IOException;
55
import java.nio.file.Path;
6+
import java.util.Collections;
67
import java.util.List;
78
import java.util.function.Supplier;
89
import java.util.regex.Matcher;
@@ -29,7 +30,7 @@
2930

3031
/** Extractor for handling HTML and XHTML files. */
3132
public class HTMLExtractor implements IExtractor {
32-
private LoCInfo locInfo = new LoCInfo(0, 0);
33+
private ParseResultInfo locInfo = new ParseResultInfo(0, 0, Collections.emptyList());
3334

3435
private class JavaScriptHTMLElementHandler implements HtmlPopulator.ElementHandler {
3536
private final ScopeManager scopeManager;
@@ -212,11 +213,11 @@ public static HTMLExtractor forEmbeddedHtml(ExtractorConfig config) {
212213
}
213214

214215
@Override
215-
public LoCInfo extract(TextualExtractor textualExtractor) throws IOException {
216+
public ParseResultInfo extract(TextualExtractor textualExtractor) throws IOException {
216217
return extractEx(textualExtractor).snd();
217218
}
218219

219-
public Pair<List<Label>, LoCInfo> extractEx(TextualExtractor textualExtractor) {
220+
public Pair<List<Label>, ParseResultInfo> extractEx(TextualExtractor textualExtractor) {
220221
// Angular templates contain attribute names that are not valid HTML/XML, such
221222
// as [foo], (foo), [(foo)], and *foo.
222223
// Allow a large number of errors in attribute names, so the Jericho parser does
@@ -369,7 +370,7 @@ private void extractSnippet(
369370
config.getExtractLines(),
370371
textualExtractor.getMetrics(),
371372
textualExtractor.getExtractedFile());
372-
Pair<Label, LoCInfo> result = extractor.extract(tx, source, toplevelKind, scopeManager);
373+
Pair<Label, ParseResultInfo> result = extractor.extract(tx, source, toplevelKind, scopeManager);
373374
Label toplevelLabel = result.fst();
374375
if (toplevelLabel != null) { // can be null when script ends up being parsed as JSON
375376
emitTopLevelXmlNodeBinding(parentLabel, toplevelLabel, trapWriter);
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.semmle.js.extractor;
22

33
import java.io.IOException;
4+
import com.semmle.js.parser.ParseError;
45

56
/** Generic extractor interface. */
67
public interface IExtractor {
@@ -9,5 +10,5 @@ public interface IExtractor {
910
* TextualExtractor}, and return information about the number of lines of code and the number of
1011
* lines of comments extracted.
1112
*/
12-
public LoCInfo extract(TextualExtractor textualExtractor) throws IOException;
13+
public ParseResultInfo extract(TextualExtractor textualExtractor) throws IOException;
1314
}

javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.semmle.js.extractor;
22

33
import java.util.ArrayList;
4+
import java.util.Collections;
45
import java.util.regex.Matcher;
56
import java.util.regex.Pattern;
67

@@ -18,6 +19,7 @@
1819
import com.semmle.util.exception.UserError;
1920
import com.semmle.util.trap.TrapWriter;
2021
import com.semmle.util.trap.TrapWriter.Label;
22+
import com.semmle.js.extractor.ParseResultInfo;
2123

2224
/**
2325
* Extractor for populating JavaScript source code, including AST information, lexical information
@@ -36,14 +38,14 @@ public JSExtractor(ExtractorConfig config) {
3638
private static final Pattern containsModuleIndicator =
3739
Pattern.compile("(?m)^([ \t]*)(import|export|goog\\.module)\\b");
3840

39-
public Pair<Label, LoCInfo> extract(
41+
public Pair<Label, ParseResultInfo> extract(
4042
TextualExtractor textualExtractor, String source, TopLevelKind toplevelKind, ScopeManager scopeManager)
4143
throws ParseError {
4244
// if the file starts with `{ "<string>":` it won't parse as JavaScript; try parsing as JSON
4345
// instead
4446
if (FileExtractor.JSON_OBJECT_START.matcher(textualExtractor.getSource()).matches()) {
4547
try {
46-
LoCInfo loc =
48+
ParseResultInfo loc =
4749
new JSONExtractor(config.withTolerateParseErrors(false)).extract(textualExtractor);
4850
return Pair.make(null, loc);
4951
} catch (UserError ue) {
@@ -82,7 +84,7 @@ public SourceType establishSourceType(String source, boolean allowLeadingWS) {
8284
return SourceType.SCRIPT;
8385
}
8486

85-
public Pair<Label, LoCInfo> extract(
87+
public Pair<Label, ParseResultInfo> extract(
8688
TextualExtractor textualExtractor,
8789
String source,
8890
TopLevelKind toplevelKind,
@@ -97,7 +99,7 @@ public Pair<Label, LoCInfo> extract(
9799
Platform platform = config.getPlatform();
98100
Node ast = parserRes.getAST();
99101
LexicalExtractor lexicalExtractor;
100-
LoCInfo loc;
102+
ParseResultInfo loc;
101103
if (ast != null) {
102104
platform = getPlatform(platform, ast);
103105
if (sourceType == SourceType.SCRIPT && platform == Platform.NODE) {
@@ -124,9 +126,10 @@ public Pair<Label, LoCInfo> extract(
124126

125127
trapwriter.addTuple("toplevels", toplevelLabel, toplevelKind.getValue());
126128
locationManager.emitSnippetLocation(toplevelLabel, 1, 1, 1, 1);
127-
loc = new LoCInfo(0, 0);
129+
loc = new ParseResultInfo(0, 0, Collections.emptyList());
128130
}
129131

132+
loc.addParseErrors(parserRes.getErrors());
130133
for (ParseError parseError : parserRes.getErrors()) {
131134
if (!config.isTolerateParseErrors()) throw parseError;
132135
Label key = trapwriter.freshLabel();

0 commit comments

Comments
 (0)