Skip to content

Commit 942cd7c

Browse files
authored
Merge pull request github#12113 from erik-krogh/diagnostics
JS: Implement diagnostics
2 parents 2bbeb73 + 7ab0f88 commit 942cd7c

20 files changed

+313
-97
lines changed

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 167 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
package com.semmle.js.extractor;
22

33
import java.io.File;
4+
import java.io.FileNotFoundException;
45
import java.io.IOException;
56
import java.io.Reader;
67
import java.lang.ProcessBuilder.Redirect;
78
import java.net.URI;
89
import java.net.URISyntaxException;
910
import java.nio.charset.StandardCharsets;
11+
import java.nio.file.DirectoryNotEmptyException;
1012
import java.nio.file.FileVisitResult;
1113
import java.nio.file.FileVisitor;
1214
import java.nio.file.Files;
1315
import java.nio.file.InvalidPathException;
16+
import java.nio.file.NoSuchFileException;
1417
import java.nio.file.Path;
1518
import java.nio.file.Paths;
1619
import java.nio.file.SimpleFileVisitor;
1720
import java.nio.file.attribute.BasicFileAttributes;
1821
import java.util.ArrayList;
1922
import java.util.Arrays;
23+
import java.util.Collections;
2024
import java.util.Comparator;
2125
import java.util.LinkedHashMap;
2226
import java.util.LinkedHashSet;
@@ -27,6 +31,7 @@
2731
import java.util.concurrent.ExecutorService;
2832
import java.util.concurrent.Executors;
2933
import java.util.concurrent.TimeUnit;
34+
import java.util.concurrent.atomic.AtomicInteger;
3035
import java.util.function.Predicate;
3136
import java.util.stream.Collectors;
3237
import java.util.stream.Stream;
@@ -41,11 +46,16 @@
4146
import com.semmle.js.extractor.trapcache.DefaultTrapCache;
4247
import com.semmle.js.extractor.trapcache.DummyTrapCache;
4348
import com.semmle.js.extractor.trapcache.ITrapCache;
49+
import com.semmle.js.parser.ParseError;
4450
import com.semmle.js.parser.ParsedProject;
4551
import com.semmle.ts.extractor.TypeExtractor;
4652
import com.semmle.ts.extractor.TypeScriptParser;
53+
import com.semmle.ts.extractor.TypeScriptWrapperOOMError;
4754
import com.semmle.ts.extractor.TypeTable;
4855
import com.semmle.util.data.StringUtil;
56+
import com.semmle.util.diagnostics.DiagnosticLevel;
57+
import com.semmle.util.diagnostics.DiagnosticWriter;
58+
import com.semmle.util.diagnostics.DiagnosticLocation;
4959
import com.semmle.util.exception.CatastrophicError;
5060
import com.semmle.util.exception.Exceptions;
5161
import com.semmle.util.exception.ResourceError;
@@ -444,33 +454,141 @@ protected boolean hasSeenCode() {
444454

445455
/** Perform extraction. */
446456
public int run() throws IOException {
447-
startThreadPool();
448-
try {
449-
CompletableFuture<?> sourceFuture = extractSource();
450-
sourceFuture.join(); // wait for source extraction to complete
451-
if (hasSeenCode()) { // don't bother with the externs if no code was seen
452-
extractExterns();
457+
startThreadPool();
458+
try {
459+
CompletableFuture<?> sourceFuture = extractSource();
460+
sourceFuture.join(); // wait for source extraction to complete
461+
if (hasSeenCode()) { // don't bother with the externs if no code was seen
462+
extractExterns();
463+
}
464+
extractXml();
465+
} catch (OutOfMemoryError oom) {
466+
System.err.println("Out of memory while extracting the project.");
467+
return 137; // the CodeQL CLI will interpret this as an out-of-memory error
468+
// purpusely not doing anything else (printing stack, etc.), as the JVM
469+
// basically guarantees nothing after an OOM
470+
} catch (TypeScriptWrapperOOMError oom) {
471+
System.err.println("Out of memory while extracting the project.");
472+
System.err.println(oom.getMessage());
473+
oom.printStackTrace(System.err);
474+
return 137;
475+
} catch (RuntimeException | IOException e) {
476+
writeDiagnostics("Internal error: " + e, JSDiagnosticKind.INTERNAL_ERROR);
477+
e.printStackTrace(System.err);
478+
return 1;
479+
} finally {
480+
shutdownThreadPool();
481+
diagnosticsToClose.forEach(DiagnosticWriter::close);
453482
}
454-
extractXml();
455-
} finally {
456-
shutdownThreadPool();
483+
484+
if (!hasSeenCode()) {
485+
if (seenFiles) {
486+
warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors.");
487+
} else {
488+
warn("No JavaScript or TypeScript code found.");
489+
}
490+
// ensuring that the finalize steps detects that no code was seen.
491+
Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src");
492+
try {
493+
// Non-recursive delete because "src/" should be empty.
494+
FileUtil8.delete(srcFolder);
495+
} catch (NoSuchFileException e) {
496+
Exceptions.ignore(e, "the directory did not exist");
497+
} catch (DirectoryNotEmptyException e) {
498+
Exceptions.ignore(e, "just leave the directory if it is not empty");
499+
}
500+
return 0;
501+
}
502+
return 0;
503+
}
504+
505+
/**
506+
* A kind of error that can happen during extraction of JavaScript or TypeScript
507+
* code.
508+
* For use with the {@link #writeDiagnostics(String, JSDiagnosticKind)} method.
509+
*/
510+
public static enum JSDiagnosticKind {
511+
PARSE_ERROR("parse-error", "Parse error", DiagnosticLevel.Warning),
512+
INTERNAL_ERROR("internal-error", "Internal error", DiagnosticLevel.Debug);
513+
514+
private final String id;
515+
private final String name;
516+
private final DiagnosticLevel level;
517+
518+
private JSDiagnosticKind(String id, String name, DiagnosticLevel level) {
519+
this.id = id;
520+
this.name = name;
521+
this.level = level;
522+
}
523+
524+
public String getId() {
525+
return id;
526+
}
527+
528+
public String getName() {
529+
return name;
530+
}
531+
532+
public DiagnosticLevel getLevel() {
533+
return level;
457534
}
458-
if (!hasSeenCode()) {
459-
if (seenFiles) {
460-
warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors.");
535+
}
536+
537+
private AtomicInteger diagnosticCount = new AtomicInteger(0);
538+
private List<DiagnosticWriter> diagnosticsToClose = Collections.synchronizedList(new ArrayList<>());
539+
private ThreadLocal<DiagnosticWriter> diagnostics = new ThreadLocal<DiagnosticWriter>(){
540+
@Override protected DiagnosticWriter initialValue() {
541+
DiagnosticWriter result = initDiagnosticsWriter(diagnosticCount.incrementAndGet());
542+
diagnosticsToClose.add(result);
543+
return result;
544+
}
545+
};
546+
547+
/**
548+
* Persist a diagnostic message to a file in the diagnostics directory.
549+
* See {@link JSDiagnosticKind} for the kinds of errors that can be reported,
550+
* and see
551+
* {@link DiagnosticWriter} for more details.
552+
*/
553+
public void writeDiagnostics(String message, JSDiagnosticKind error) throws IOException {
554+
writeDiagnostics(message, error, null);
555+
}
556+
557+
558+
/**
559+
* Persist a diagnostic message with a location to a file in the diagnostics directory.
560+
* See {@link JSDiagnosticKind} for the kinds of errors that can be reported,
561+
* and see
562+
* {@link DiagnosticWriter} for more details.
563+
*/
564+
public void writeDiagnostics(String message, JSDiagnosticKind error, DiagnosticLocation location) throws IOException {
565+
if (diagnostics.get() == null) {
566+
warn("No diagnostics directory, so not writing diagnostic: " + message);
567+
return;
568+
}
569+
570+
// DiagnosticLevel level, String extractorName, String sourceId, String sourceName, String markdown
571+
diagnostics.get().writeMarkdown(error.getLevel(), "javascript", "javascript/" + error.getId(), error.getName(),
572+
message, location);
573+
}
574+
575+
private DiagnosticWriter initDiagnosticsWriter(int count) {
576+
String diagnosticsDir = System.getenv("CODEQL_EXTRACTOR_JAVASCRIPT_DIAGNOSTIC_DIR");
577+
578+
if (diagnosticsDir != null) {
579+
File diagnosticsDirFile = new File(diagnosticsDir);
580+
if (!diagnosticsDirFile.isDirectory()) {
581+
warn("Diagnostics directory " + diagnosticsDir + " does not exist");
461582
} else {
462-
warn("No JavaScript or TypeScript code found.");
463-
}
464-
// ensuring that the finalize steps detects that no code was seen.
465-
Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src");
466-
// check that the srcFolder is empty
467-
if (Files.list(srcFolder).count() == 0) {
468-
// Non-recursive delete because "src/" should be empty.
469-
FileUtil8.delete(srcFolder);
583+
File diagnosticsFile = new File(diagnosticsDirFile, "autobuilder-" + count + ".jsonl");
584+
try {
585+
return new DiagnosticWriter(diagnosticsFile);
586+
} catch (FileNotFoundException e) {
587+
warn("Failed to open diagnostics file " + diagnosticsFile);
588+
}
470589
}
471-
return 0;
472590
}
473-
return 0;
591+
return null;
474592
}
475593

476594
private void startThreadPool() {
@@ -1113,13 +1231,38 @@ private void doExtract(FileExtractor extractor, Path file, ExtractorState state)
11131231

11141232
try {
11151233
long start = logBeginProcess("Extracting " + file);
1116-
Integer loc = extractor.extract(f, state);
1117-
if (!extractor.getConfig().isExterns() && (loc == null || loc != 0)) seenCode = true;
1234+
ParseResultInfo loc = extractor.extract(f, state);
1235+
if (!extractor.getConfig().isExterns() && (loc == null || loc.getLinesOfCode() != 0)) seenCode = true;
11181236
if (!extractor.getConfig().isExterns()) seenFiles = true;
1237+
for (ParseError err : loc.getParseErrors()) {
1238+
String msg = "A parse error occurred: " + StringUtil.escapeMarkdown(err.getMessage())
1239+
+ ". Check the syntax of the file. If the file is invalid, correct the error or [exclude](https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/customizing-code-scanning) the file from analysis.";
1240+
// file, relative to the source root
1241+
String relativeFilePath = null;
1242+
if (file.startsWith(LGTM_SRC)) {
1243+
relativeFilePath = file.subpath(LGTM_SRC.getNameCount(), file.getNameCount()).toString();
1244+
}
1245+
DiagnosticLocation diagLoc = DiagnosticLocation.builder()
1246+
.setFile(relativeFilePath)
1247+
.setStartLine(err.getPosition().getLine())
1248+
.setStartColumn(err.getPosition().getColumn())
1249+
.setEndLine(err.getPosition().getLine())
1250+
.setEndColumn(err.getPosition().getColumn())
1251+
.build();
1252+
writeDiagnostics(msg, JSDiagnosticKind.PARSE_ERROR, diagLoc);
1253+
}
11191254
logEndProcess(start, "Done extracting " + file);
1255+
} catch (OutOfMemoryError oom) {
1256+
System.err.println("Out of memory while extracting a file.");
1257+
System.exit(137); // caught by the CodeQL CLI
11201258
} catch (Throwable t) {
11211259
System.err.println("Exception while extracting " + file + ".");
11221260
t.printStackTrace(System.err);
1261+
try {
1262+
writeDiagnostics("Internal error: " + t, JSDiagnosticKind.INTERNAL_ERROR);
1263+
} catch (IOException ignored) {
1264+
Exceptions.ignore(ignored, "we are already crashing");
1265+
}
11231266
System.exit(1);
11241267
}
11251268
}

javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import java.io.FileInputStream;
66
import java.io.FileReader;
77
import java.io.IOException;
8-
import java.nio.charset.Charset;
98
import java.nio.charset.StandardCharsets;
109
import java.nio.file.Path;
1110
import java.util.LinkedHashSet;
@@ -434,7 +433,7 @@ public boolean supports(File f) {
434433
}
435434

436435
/** @return the number of lines of code extracted, or {@code null} if the file was cached */
437-
public Integer extract(File f, ExtractorState state) throws IOException {
436+
public ParseResultInfo extract(File f, ExtractorState state) throws IOException {
438437
FileSnippet snippet = state.getSnippets().get(f.toPath());
439438
if (snippet != null) {
440439
return this.extractSnippet(f.toPath(), snippet, state);
@@ -461,7 +460,7 @@ public Integer extract(File f, ExtractorState state) throws IOException {
461460
* <p>A trap file will be derived from the snippet file, but its file label, source locations, and
462461
* source archive entry are based on the original file.
463462
*/
464-
private Integer extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException {
463+
private ParseResultInfo extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException {
465464
TrapWriter trapwriter = outputConfig.getTrapWriterFactory().mkTrapWriter(file.toFile());
466465

467466
File originalFile = origin.getOriginalFile().toFile();
@@ -495,7 +494,7 @@ private Integer extractSnippet(Path file, FileSnippet origin, ExtractorState sta
495494
* <p>Also note that we support extraction with TRAP writer factories that are not file-backed;
496495
* obviously, no caching is done in that scenario.
497496
*/
498-
private Integer extractContents(
497+
private ParseResultInfo extractContents(
499498
File extractedFile, Label fileLabel, String source, LocationManager locationManager, ExtractorState state)
500499
throws IOException {
501500
ExtractionMetrics metrics = new ExtractionMetrics();
@@ -545,15 +544,15 @@ private Integer extractContents(
545544
TextualExtractor textualExtractor =
546545
new TextualExtractor(
547546
trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile);
548-
LoCInfo loc = extractor.extract(textualExtractor);
547+
ParseResultInfo loc = extractor.extract(textualExtractor);
549548
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
550549
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
551550
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
552551
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
553552
metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
554553
metrics.writeTimingsToTrap(trapwriter);
555554
successful = true;
556-
return linesOfCode;
555+
return loc;
557556
} finally {
558557
if (!successful && trapwriter instanceof CachingTrapWriter)
559558
((CachingTrapWriter) trapwriter).discard();

javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.File;
44
import java.io.IOException;
55
import java.nio.file.Path;
6+
import java.util.Collections;
67
import java.util.List;
78
import java.util.function.Supplier;
89
import java.util.regex.Matcher;
@@ -29,7 +30,7 @@
2930

3031
/** Extractor for handling HTML and XHTML files. */
3132
public class HTMLExtractor implements IExtractor {
32-
private LoCInfo locInfo = new LoCInfo(0, 0);
33+
private ParseResultInfo locInfo = new ParseResultInfo(0, 0, Collections.emptyList());
3334

3435
private class JavaScriptHTMLElementHandler implements HtmlPopulator.ElementHandler {
3536
private final ScopeManager scopeManager;
@@ -212,11 +213,11 @@ public static HTMLExtractor forEmbeddedHtml(ExtractorConfig config) {
212213
}
213214

214215
@Override
215-
public LoCInfo extract(TextualExtractor textualExtractor) throws IOException {
216+
public ParseResultInfo extract(TextualExtractor textualExtractor) throws IOException {
216217
return extractEx(textualExtractor).snd();
217218
}
218219

219-
public Pair<List<Label>, LoCInfo> extractEx(TextualExtractor textualExtractor) {
220+
public Pair<List<Label>, ParseResultInfo> extractEx(TextualExtractor textualExtractor) {
220221
// Angular templates contain attribute names that are not valid HTML/XML, such
221222
// as [foo], (foo), [(foo)], and *foo.
222223
// Allow a large number of errors in attribute names, so the Jericho parser does
@@ -369,7 +370,7 @@ private void extractSnippet(
369370
config.getExtractLines(),
370371
textualExtractor.getMetrics(),
371372
textualExtractor.getExtractedFile());
372-
Pair<Label, LoCInfo> result = extractor.extract(tx, source, toplevelKind, scopeManager);
373+
Pair<Label, ParseResultInfo> result = extractor.extract(tx, source, toplevelKind, scopeManager);
373374
Label toplevelLabel = result.fst();
374375
if (toplevelLabel != null) { // can be null when script ends up being parsed as JSON
375376
emitTopLevelXmlNodeBinding(parentLabel, toplevelLabel, trapWriter);
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.semmle.js.extractor;
22

33
import java.io.IOException;
4+
import com.semmle.js.parser.ParseError;
45

56
/** Generic extractor interface. */
67
public interface IExtractor {
@@ -9,5 +10,5 @@ public interface IExtractor {
910
* TextualExtractor}, and return information about the number of lines of code and the number of
1011
* lines of comments extracted.
1112
*/
12-
public LoCInfo extract(TextualExtractor textualExtractor) throws IOException;
13+
public ParseResultInfo extract(TextualExtractor textualExtractor) throws IOException;
1314
}

0 commit comments

Comments
 (0)