|
1 | 1 | package com.semmle.js.extractor;
|
2 | 2 |
|
3 | 3 | import java.io.File;
|
| 4 | +import java.io.FileNotFoundException; |
4 | 5 | import java.io.IOException;
|
5 | 6 | import java.io.Reader;
|
6 | 7 | import java.lang.ProcessBuilder.Redirect;
|
|
17 | 18 | import java.nio.file.attribute.BasicFileAttributes;
|
18 | 19 | import java.util.ArrayList;
|
19 | 20 | import java.util.Arrays;
|
| 21 | +import java.util.Collections; |
20 | 22 | import java.util.Comparator;
|
21 | 23 | import java.util.LinkedHashMap;
|
22 | 24 | import java.util.LinkedHashSet;
|
|
27 | 29 | import java.util.concurrent.ExecutorService;
|
28 | 30 | import java.util.concurrent.Executors;
|
29 | 31 | import java.util.concurrent.TimeUnit;
|
| 32 | +import java.util.concurrent.atomic.AtomicInteger; |
30 | 33 | import java.util.function.Predicate;
|
31 | 34 | import java.util.stream.Collectors;
|
32 | 35 | import java.util.stream.Stream;
|
|
41 | 44 | import com.semmle.js.extractor.trapcache.DefaultTrapCache;
|
42 | 45 | import com.semmle.js.extractor.trapcache.DummyTrapCache;
|
43 | 46 | import com.semmle.js.extractor.trapcache.ITrapCache;
|
| 47 | +import com.semmle.js.parser.ParseError; |
44 | 48 | import com.semmle.js.parser.ParsedProject;
|
45 | 49 | import com.semmle.ts.extractor.TypeExtractor;
|
46 | 50 | import com.semmle.ts.extractor.TypeScriptParser;
|
| 51 | +import com.semmle.ts.extractor.TypeScriptWrapperOOMError; |
47 | 52 | import com.semmle.ts.extractor.TypeTable;
|
48 | 53 | import com.semmle.util.data.StringUtil;
|
| 54 | +import com.semmle.util.diagnostics.DiagnosticLevel; |
| 55 | +import com.semmle.util.diagnostics.DiagnosticWriter; |
49 | 56 | import com.semmle.util.exception.CatastrophicError;
|
50 | 57 | import com.semmle.util.exception.Exceptions;
|
51 | 58 | import com.semmle.util.exception.ResourceError;
|
@@ -444,33 +451,127 @@ protected boolean hasSeenCode() {
|
444 | 451 |
|
445 | 452 | /** Perform extraction. */
|
446 | 453 | public int run() throws IOException {
|
447 |
| - startThreadPool(); |
448 |
| - try { |
449 |
| - CompletableFuture<?> sourceFuture = extractSource(); |
450 |
| - sourceFuture.join(); // wait for source extraction to complete |
451 |
| - if (hasSeenCode()) { // don't bother with the externs if no code was seen |
452 |
| - extractExterns(); |
| 454 | + startThreadPool(); |
| 455 | + try { |
| 456 | + CompletableFuture<?> sourceFuture = extractSource(); |
| 457 | + sourceFuture.join(); // wait for source extraction to complete |
| 458 | + if (hasSeenCode()) { // don't bother with the externs if no code was seen |
| 459 | + extractExterns(); |
| 460 | + } |
| 461 | + extractXml(); |
| 462 | + } catch (OutOfMemoryError oom) { |
| 463 | + System.err.println("Out of memory while extracting the project."); |
| 464 | + return 137; // the CodeQL CLI will interpret this as an out-of-memory error |
| 465 | + // purpusely not doing anything else (printing stack, etc.), as the JVM |
| 466 | + // basically guarantees nothing after an OOM |
| 467 | + } catch (TypeScriptWrapperOOMError oom) { |
| 468 | + System.err.println("Out of memory while extracting the project."); |
| 469 | + System.err.println(oom.getMessage()); |
| 470 | + oom.printStackTrace(System.err); |
| 471 | + return 137; |
| 472 | + } catch (RuntimeException | IOException e) { |
| 473 | + writeDiagnostics("Internal error: " + e, JSDiagnosticKind.INTERNAL_ERROR); |
| 474 | + e.printStackTrace(System.err); |
| 475 | + return 1; |
| 476 | + } finally { |
| 477 | + shutdownThreadPool(); |
| 478 | + diagnosticsToClose.forEach(DiagnosticWriter::close); |
453 | 479 | }
|
454 |
| - extractXml(); |
455 |
| - } finally { |
456 |
| - shutdownThreadPool(); |
| 480 | + |
| 481 | + if (!hasSeenCode()) { |
| 482 | + if (seenFiles) { |
| 483 | + warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors."); |
| 484 | + } else { |
| 485 | + warn("No JavaScript or TypeScript code found."); |
| 486 | + } |
| 487 | + // ensuring that the finalize steps detects that no code was seen. |
| 488 | + Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src"); |
| 489 | + // check that the srcFolder is empty |
| 490 | + if (Files.list(srcFolder).count() == 0) { |
| 491 | + // Non-recursive delete because "src/" should be empty. |
| 492 | + FileUtil8.delete(srcFolder); |
| 493 | + } |
| 494 | + return 0; |
| 495 | + } |
| 496 | + return 0; |
| 497 | + } |
| 498 | + |
| 499 | + /** |
| 500 | + * A kind of error that can happen during extraction of JavaScript or TypeScript |
| 501 | + * code. |
| 502 | + * For use with the {@link #writeDiagnostics(String, JSDiagnosticKind)} method. |
| 503 | + */ |
| 504 | + public static enum JSDiagnosticKind { |
| 505 | + PARSE_ERROR("parse-error", "Parse error", DiagnosticLevel.Warning), |
| 506 | + INTERNAL_ERROR("internal-error", "Internal error", DiagnosticLevel.Debug); |
| 507 | + |
| 508 | + private final String id; |
| 509 | + private final String name; |
| 510 | + private final DiagnosticLevel level; |
| 511 | + |
| 512 | + private JSDiagnosticKind(String id, String name, DiagnosticLevel level) { |
| 513 | + this.id = id; |
| 514 | + this.name = name; |
| 515 | + this.level = level; |
| 516 | + } |
| 517 | + |
| 518 | + public String getId() { |
| 519 | + return id; |
| 520 | + } |
| 521 | + |
| 522 | + public String getName() { |
| 523 | + return name; |
| 524 | + } |
| 525 | + |
| 526 | + public DiagnosticLevel getLevel() { |
| 527 | + return level; |
457 | 528 | }
|
458 |
| - if (!hasSeenCode()) { |
459 |
| - if (seenFiles) { |
460 |
| - warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors."); |
| 529 | + } |
| 530 | + |
| 531 | + private AtomicInteger diagnosticCount = new AtomicInteger(0); |
| 532 | + private List<DiagnosticWriter> diagnosticsToClose = Collections.synchronizedList(new ArrayList<>()); |
| 533 | + private ThreadLocal<DiagnosticWriter> diagnostics = new ThreadLocal<DiagnosticWriter>(){ |
| 534 | + @Override protected DiagnosticWriter initialValue() { |
| 535 | + DiagnosticWriter result = initDiagnosticsWriter(diagnosticCount.incrementAndGet()); |
| 536 | + diagnosticsToClose.add(result); |
| 537 | + return result; |
| 538 | + } |
| 539 | + }; |
| 540 | + |
| 541 | + /** |
| 542 | + * Persist a diagnostic message to a file in the diagnostics directory. |
| 543 | + * See {@link JSDiagnosticKind} for the kinds of errors that can be reported, |
| 544 | + * and see |
| 545 | + * {@link DiagnosticWriter} for more details. |
| 546 | + */ |
| 547 | + public void writeDiagnostics(String message, JSDiagnosticKind error) throws IOException { |
| 548 | + if (diagnostics.get() == null) { |
| 549 | + warn("No diagnostics directory, so not writing diagnostic: " + message); |
| 550 | + return; |
| 551 | + } |
| 552 | + |
| 553 | + // DiagnosticLevel level, String extractorName, String sourceId, String sourceName, String markdown |
| 554 | + diagnostics.get().writeMarkdown(error.getLevel(), "javascript", "javascript/" + error.getId(), error.getName(), |
| 555 | + message); |
| 556 | + } |
| 557 | + |
| 558 | + private DiagnosticWriter initDiagnosticsWriter(int count) { |
| 559 | + String diagnosticsDir = System.getenv("CODEQL_EXTRACTOR_JAVASCRIPT_DIAGNOSTIC_DIR"); |
| 560 | + |
| 561 | + if (diagnosticsDir != null) { |
| 562 | + File diagnosticsDirFile = new File(diagnosticsDir); |
| 563 | + if (!diagnosticsDirFile.isDirectory()) { |
| 564 | + warn("Diagnostics directory " + diagnosticsDir + " does not exist"); |
461 | 565 | } else {
|
462 |
| - warn("No JavaScript or TypeScript code found."); |
463 |
| - } |
464 |
| - // ensuring that the finalize steps detects that no code was seen. |
465 |
| - Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src"); |
466 |
| - // check that the srcFolder is empty |
467 |
| - if (Files.list(srcFolder).count() == 0) { |
468 |
| - // Non-recursive delete because "src/" should be empty. |
469 |
| - FileUtil8.delete(srcFolder); |
| 566 | + File diagnosticsFile = new File(diagnosticsDirFile, "autobuilder-" + count + ".jsonl"); |
| 567 | + try { |
| 568 | + return new DiagnosticWriter(diagnosticsFile); |
| 569 | + } catch (FileNotFoundException e) { |
| 570 | + warn("Failed to open diagnostics file " + diagnosticsFile); |
| 571 | + } |
470 | 572 | }
|
471 |
| - return 0; |
472 | 573 | }
|
473 |
| - return 0; |
| 574 | + return null; |
474 | 575 | }
|
475 | 576 |
|
476 | 577 | private void startThreadPool() {
|
@@ -1113,13 +1214,26 @@ private void doExtract(FileExtractor extractor, Path file, ExtractorState state)
|
1113 | 1214 |
|
1114 | 1215 | try {
|
1115 | 1216 | long start = logBeginProcess("Extracting " + file);
|
1116 |
| - Integer loc = extractor.extract(f, state); |
1117 |
| - if (!extractor.getConfig().isExterns() && (loc == null || loc != 0)) seenCode = true; |
| 1217 | + ParseResultInfo loc = extractor.extract(f, state); |
| 1218 | + if (!extractor.getConfig().isExterns() && (loc == null || loc.getLinesOfCode() != 0)) seenCode = true; |
1118 | 1219 | if (!extractor.getConfig().isExterns()) seenFiles = true;
|
| 1220 | + for (ParseError err : loc.getParseErrors()) { |
| 1221 | + String msg = "A parse error occurred: " + err.getMessage() + ". Check the syntax of the file. If the file is invalid, correct the error or exclude the file from analysis."; |
| 1222 | + writeDiagnostics(msg, JSDiagnosticKind.PARSE_ERROR); |
| 1223 | + } |
1119 | 1224 | logEndProcess(start, "Done extracting " + file);
|
| 1225 | + } catch (OutOfMemoryError oom) { |
| 1226 | + System.err.println("Out of memory while extracting " + file + "."); |
| 1227 | + oom.printStackTrace(System.err); |
| 1228 | + System.exit(137); // caught by the CodeQL CLI |
1120 | 1229 | } catch (Throwable t) {
|
1121 | 1230 | System.err.println("Exception while extracting " + file + ".");
|
1122 | 1231 | t.printStackTrace(System.err);
|
| 1232 | + try { |
| 1233 | + writeDiagnostics("Internal error: " + t, JSDiagnosticKind.INTERNAL_ERROR); |
| 1234 | + } catch (IOException ignored) { |
| 1235 | + // ignore - we are already crashing |
| 1236 | + } |
1123 | 1237 | System.exit(1);
|
1124 | 1238 | }
|
1125 | 1239 | }
|
|
0 commit comments