Skip to content

Commit 9c65318

Browse files
committed
JS: Extract TypeScript in HTML files to a snippet in virtual source root
1 parent da58fb5 commit 9c65318

File tree

9 files changed

+186
-49
lines changed

9 files changed

+186
-49
lines changed

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ public class AutoBuild {
213213
private boolean installDependencies = false;
214214
private int installDependenciesTimeout;
215215
private final VirtualSourceRoot virtualSourceRoot;
216+
private ExtractorState state;
216217

217218
/** The default timeout when running <code>yarn</code>, in milliseconds. */
218219
public static final int INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT = 10 * 60 * 1000; // 10 minutes
@@ -234,6 +235,7 @@ public AutoBuild() {
234235
setupFileTypes();
235236
setupXmlMode();
236237
setupMatchers();
238+
this.state = new ExtractorState();
237239
}
238240

239241
private String getEnvVar(String envVarName) {
@@ -534,7 +536,7 @@ public File lookup(String source, ExtractorConfig config, FileType type) {
534536
@Override
535537
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
536538
throws IOException {
537-
if (".js".equals(FileUtil.extension(file.toString()))) extract(extractor, file, null);
539+
if (".js".equals(FileUtil.extension(file.toString()))) extract(extractor, file, true);
538540
return super.visitFile(file, attrs);
539541
}
540542
};
@@ -656,7 +658,7 @@ private CompletableFuture<?> extractFiles(
656658
continue;
657659
}
658660
extractedFiles.add(f);
659-
futures.add(extract(extractors.forFile(f), f, null));
661+
futures.add(extract(extractors.forFile(f), f, true));
660662
}
661663
return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]));
662664
}
@@ -980,9 +982,8 @@ private Set<Path> extractTypeScript(
980982
List<Path> tsconfig,
981983
DependencyInstallationResult deps) {
982984
if (hasTypeScriptFiles(files) || !tsconfig.isEmpty()) {
983-
ExtractorState extractorState = new ExtractorState();
984-
TypeScriptParser tsParser = extractorState.getTypeScriptParser();
985-
verifyTypeScriptInstallation(extractorState);
985+
TypeScriptParser tsParser = state.getTypeScriptParser();
986+
verifyTypeScriptInstallation(state);
986987

987988
// Collect all files included in a tsconfig.json inclusion pattern.
988989
// If a given file is referenced by multiple tsconfig files, we prefer to extract it using
@@ -1005,7 +1006,10 @@ private Set<Path> extractTypeScript(
10051006
List<Path> typeScriptFiles = new ArrayList<Path>();
10061007
for (File sourceFile : project.getAllFiles()) {
10071008
Path sourcePath = sourceFile.toPath();
1008-
if (!files.contains(normalizePath(sourcePath))) continue;
1009+
Path normalizedFile = normalizePath(sourcePath);
1010+
if (!files.contains(normalizedFile) && !state.getSnippets().containsKey(normalizedFile)) {
1011+
continue;
1012+
}
10091013
if (!project.getOwnFiles().contains(sourceFile) && explicitlyIncludedFiles.contains(sourceFile)) continue;
10101014
if (extractors.fileType(sourcePath) != FileType.TYPESCRIPT) {
10111015
// For the time being, skip non-TypeScript files, even if the TypeScript
@@ -1017,7 +1021,7 @@ private Set<Path> extractTypeScript(
10171021
}
10181022
}
10191023
typeScriptFiles.sort(PATH_ORDERING);
1020-
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractors, extractorState);
1024+
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractors);
10211025
tsParser.closeProject(projectFile);
10221026
}
10231027

@@ -1036,7 +1040,7 @@ private Set<Path> extractTypeScript(
10361040
}
10371041
}
10381042
if (!remainingTypeScriptFiles.isEmpty()) {
1039-
extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractors, extractorState);
1043+
extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractors);
10401044
}
10411045

10421046
// The TypeScript compiler instance is no longer needed.
@@ -1122,16 +1126,15 @@ public void verifyTypeScriptInstallation(ExtractorState extractorState) {
11221126
public void extractTypeScriptFiles(
11231127
List<Path> files,
11241128
Set<Path> extractedFiles,
1125-
FileExtractors extractors,
1126-
ExtractorState extractorState) {
1129+
FileExtractors extractors) {
11271130
List<File> list = files
11281131
.stream()
11291132
.sorted(PATH_ORDERING)
11301133
.map(p -> p.toFile()).collect(Collectors.toList());
1131-
extractorState.getTypeScriptParser().prepareFiles(list);
1134+
state.getTypeScriptParser().prepareFiles(list);
11321135
for (Path path : files) {
11331136
extractedFiles.add(path);
1134-
extract(extractors.forFile(path), path, extractorState);
1137+
extract(extractors.forFile(path), path, false);
11351138
}
11361139
}
11371140

@@ -1174,8 +1177,8 @@ private SourceType getSourceType() {
11741177
* <p>If {@code concurrent} is true and a thread pool is available, the extraction job will be submitted to the {@link
11751178
* #threadPool}, otherwise extraction will happen on the main thread.
11761179
*/
1177-
protected CompletableFuture<?> extract(FileExtractor extractor, Path file, ExtractorState state) {
1178-
if (state == null && threadPool != null) {
1180+
protected CompletableFuture<?> extract(FileExtractor extractor, Path file, boolean concurrent) {
1181+
if (concurrent && threadPool != null) {
11791182
return CompletableFuture.runAsync(() -> doExtract(extractor, file, state), threadPool);
11801183
} else {
11811184
doExtract(extractor, file, state);

javascript/extractor/src/com/semmle/js/extractor/ExtractorState.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package com.semmle.js.extractor;
22

3+
import java.nio.file.Path;
4+
import java.util.concurrent.ConcurrentHashMap;
5+
36
import com.semmle.js.parser.TypeScriptParser;
47

58
/**
@@ -17,16 +20,28 @@
1720
*/
1821
public class ExtractorState {
1922
private TypeScriptParser typeScriptParser = new TypeScriptParser();
23+
24+
private final ConcurrentHashMap<Path, FileSnippet> snippets = new ConcurrentHashMap<>();
2025

2126
public TypeScriptParser getTypeScriptParser() {
2227
return typeScriptParser;
2328
}
2429

30+
/**
31+
* Returns the mapping that denotes where a snippet file originated from.
32+
*
33+
* <p>The map is thread-safe and may be mutated by the caller.
34+
*/
35+
public ConcurrentHashMap<Path, FileSnippet> getSnippets() {
36+
return snippets;
37+
}
38+
2539
/**
2640
* Makes this semantically equivalent to a fresh state, but may internally retain shared resources
2741
* that are expensive to reacquire.
2842
*/
2943
public void reset() {
3044
typeScriptParser.reset();
45+
snippets.clear();
3146
}
3247
}

javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,28 @@
11
package com.semmle.js.extractor;
22

3-
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
4-
import com.semmle.js.extractor.trapcache.CachingTrapWriter;
5-
import com.semmle.js.extractor.trapcache.ITrapCache;
6-
import com.semmle.util.data.StringUtil;
7-
import com.semmle.util.exception.Exceptions;
8-
import com.semmle.util.extraction.ExtractorOutputConfig;
9-
import com.semmle.util.files.FileUtil;
10-
import com.semmle.util.io.WholeIO;
11-
import com.semmle.util.trap.TrapWriter;
12-
import com.semmle.util.trap.TrapWriter.Label;
133
import java.io.BufferedReader;
144
import java.io.File;
155
import java.io.FileInputStream;
166
import java.io.FileReader;
177
import java.io.IOException;
188
import java.nio.charset.Charset;
199
import java.nio.charset.StandardCharsets;
10+
import java.nio.file.Path;
2011
import java.util.LinkedHashSet;
2112
import java.util.Set;
2213
import java.util.regex.Pattern;
2314

15+
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
16+
import com.semmle.js.extractor.trapcache.CachingTrapWriter;
17+
import com.semmle.js.extractor.trapcache.ITrapCache;
18+
import com.semmle.util.data.StringUtil;
19+
import com.semmle.util.exception.Exceptions;
20+
import com.semmle.util.extraction.ExtractorOutputConfig;
21+
import com.semmle.util.files.FileUtil;
22+
import com.semmle.util.io.WholeIO;
23+
import com.semmle.util.trap.TrapWriter;
24+
import com.semmle.util.trap.TrapWriter.Label;
25+
2426
/**
2527
* The file extractor extracts a single file and handles source archive population and TRAP caching;
2628
* it delegates to the appropriate {@link IExtractor} for extracting the contents of the file.
@@ -47,7 +49,7 @@ public static enum FileType {
4749
HTML(".htm", ".html", ".xhtm", ".xhtml", ".vue") {
4850
@Override
4951
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
50-
return new HTMLExtractor(config);
52+
return new HTMLExtractor(config, state);
5153
}
5254

5355
@Override
@@ -293,7 +295,7 @@ private boolean hasUnrecognizedShebang(byte[] bytes, int length) {
293295

294296
@Override
295297
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
296-
return new TypeScriptExtractor(config, state.getTypeScriptParser());
298+
return new TypeScriptExtractor(config, state);
297299
}
298300

299301
@Override
@@ -398,6 +400,10 @@ public boolean supports(File f) {
398400

399401
/** @return the number of lines of code extracted, or {@code null} if the file was cached */
400402
public Integer extract(File f, ExtractorState state) throws IOException {
403+
FileSnippet snippet = state.getSnippets().get(f.toPath());
404+
if (snippet != null) {
405+
return this.extractSnippet(f.toPath(), snippet, state);
406+
}
401407

402408
// populate source archive
403409
String source = new WholeIO(config.getDefaultEncoding()).strictread(f);
@@ -414,6 +420,25 @@ public Integer extract(File f, ExtractorState state) throws IOException {
414420
return extractContents(f, fileLabel, source, locationManager, state);
415421
}
416422

423+
/**
424+
* Extract the contents of a file that is a snippet from another file.
425+
*
426+
* <p>A trap file will be derived from the snippet file, but its file label, source locations, and
427+
* source archive entry are based on the original file.
428+
*/
429+
private Integer extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException {
430+
TrapWriter trapwriter = outputConfig.getTrapWriterFactory().mkTrapWriter(file.toFile());
431+
432+
File originalFile = origin.getOriginalFile().toFile();
433+
Label fileLabel = trapwriter.populateFile(originalFile);
434+
LocationManager locationManager = new LocationManager(originalFile, trapwriter, fileLabel);
435+
locationManager.setStart(origin.getLine(), origin.getColumn());
436+
437+
String source = new WholeIO(config.getDefaultEncoding()).strictread(file);
438+
439+
return extractContents(file.toFile(), fileLabel, source, locationManager, state);
440+
}
441+
417442
/**
418443
* Extract the contents of a file, potentially making use of cached information.
419444
*
@@ -436,20 +461,20 @@ public Integer extract(File f, ExtractorState state) throws IOException {
436461
* obviously, no caching is done in that scenario.
437462
*/
438463
private Integer extractContents(
439-
File f, Label fileLabel, String source, LocationManager locationManager, ExtractorState state)
464+
File extractedFile, Label fileLabel, String source, LocationManager locationManager, ExtractorState state)
440465
throws IOException {
441466
ExtractionMetrics metrics = new ExtractionMetrics();
442467
metrics.startPhase(ExtractionPhase.FileExtractor_extractContents);
443468
metrics.setLength(source.length());
444469
metrics.setFileLabel(fileLabel);
445470
TrapWriter trapwriter = locationManager.getTrapWriter();
446-
FileType fileType = getFileType(f);
471+
FileType fileType = getFileType(extractedFile);
447472

448473
File cacheFile = null, // the cache file for this extraction
449474
resultFile = null; // the final result TRAP file for this extraction
450475

451476
if (bumpIdCounter(trapwriter)) {
452-
resultFile = outputConfig.getTrapWriterFactory().getTrapFileFor(f);
477+
resultFile = outputConfig.getTrapWriterFactory().getTrapFileFor(extractedFile);
453478
}
454479
// check whether we can perform caching
455480
if (resultFile != null && fileType.isTrapCachingAllowed()) {
@@ -475,7 +500,7 @@ private Integer extractContents(
475500
trapwriter = new CachingTrapWriter(cacheFile, resultFile);
476501
bumpIdCounter(trapwriter);
477502
// re-initialise the location manager, since it keeps a reference to the TRAP writer
478-
locationManager = new LocationManager(f, trapwriter, locationManager.getFileLabel());
503+
locationManager = new LocationManager(extractedFile, trapwriter, locationManager.getFileLabel());
479504
}
480505

481506
// now do the extraction itself
@@ -484,7 +509,7 @@ private Integer extractContents(
484509
IExtractor extractor = fileType.mkExtractor(config, state);
485510
TextualExtractor textualExtractor =
486511
new TextualExtractor(
487-
trapwriter, locationManager, source, config.getExtractLines(), metrics);
512+
trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile);
488513
LoCInfo loc = extractor.extract(textualExtractor);
489514
int numLines = textualExtractor.getNumLines();
490515
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package com.semmle.js.extractor;
2+
3+
import java.nio.file.Path;
4+
5+
/**
 * Denotes where a code snippet originated from within a file.
 *
 * <p>Instances are immutable: every field is assigned exactly once, in the constructor.
 */
public class FileSnippet {
  private final Path originalFile;
  private final int line;
  private final int column;
  private final int topLevelKind;

  /**
   * Creates a description of a snippet's origin.
   *
   * @param originalFile the file the snippet was taken from
   * @param line the line in {@code originalFile} at which the snippet starts
   * @param column the column in {@code originalFile} at which the snippet starts
   * @param topLevelKind the top-level kind code associated with the snippet, stored as given
   */
  public FileSnippet(Path originalFile, int line, int column, int topLevelKind) {
    this.originalFile = originalFile;
    this.line = line;
    this.column = column;
    this.topLevelKind = topLevelKind;
  }

  /** Returns the file the snippet was taken from. */
  public Path getOriginalFile() {
    return originalFile;
  }

  /** Returns the line in the original file at which the snippet starts. */
  public int getLine() {
    return line;
  }

  /** Returns the column in the original file at which the snippet starts. */
  public int getColumn() {
    return column;
  }

  /** Returns the top-level kind code that was passed to the constructor. */
  public int getTopLevelKind() {
    return topLevelKind;
  }
}

javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package com.semmle.js.extractor;
22

3+
import java.nio.file.Path;
34
import java.util.regex.Pattern;
45

56
import com.semmle.js.extractor.ExtractorConfig.Platform;
67
import com.semmle.js.extractor.ExtractorConfig.SourceType;
78
import com.semmle.js.parser.ParseError;
89
import com.semmle.util.data.StringUtil;
10+
import com.semmle.util.io.WholeIO;
911
import com.semmle.util.trap.TrapWriter;
1012
import com.semmle.util.trap.TrapWriter.Label;
1113

@@ -28,9 +30,11 @@ public class HTMLExtractor implements IExtractor {
2830
Pattern.CASE_INSENSITIVE);
2931

3032
private final ExtractorConfig config;
33+
private final ExtractorState state;
3134

32-
public HTMLExtractor(ExtractorConfig config) {
35+
public HTMLExtractor(ExtractorConfig config, ExtractorState state) {
3336
this.config = config.withPlatform(Platform.WEB);
37+
this.state = state;
3438
}
3539

3640
@Override
@@ -208,8 +212,25 @@ private LoCInfo extractSnippet(
208212
int line,
209213
int column,
210214
boolean isTypeScript) {
211-
if (isTypeScript)
212-
return null; // not supported right now
215+
if (isTypeScript) {
216+
Path file = textualExtractor.getExtractedFile().toPath();
217+
FileSnippet snippet = new FileSnippet(file, line, column, toplevelKind);
218+
VirtualSourceRoot vroot = config.getVirtualSourceRoot();
219+
// Vue files are special in that they can be imported as modules, and may only contain one <script> tag.
220+
// For .vue files we omit the usual snippet decoration to ensure the TypeScript compiler can find it.
221+
Path virtualFile =
222+
file.getFileName().toString().endsWith(".vue")
223+
? vroot.toVirtualFile(file.resolveSibling(file.getFileName() + ".ts"))
224+
: vroot.getVirtualFileForSnippet(snippet, ".ts");
225+
if (virtualFile != null) {
226+
virtualFile = virtualFile.toAbsolutePath().normalize();
227+
synchronized(vroot.getLock()) {
228+
new WholeIO().strictwrite(virtualFile, source);
229+
}
230+
state.getSnippets().put(virtualFile, snippet);
231+
}
232+
return null; // LoC info is accounted for later
233+
}
213234
TrapWriter trapwriter = textualExtractor.getTrapwriter();
214235
LocationManager locationManager = textualExtractor.getLocationManager();
215236
LocationManager scriptLocationManager =
@@ -224,7 +245,8 @@ private LoCInfo extractSnippet(
224245
scriptLocationManager,
225246
source,
226247
config.getExtractLines(),
227-
textualExtractor.getMetrics());
248+
textualExtractor.getMetrics(),
249+
textualExtractor.getExtractedFile());
228250
return extractor.extract(tx, source, toplevelKind, scopeManager).snd();
229251
} catch (ParseError e) {
230252
e.setPosition(scriptLocationManager.translatePosition(e.getPosition()));

0 commit comments

Comments
 (0)