1
1
package com .semmle .js .extractor ;
2
2
3
- import com .semmle .js .extractor .ExtractionMetrics .ExtractionPhase ;
4
- import com .semmle .js .extractor .trapcache .CachingTrapWriter ;
5
- import com .semmle .js .extractor .trapcache .ITrapCache ;
6
- import com .semmle .util .data .StringUtil ;
7
- import com .semmle .util .exception .Exceptions ;
8
- import com .semmle .util .extraction .ExtractorOutputConfig ;
9
- import com .semmle .util .files .FileUtil ;
10
- import com .semmle .util .io .WholeIO ;
11
- import com .semmle .util .trap .TrapWriter ;
12
- import com .semmle .util .trap .TrapWriter .Label ;
13
3
import java .io .BufferedReader ;
14
4
import java .io .File ;
15
5
import java .io .FileInputStream ;
16
6
import java .io .FileReader ;
17
7
import java .io .IOException ;
18
8
import java .nio .charset .Charset ;
19
9
import java .nio .charset .StandardCharsets ;
10
+ import java .nio .file .Path ;
20
11
import java .util .LinkedHashSet ;
21
12
import java .util .Set ;
22
13
import java .util .regex .Pattern ;
23
14
15
+ import com .semmle .js .extractor .ExtractionMetrics .ExtractionPhase ;
16
+ import com .semmle .js .extractor .trapcache .CachingTrapWriter ;
17
+ import com .semmle .js .extractor .trapcache .ITrapCache ;
18
+ import com .semmle .util .data .StringUtil ;
19
+ import com .semmle .util .exception .Exceptions ;
20
+ import com .semmle .util .extraction .ExtractorOutputConfig ;
21
+ import com .semmle .util .files .FileUtil ;
22
+ import com .semmle .util .io .WholeIO ;
23
+ import com .semmle .util .trap .TrapWriter ;
24
+ import com .semmle .util .trap .TrapWriter .Label ;
25
+
24
26
/**
25
27
* The file extractor extracts a single file and handles source archive population and TRAP caching;
26
28
* it delegates to the appropriate {@link IExtractor} for extracting the contents of the file.
@@ -47,7 +49,7 @@ public static enum FileType {
47
49
HTML (".htm" , ".html" , ".xhtm" , ".xhtml" , ".vue" ) {
48
50
/**
 * Creates the extractor for the HTML file type (".htm", ".html", ".xhtm", ".xhtml", ".vue").
 *
 * @param config the extractor configuration handed to the new extractor
 * @param state the extractor state; the added line passes it on to HTMLExtractor, whereas the
 *     removed line constructed the extractor from the configuration alone
 * @return a new HTMLExtractor
 */
@ Override
49
51
public IExtractor mkExtractor (ExtractorConfig config , ExtractorState state ) {
50
- return new HTMLExtractor (config );
52
+ return new HTMLExtractor (config , state );
51
53
}
52
54
53
55
@ Override
@@ -293,7 +295,7 @@ private boolean hasUnrecognizedShebang(byte[] bytes, int length) {
293
295
294
296
/**
 * Creates the extractor for this file type.
 *
 * @param config the extractor configuration handed to the new extractor
 * @param state the extractor state; the added line passes the whole state to
 *     TypeScriptExtractor, whereas the removed line passed only state.getTypeScriptParser()
 * @return a new TypeScriptExtractor
 */
@ Override
295
297
public IExtractor mkExtractor (ExtractorConfig config , ExtractorState state ) {
296
- return new TypeScriptExtractor (config , state . getTypeScriptParser () );
298
+ return new TypeScriptExtractor (config , state );
297
299
}
298
300
299
301
@ Override
@@ -398,6 +400,10 @@ public boolean supports(File f) {
398
400
399
401
/** @return the number of lines of code extracted, or {@code null} if the file was cached */
400
402
public Integer extract (File f , ExtractorState state ) throws IOException {
403
+ FileSnippet snippet = state .getSnippets ().get (f .toPath ());
404
+ if (snippet != null ) {
405
+ return this .extractSnippet (f .toPath (), snippet , state );
406
+ }
401
407
402
408
// populate source archive
403
409
String source = new WholeIO (config .getDefaultEncoding ()).strictread (f );
@@ -414,6 +420,25 @@ public Integer extract(File f, ExtractorState state) throws IOException {
414
420
return extractContents (f , fileLabel , source , locationManager , state );
415
421
}
416
422
423
+ /**
424
+ * Extract the contents of a file that is a snippet from another file.
425
+ *
426
+ * <p>A trap file will be derived from the snippet file, but its file label, source locations, and
427
+ * source archive entry are based on the original file.
+ *
+ * <p>Steps: a TRAP writer is created for the snippet file; the file label is obtained by calling
+ * populateFile on the original file; a LocationManager is created for the original file and its
+ * start is set to the snippet's line and column; the snippet file's text is then read with the
+ * configured default encoding and handed to extractContents together with the snippet file.
+ *
+ * <p>NOTE(review): on its caching path extractContents re-creates the LocationManager from the
+ * extracted file; confirm that the original file and the start position set here survive that
+ * path for snippets.
+ *
+ * @param file path of the snippet file; the TRAP writer is created for it and its contents are
+ *     read as the source text
+ * @param origin describes where the snippet comes from: the original file plus a line and column
+ * @param state the extractor state, passed through unchanged to extractContents
+ * @return the result of extractContents for the snippet
+ * @throws IOException declared by this method; presumably raised when reading the snippet or
+ *     writing TRAP output fails (TODO confirm)
428
+ */
429
+ private Integer extractSnippet (Path file , FileSnippet origin , ExtractorState state ) throws IOException {
430
+ TrapWriter trapwriter = outputConfig .getTrapWriterFactory ().mkTrapWriter (file .toFile ());
431
+
432
+ File originalFile = origin .getOriginalFile ().toFile ();
433
+ Label fileLabel = trapwriter .populateFile (originalFile );
434
+ LocationManager locationManager = new LocationManager (originalFile , trapwriter , fileLabel );
435
+ locationManager .setStart (origin .getLine (), origin .getColumn ());
436
+
437
+ String source = new WholeIO (config .getDefaultEncoding ()).strictread (file );
438
+
439
+ return extractContents (file .toFile (), fileLabel , source , locationManager , state );
440
+ }
441
+
417
442
/**
418
443
* Extract the contents of a file, potentially making use of cached information.
419
444
*
@@ -436,20 +461,20 @@ public Integer extract(File f, ExtractorState state) throws IOException {
436
461
* obviously, no caching is done in that scenario.
437
462
*/
438
463
private Integer extractContents (
439
- File f , Label fileLabel , String source , LocationManager locationManager , ExtractorState state )
464
+ File extractedFile , Label fileLabel , String source , LocationManager locationManager , ExtractorState state )
440
465
throws IOException {
441
466
ExtractionMetrics metrics = new ExtractionMetrics ();
442
467
metrics .startPhase (ExtractionPhase .FileExtractor_extractContents );
443
468
metrics .setLength (source .length ());
444
469
metrics .setFileLabel (fileLabel );
445
470
TrapWriter trapwriter = locationManager .getTrapWriter ();
446
- FileType fileType = getFileType (f );
471
+ FileType fileType = getFileType (extractedFile );
447
472
448
473
File cacheFile = null , // the cache file for this extraction
449
474
resultFile = null ; // the final result TRAP file for this extraction
450
475
451
476
if (bumpIdCounter (trapwriter )) {
452
- resultFile = outputConfig .getTrapWriterFactory ().getTrapFileFor (f );
477
+ resultFile = outputConfig .getTrapWriterFactory ().getTrapFileFor (extractedFile );
453
478
}
454
479
// check whether we can perform caching
455
480
if (resultFile != null && fileType .isTrapCachingAllowed ()) {
@@ -475,7 +500,7 @@ private Integer extractContents(
475
500
trapwriter = new CachingTrapWriter (cacheFile , resultFile );
476
501
bumpIdCounter (trapwriter );
477
502
// re-initialise the location manager, since it keeps a reference to the TRAP writer
478
- locationManager = new LocationManager (f , trapwriter , locationManager .getFileLabel ());
503
+ locationManager = new LocationManager (extractedFile , trapwriter , locationManager .getFileLabel ());
479
504
}
480
505
481
506
// now do the extraction itself
@@ -484,7 +509,7 @@ private Integer extractContents(
484
509
IExtractor extractor = fileType .mkExtractor (config , state );
485
510
TextualExtractor textualExtractor =
486
511
new TextualExtractor (
487
- trapwriter , locationManager , source , config .getExtractLines (), metrics );
512
+ trapwriter , locationManager , source , config .getExtractLines (), metrics , extractedFile );
488
513
LoCInfo loc = extractor .extract (textualExtractor );
489
514
int numLines = textualExtractor .getNumLines ();
490
515
int linesOfCode = loc .getLinesOfCode (), linesOfComments = loc .getLinesOfComments ();
0 commit comments