Skip to content

Commit 9082972

Browse files
authored
Merge pull request github#16061 from RasmusWL/js-extractor-fix
JS: More robust CommonJS/ES2015 detection logic for extractor
2 parents 116873c + 64321b3 commit 9082972

18 files changed

+581
-275
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package com.semmle.js.extractor;
2+
3+
import com.semmle.js.ast.AssignmentExpression;
4+
import com.semmle.js.ast.BlockStatement;
5+
import com.semmle.js.ast.CallExpression;
6+
import com.semmle.js.ast.Expression;
7+
import com.semmle.js.ast.ExpressionStatement;
8+
import com.semmle.js.ast.IFunction;
9+
import com.semmle.js.ast.Identifier;
10+
import com.semmle.js.ast.IfStatement;
11+
import com.semmle.js.ast.MemberExpression;
12+
import com.semmle.js.ast.Node;
13+
import com.semmle.js.ast.ParenthesizedExpression;
14+
import com.semmle.js.ast.Program;
15+
import com.semmle.js.ast.Statement;
16+
import com.semmle.js.ast.TryStatement;
17+
import com.semmle.js.ast.UnaryExpression;
18+
import com.semmle.js.ast.VariableDeclaration;
19+
import com.semmle.js.ast.VariableDeclarator;
20+
import java.util.List;
21+
22+
/**
23+
* A utility base class for running detection logic on statements/expressions by
24+
* visiting each node. It performs recursive decent into assignment expressions and
25+
* callee expressions for call-expressions (to handle detection of `foo` in `foo()()`)
26+
* */
27+
public abstract class AbstractDetector {
28+
protected boolean programDetection(Node ast) {
29+
if (!(ast instanceof Program)) return false;
30+
31+
return visitStatements(((Program) ast).getBody());
32+
}
33+
34+
protected boolean visitStatements(List<Statement> stmts) {
35+
for (Statement stmt : stmts) if (visitStatement(stmt)) return true;
36+
return false;
37+
}
38+
39+
protected boolean visitStatement(Statement stmt) {
40+
if (stmt instanceof ExpressionStatement) {
41+
Expression e = stripParens(((ExpressionStatement) stmt).getExpression());
42+
43+
// check whether `e` is an iife; if so, recursively check its body
44+
45+
// strip off unary operators to handle `!function(){...}()`
46+
if (e instanceof UnaryExpression) e = ((UnaryExpression) e).getArgument();
47+
48+
if (e instanceof CallExpression && ((CallExpression) e).getArguments().isEmpty()) {
49+
Expression callee = stripParens(((CallExpression) e).getCallee());
50+
if (callee instanceof IFunction) {
51+
Node body = ((IFunction) callee).getBody();
52+
if (body instanceof BlockStatement)
53+
return visitStatements(((BlockStatement) body).getBody());
54+
}
55+
}
56+
57+
if (visitExpression(e)) return true;
58+
59+
} else if (stmt instanceof VariableDeclaration) {
60+
for (VariableDeclarator decl : ((VariableDeclaration) stmt).getDeclarations()) {
61+
Expression init = stripParens(decl.getInit());
62+
if (visitExpression(init)) return true;
63+
}
64+
65+
} else if (stmt instanceof TryStatement) {
66+
return visitStatement(((TryStatement) stmt).getBlock());
67+
68+
} else if (stmt instanceof BlockStatement) {
69+
return visitStatements(((BlockStatement) stmt).getBody());
70+
71+
} else if (stmt instanceof IfStatement) {
72+
IfStatement is = (IfStatement) stmt;
73+
return visitStatement(is.getConsequent())
74+
|| visitStatement(is.getAlternate());
75+
}
76+
77+
return false;
78+
}
79+
80+
private static Expression stripParens(Expression e) {
81+
if (e instanceof ParenthesizedExpression)
82+
return stripParens(((ParenthesizedExpression) e).getExpression());
83+
return e;
84+
}
85+
86+
/**
87+
* Recursively check {@code e} if it's a call or an assignment.
88+
*/
89+
protected boolean visitExpression(Expression e) {
90+
if (e instanceof CallExpression) {
91+
CallExpression call = (CallExpression) e;
92+
Expression callee = call.getCallee();
93+
// recurse, to handle things like `foo()()`
94+
if (visitExpression(callee)) return true;
95+
return false;
96+
} else if (e instanceof MemberExpression) {
97+
return visitExpression(((MemberExpression) e).getObject());
98+
} else if (e instanceof AssignmentExpression) {
99+
AssignmentExpression assgn = (AssignmentExpression) e;
100+
101+
// filter out compound assignments
102+
if (!"=".equals(assgn.getOperator())) return false;
103+
104+
return visitExpression(assgn.getRight());
105+
}
106+
return false;
107+
}
108+
109+
/** Is {@code e} an identifier with name {@code name}? */
110+
protected static boolean isIdentifier(Expression e, String name) {
111+
return e instanceof Identifier && name.equals(((Identifier) e).getName());
112+
}
113+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package com.semmle.js.extractor;
2+
3+
import com.semmle.js.ast.DynamicImport;
4+
import com.semmle.js.ast.ExportDeclaration;
5+
import com.semmle.js.ast.Expression;
6+
import com.semmle.js.ast.ImportDeclaration;
7+
import com.semmle.js.ast.Node;
8+
import com.semmle.js.ast.Statement;
9+
10+
/** A utility class for detecting Node.js code. */
11+
public class ES2015Detector extends AbstractDetector {
12+
/**
13+
* Is {@code ast} a program that uses ES2015 import/export code?
14+
*/
15+
public static boolean looksLikeES2015(Node ast) {
16+
return new ES2015Detector().programDetection(ast);
17+
}
18+
19+
@Override
20+
protected boolean visitStatement(Statement stmt) {
21+
if (stmt instanceof ImportDeclaration || stmt instanceof ExportDeclaration) {
22+
return true;
23+
}
24+
return super.visitStatement(stmt);
25+
}
26+
27+
@Override
28+
protected boolean visitExpression(Expression e) {
29+
if (e instanceof DynamicImport) {
30+
return true;
31+
}
32+
return super.visitExpression(e);
33+
}
34+
}

javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,26 @@ public Pair<Label, ParseResultInfo> extract(
5858

5959
JSParser.Result parserRes =
6060
JSParser.parse(config, sourceType, source, textualExtractor.getMetrics());
61+
62+
// Check if we guessed wrong with the regex in `establishSourceType`, (which could
63+
// happen due to a block-comment line starting with ' import').
64+
if (config.getSourceType() == SourceType.AUTO && sourceType != SourceType.SCRIPT) {
65+
boolean wrongGuess = false;
66+
67+
if (sourceType == SourceType.MODULE) {
68+
// check that we did see an import/export declaration
69+
wrongGuess = ES2015Detector.looksLikeES2015(parserRes.getAST()) == false;
70+
} else if (sourceType == SourceType.CLOSURE_MODULE ) {
71+
// TODO
72+
}
73+
74+
if (wrongGuess) {
75+
sourceType = SourceType.SCRIPT;
76+
parserRes =
77+
JSParser.parse(config, sourceType, source, textualExtractor.getMetrics());
78+
}
79+
}
80+
6181
return extract(textualExtractor, source, toplevelKind, scopeManager, sourceType, parserRes);
6282
}
6383

Lines changed: 7 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1,132 +1,30 @@
11
package com.semmle.js.extractor;
22

33
import com.semmle.js.ast.AssignmentExpression;
4-
import com.semmle.js.ast.BlockStatement;
54
import com.semmle.js.ast.CallExpression;
65
import com.semmle.js.ast.Expression;
7-
import com.semmle.js.ast.ExpressionStatement;
8-
import com.semmle.js.ast.IFunction;
9-
import com.semmle.js.ast.Identifier;
10-
import com.semmle.js.ast.IfStatement;
116
import com.semmle.js.ast.MemberExpression;
127
import com.semmle.js.ast.Node;
13-
import com.semmle.js.ast.ParenthesizedExpression;
14-
import com.semmle.js.ast.Program;
15-
import com.semmle.js.ast.Statement;
16-
import com.semmle.js.ast.TryStatement;
17-
import com.semmle.js.ast.UnaryExpression;
18-
import com.semmle.js.ast.VariableDeclaration;
19-
import com.semmle.js.ast.VariableDeclarator;
20-
import java.util.List;
218

229
/** A utility class for detecting Node.js code. */
23-
public class NodeJSDetector {
10+
public class NodeJSDetector extends AbstractDetector {
2411
/**
2512
* Is {@code ast} a program that looks like Node.js code, that is, does it contain a top-level
26-
* {@code require} or an export?
13+
* {@code require} or an {@code module.exports = ...}/{@code exports = ...}?
2714
*/
2815
public static boolean looksLikeNodeJS(Node ast) {
29-
if (!(ast instanceof Program)) return false;
30-
31-
return hasToplevelRequireOrExport(((Program) ast).getBody());
32-
}
33-
34-
/**
35-
* Does this program contain a statement that looks like a Node.js {@code require} or an export?
36-
*
37-
* <p>We recursively traverse argument-less immediately invoked function expressions (i.e., no UMD
38-
* modules), but not loops or if statements.
39-
*/
40-
private static boolean hasToplevelRequireOrExport(List<Statement> stmts) {
41-
for (Statement stmt : stmts) if (hasToplevelRequireOrExport(stmt)) return true;
42-
return false;
16+
return new NodeJSDetector().programDetection(ast);
4317
}
4418

45-
private static boolean hasToplevelRequireOrExport(Statement stmt) {
46-
if (stmt instanceof ExpressionStatement) {
47-
Expression e = stripParens(((ExpressionStatement) stmt).getExpression());
48-
49-
// check whether `e` is an iife; if so, recursively check its body
50-
51-
// strip off unary operators to handle `!function(){...}()`
52-
if (e instanceof UnaryExpression) e = ((UnaryExpression) e).getArgument();
53-
54-
if (e instanceof CallExpression && ((CallExpression) e).getArguments().isEmpty()) {
55-
Expression callee = stripParens(((CallExpression) e).getCallee());
56-
if (callee instanceof IFunction) {
57-
Node body = ((IFunction) callee).getBody();
58-
if (body instanceof BlockStatement)
59-
return hasToplevelRequireOrExport(((BlockStatement) body).getBody());
60-
}
61-
}
62-
63-
if (isRequireCall(e) || isExport(e)) return true;
64-
65-
} else if (stmt instanceof VariableDeclaration) {
66-
for (VariableDeclarator decl : ((VariableDeclaration) stmt).getDeclarations()) {
67-
Expression init = stripParens(decl.getInit());
68-
if (isRequireCall(init) || isExport(init)) return true;
69-
}
70-
71-
} else if (stmt instanceof TryStatement) {
72-
return hasToplevelRequireOrExport(((TryStatement) stmt).getBlock());
73-
74-
} else if (stmt instanceof BlockStatement) {
75-
return hasToplevelRequireOrExport(((BlockStatement) stmt).getBody());
76-
77-
} else if (stmt instanceof IfStatement) {
78-
IfStatement is = (IfStatement) stmt;
79-
return hasToplevelRequireOrExport(is.getConsequent())
80-
|| hasToplevelRequireOrExport(is.getAlternate());
81-
}
82-
83-
return false;
84-
}
85-
86-
private static Expression stripParens(Expression e) {
87-
if (e instanceof ParenthesizedExpression)
88-
return stripParens(((ParenthesizedExpression) e).getExpression());
89-
return e;
90-
}
91-
92-
/**
93-
* Is {@code e} a call to a function named {@code require} with one argument, or an assignment
94-
* whose right hand side is the result of such a call?
95-
*/
96-
private static boolean isRequireCall(Expression e) {
19+
@Override
20+
protected boolean visitExpression(Expression e) {
21+
// require('...')
9722
if (e instanceof CallExpression) {
9823
CallExpression call = (CallExpression) e;
9924
Expression callee = call.getCallee();
10025
if (isIdentifier(callee, "require") && call.getArguments().size() == 1) return true;
101-
if (isRequireCall(callee)) return true;
102-
return false;
103-
} else if (e instanceof MemberExpression) {
104-
return isRequireCall(((MemberExpression) e).getObject());
105-
} else if (e instanceof AssignmentExpression) {
106-
AssignmentExpression assgn = (AssignmentExpression) e;
107-
108-
// filter out compound assignments
109-
if (!"=".equals(assgn.getOperator())) return false;
110-
111-
return isRequireCall(assgn.getRight());
11226
}
113-
return false;
114-
}
11527

116-
/**
117-
* Does {@code e} look like a Node.js export?
118-
*
119-
* <p>Currently, three kinds of exports are recognised:
120-
*
121-
* <ul>
122-
* <li><code>exports.foo = ...</code>
123-
* <li><code>module.exports = ...</code>
124-
* <li><code>module.exports.foo = ...</code>
125-
* </ul>
126-
*
127-
* Detection is done recursively, so <code>foo = exports.foo = ...</code> is handled correctly.
128-
*/
129-
private static boolean isExport(Expression e) {
13028
if (e instanceof AssignmentExpression) {
13129
AssignmentExpression assgn = (AssignmentExpression) e;
13230

@@ -149,12 +47,9 @@ private static boolean isExport(Expression e) {
14947
if (isModuleExports(targetBase)) return true;
15048
}
15149
}
152-
153-
// recursively check right hand side
154-
return isExport(assgn.getRight());
15550
}
15651

157-
return false;
52+
return super.visitExpression(e);
15853
}
15954

16055
/** Is {@code me} a member expression {@code module.exports}? */
@@ -163,9 +58,4 @@ private static boolean isModuleExports(MemberExpression me) {
16358
&& isIdentifier(me.getObject(), "module")
16459
&& isIdentifier(me.getProperty(), "exports");
16560
}
166-
167-
/** Is {@code e} an identifier with name {@code name}? */
168-
private static boolean isIdentifier(Expression e, String name) {
169-
return e instanceof Identifier && name.equals(((Identifier) e).getName());
170-
}
17161
}

javascript/extractor/test/com/semmle/js/extractor/test/AllTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
@SuiteClasses({
1919
JSXTests.class,
2020
NodeJSDetectorTests.class,
21+
ES2015DetectorTests.class,
2122
TrapTests.class,
2223
ObjectRestSpreadTests.class,
2324
ClassPropertiesTests.class,

javascript/extractor/test/com/semmle/js/extractor/test/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ java_test(
1212
"TS_WRAPPER_ZIP": "$(rlocationpath //javascript/extractor/lib/typescript)",
1313
},
1414
test_class = "com.semmle.js.extractor.test.AllTests",
15+
# To use `replaceExpectedOutput` you need to uncomment the following line
16+
# (to be allowed to override the .trap files on disk)
17+
# tags = ["no-sandbox"],
1518
deps = [
1619
"//javascript/extractor",
1720
"//javascript/extractor:deps",

0 commit comments

Comments
 (0)