Skip to content

Commit fd23e0b

Browse files
committed
use more API nodes in XmlParsers, and recognize more results from parsing XML
1 parent 3b6cd0f commit fd23e0b

File tree

4 files changed

+139
-47
lines changed

4 files changed

+139
-47
lines changed

javascript/ql/src/semmle/javascript/ApiGraphs.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,7 @@ private module Label {
924924
result = member(pn) and
925925
// only consider properties with alphanumeric(-ish) names, excluding special properties
926926
// and properties whose names look like they are meant to be internal
927-
pn.regexpMatch("(?!prototype$|__)[a-zA-Z_$][\\w\\-.$]*")
927+
pn.regexpMatch("(?!prototype$|__)[\\w_$][\\w\\-.$]*")
928928
)
929929
or
930930
not exists(pr.getPropertyName()) and

javascript/ql/src/semmle/javascript/frameworks/XmlParsers.qll

Lines changed: 110 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
* Provides classes for working with XML parser APIs.
33
*/
44

5-
import javascript as js
5+
private import javascript as js
6+
private import js::DataFlow as DataFlow
7+
private import js::API as API
68

79
module XML {
810
/**
@@ -12,9 +14,9 @@ module XML {
1214
/** Internal general entity. */
1315
InternalEntity() or
1416
/** External general entity, either parsed or unparsed. */
15-
ExternalEntity(boolean parsed) { parsed = true or parsed = false } or
17+
ExternalEntity(boolean parsed) { parsed = [true, false] } or
1618
/** Parameter entity, either internal or external. */
17-
ParameterEntity(boolean external) { external = true or external = false }
19+
ParameterEntity(boolean external) { external = [true, false] }
1820

1921
/**
2022
* A call to an XML parsing function.
@@ -27,16 +29,19 @@ module XML {
2729
abstract predicate resolvesEntities(EntityKind kind);
2830

2931
/** Gets a reference to a value resulting from parsing the XML. */
30-
js::DataFlow::Node getAResult() { none() }
32+
DataFlow::Node getAResult() { none() }
3133
}
3234

3335
/**
3436
* An invocation of `libxmljs.parseXml` or `libxmljs.parseXmlString`.
3537
*/
3638
class LibXmlJsParserInvocation extends ParserInvocation {
39+
API::CallNode call;
40+
3741
LibXmlJsParserInvocation() {
3842
exists(string m |
39-
this = js::DataFlow::moduleMember("libxmljs", m).getACall().asExpr() and
43+
call = API::moduleImport("libxmljs").getMember(m).getACall() and
44+
this = call.asExpr() and
4045
m.matches("parseXml%")
4146
)
4247
}
@@ -53,24 +58,67 @@ module XML {
5358
noent.mayHaveBooleanValue(true)
5459
)
5560
}
56-
}
5761

58-
/**
59-
* Gets a call to method `methodName` on an instance of class `className` from module `modName`.
60-
*/
61-
private js::MethodCallExpr moduleMethodCall(string modName, string className, string methodName) {
62-
exists(js::DataFlow::ModuleImportNode mod |
63-
mod.getPath() = modName and
64-
result = mod.getAConstructorInvocation(className).getAMethodCall(methodName).asExpr()
65-
)
62+
/**
63+
* A document from the `libxmljs` library.
64+
* The API is based on https://github.com/DefinitelyTyped/DefinitelyTyped/blob/master/types/libxmljs/index.d.ts
65+
*/
66+
private API::Node doc() {
67+
result = call.getReturn()
68+
or
69+
result = doc().getMember("encoding").getReturn()
70+
or
71+
result = element().getMember("doc").getReturn()
72+
or
73+
result = element().getMember("parent").getReturn()
74+
}
75+
76+
/**
77+
* An `Element` from the `libxmljs` library.
78+
*/
79+
private API::Node element() {
80+
result = doc().getMember(["child", "get", "node", "root"]).getReturn()
81+
or
82+
result = [doc(), element()].getMember(["childNodes", "find"]).getReturn().getAMember()
83+
or
84+
result =
85+
element()
86+
.getMember([
87+
"parent", "prevSibling", "nextSibling", "remove", "clone", "node", "child",
88+
"prevElement", "nextElement"
89+
])
90+
.getReturn()
91+
}
92+
93+
/**
94+
* An `Attr` from the `libxmljs` library.
95+
*/
96+
private API::Node attr() {
97+
result = element().getMember("attr").getReturn()
98+
or
99+
result = element().getMember("attrs").getReturn().getAMember()
100+
}
101+
102+
override DataFlow::Node getAResult() {
103+
result = [doc(), element(), attr()].getAnImmediateUse()
104+
or
105+
result = element().getMember(["name", "text"]).getACall()
106+
or
107+
result = attr().getMember(["name", "value"]).getACall()
108+
or
109+
result = element().getMember("namespace").getReturn().getMember(["href", "prefix"]).getACall()
110+
}
66111
}
67112

68113
/**
69114
* An invocation of `libxmljs.SaxParser.parseString`.
70115
*/
71116
class LibXmlJsSaxParserInvocation extends ParserInvocation {
117+
API::Node parser;
118+
72119
LibXmlJsSaxParserInvocation() {
73-
this = moduleMethodCall("libxmljs", "SaxParser", "parseString")
120+
parser = API::moduleImport("libxmljs").getMember("SaxParser").getInstance() and
121+
this = parser.getMember("parseString").getACall().asExpr()
74122
}
75123

76124
override js::Expr getSourceArgument() { result = getArgument(0) }
@@ -79,14 +127,21 @@ module XML {
79127
// entities are resolved by default
80128
any()
81129
}
130+
131+
override DataFlow::Node getAResult() {
132+
result = parser.getMember("on").getACall().getABoundCallbackParameter(1, _)
133+
}
82134
}
83135

84136
/**
85137
* An invocation of `libxmljs.SaxPushParser.push`.
86138
*/
87139
class LibXmlJsSaxPushParserInvocation extends ParserInvocation {
140+
API::Node parser;
141+
88142
LibXmlJsSaxPushParserInvocation() {
89-
this = moduleMethodCall("libxmljs", "SaxPushParser", "push")
143+
parser = API::moduleImport("libxmljs").getMember("SaxPushParser").getInstance() and
144+
this = parser.getMember("push").getACall().asExpr()
90145
}
91146

92147
override js::Expr getSourceArgument() { result = getArgument(0) }
@@ -95,17 +150,21 @@ module XML {
95150
// entities are resolved by default
96151
any()
97152
}
153+
154+
override DataFlow::Node getAResult() {
155+
result = parser.getMember("on").getACall().getABoundCallbackParameter(1, _)
156+
}
98157
}
99158

100159
/**
101160
* An invocation of `expat.Parser.parse` or `expat.Parser.write`.
102161
*/
103162
class ExpatParserInvocation extends ParserInvocation {
104-
js::DataFlow::NewNode parser;
163+
API::Node parser;
105164

106165
ExpatParserInvocation() {
107-
parser = js::DataFlow::moduleMember("node-expat", "Parser").getAnInstantiation() and
108-
this = parser.getAMemberCall(["parse", "write"]).asExpr()
166+
parser = API::moduleImport("node-expat").getMember("Parser").getInstance() and
167+
this = parser.getMember(["parse", "write"]).getACall().asExpr()
109168
}
110169

111170
override js::Expr getSourceArgument() { result = getArgument(0) }
@@ -115,8 +174,8 @@ module XML {
115174
kind = InternalEntity()
116175
}
117176

118-
override js::DataFlow::Node getAResult() {
119-
result = parser.getAMemberCall("on").getABoundCallbackParameter(1, _)
177+
override DataFlow::Node getAResult() {
178+
result = parser.getMember("on").getACall().getABoundCallbackParameter(1, _)
120179
}
121180
}
122181

@@ -125,26 +184,31 @@ module XML {
125184
*/
126185
private class DOMParserXmlParserInvocation extends XML::ParserInvocation {
127186
DOMParserXmlParserInvocation() {
128-
exists(js::DataFlow::GlobalVarRefNode domparser |
129-
domparser.getName() = "DOMParser" and
130-
this = domparser.getAnInstantiation().getAMethodCall("parseFromString").asExpr() and
131-
// type contains the string `xml`, that is, it's not `text/html`
132-
getArgument(1).mayHaveStringValue(any(string tp | tp.matches("%xml%")))
133-
)
187+
this =
188+
DataFlow::globalVarRef("DOMParser")
189+
.getAnInstantiation()
190+
.getAMethodCall("parseFromString")
191+
.asExpr() and
192+
// type contains the string `xml`, that is, it's not `text/html`
193+
getArgument(1).mayHaveStringValue(any(string tp | tp.matches("%xml%")))
134194
}
135195

136196
override js::Expr getSourceArgument() { result = getArgument(0) }
137197

138198
override predicate resolvesEntities(XML::EntityKind kind) { kind = InternalEntity() }
199+
200+
// The result is an XMLDocument (https://developer.mozilla.org/en-US/docs/Web/API/XMLDocument).
201+
// The API of the XMLDocument is not modelled.
202+
override DataFlow::Node getAResult() { result.asExpr() = this }
139203
}
140204

141205
/**
142206
* An invocation of `loadXML` on an IE legacy XML DOM or MSXML object.
143207
*/
144208
private class IELegacyXmlParserInvocation extends XML::ParserInvocation {
145209
IELegacyXmlParserInvocation() {
146-
exists(js::DataFlow::NewNode activeXObject, string activeXType |
147-
activeXObject = js::DataFlow::globalVarRef("ActiveXObject").getAnInstantiation() and
210+
exists(DataFlow::NewNode activeXObject, string activeXType |
211+
activeXObject = DataFlow::globalVarRef("ActiveXObject").getAnInstantiation() and
148212
activeXObject.getArgument(0).asExpr().mayHaveStringValue(activeXType) and
149213
activeXType.regexpMatch("Microsoft\\.XMLDOM|Msxml.*\\.DOMDocument.*") and
150214
this = activeXObject.getAMethodCall("loadXML").asExpr()
@@ -173,10 +237,10 @@ module XML {
173237
* An invocation of `xml2js`.
174238
*/
175239
private class Xml2JSInvocation extends XML::ParserInvocation {
176-
js::DataFlow::CallNode call;
240+
API::CallNode call;
177241

178242
Xml2JSInvocation() {
179-
exists(js::API::Node imp | imp = js::API::moduleImport("xml2js") |
243+
exists(API::Node imp | imp = API::moduleImport("xml2js") |
180244
call = [imp, imp.getMember("Parser").getInstance()].getMember("parseString").getACall() and
181245
this = call.asExpr()
182246
)
@@ -189,7 +253,7 @@ module XML {
189253
none()
190254
}
191255

192-
override js::DataFlow::Node getAResult() {
256+
override DataFlow::Node getAResult() {
193257
result = call.getABoundCallbackParameter(call.getNumArgument() - 1, 1)
194258
}
195259
}
@@ -198,10 +262,10 @@ module XML {
198262
* An invocation of `sax`.
199263
*/
200264
private class SaxInvocation extends XML::ParserInvocation {
201-
js::DataFlow::InvokeNode parser;
265+
API::InvokeNode parser;
202266

203267
SaxInvocation() {
204-
exists(js::API::Node imp | imp = js::API::moduleImport("sax") |
268+
exists(API::Node imp | imp = API::moduleImport("sax") |
205269
parser = imp.getMember("parser").getACall()
206270
or
207271
parser = imp.getMember("SAXParser").getAnInstantiation()
@@ -216,13 +280,13 @@ module XML {
216280
none()
217281
}
218282

219-
override js::DataFlow::Node getAResult() {
283+
override DataFlow::Node getAResult() {
220284
result =
221285
parser
222-
.getAPropertyWrite(any(string s | s.matches("on%")))
223-
.getRhs()
224-
.getAFunctionValue()
286+
.getReturn()
287+
.getMember(any(string s | s.matches("on%")))
225288
.getAParameter()
289+
.getAnImmediateUse()
226290
}
227291
}
228292

@@ -232,7 +296,8 @@ module XML {
232296
private class XmlJSInvocation extends XML::ParserInvocation {
233297
XmlJSInvocation() {
234298
this =
235-
js::DataFlow::moduleMember("xml-js", ["xml2json", "xml2js", "json2xml", "js2xml"])
299+
API::moduleImport("xml-js")
300+
.getMember(["xml2json", "xml2js", "json2xml", "js2xml"])
236301
.getACall()
237302
.asExpr()
238303
}
@@ -244,18 +309,18 @@ module XML {
244309
none()
245310
}
246311

247-
override js::DataFlow::Node getAResult() { result.asExpr() = this }
312+
override DataFlow::Node getAResult() { result.asExpr() = this }
248313
}
249314

250315
/**
251316
* An invocation of `htmlparser2`.
252317
*/
253318
private class HtmlParser2Invocation extends XML::ParserInvocation {
254-
js::DataFlow::NewNode parser;
319+
API::NewNode parser;
255320

256321
HtmlParser2Invocation() {
257-
parser = js::DataFlow::moduleMember("htmlparser2", "Parser").getAnInstantiation() and
258-
this = parser.getAMemberCall("write").asExpr()
322+
parser = API::moduleImport("htmlparser2").getMember("Parser").getAnInstantiation() and
323+
this = parser.getReturn().getMember("write").getACall().asExpr()
259324
}
260325

261326
override js::Expr getSourceArgument() { result = getArgument(0) }
@@ -265,7 +330,7 @@ module XML {
265330
none()
266331
}
267332

268-
override js::DataFlow::Node getAResult() {
333+
override DataFlow::Node getAResult() {
269334
result =
270335
parser
271336
.getArgument(0)
@@ -277,7 +342,7 @@ module XML {
277342
}
278343

279344
private class XMLParserTaintStep extends js::TaintTracking::SharedTaintStep {
280-
override predicate deserializeStep(js::DataFlow::Node pred, js::DataFlow::Node succ) {
345+
override predicate deserializeStep(DataFlow::Node pred, DataFlow::Node succ) {
281346
exists(XML::ParserInvocation parser |
282347
pred.asExpr() = parser.getSourceArgument() and
283348
succ = parser.getAResult()

javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,7 @@ typeInferenceMismatch
166166
| xml.js:23:18:23:25 | source() | xml.js:20:14:20:17 | attr |
167167
| xml.js:26:27:26:34 | source() | xml.js:26:10:26:39 | convert ... (), {}) |
168168
| xml.js:34:18:34:25 | source() | xml.js:31:18:31:21 | name |
169+
| xml.js:41:15:41:22 | source() | xml.js:44:10:44:22 | gchild.text() |
170+
| xml.js:41:15:41:22 | source() | xml.js:49:10:49:34 | child.a ... value() |
171+
| xml.js:41:15:41:22 | source() | xml.js:52:10:52:34 | child2. ... .name() |
172+
| xml.js:41:15:41:22 | source() | xml.js:58:12:58:14 | str |

javascript/ql/test/library-tests/TaintTracking/xml.js

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,27 @@
3434
parser.write(source());
3535
parser.end();
3636

37-
})();
37+
})();
38+
39+
(function () {
40+
var libxml = require("libxmljs");
41+
var xml = source();
42+
var xmlDoc = libxml.parseXmlString(xml);
43+
var gchild = xmlDoc.get('//grandchild');
44+
sink(gchild.text()); // NOT OK
45+
46+
var children = xmlDoc.root().childNodes();
47+
var child = children[0];
48+
49+
sink(child.attr('foo').value()); // NOT OK
50+
51+
var child2 = xmlDoc.root().child()
52+
sink(child2.attr('foo').name()); // NOT OK
53+
54+
const SaxPushParser = libxml.SaxPushParser;
55+
var parser = new SaxPushParser();
56+
parser.push(xml);
57+
parser.on('characters', function (str) {
58+
sink(str); // NOT OK
59+
})
60+
});

0 commit comments

Comments
 (0)