|
8 | 8 |
|
9 | 9 | private import python
|
10 | 10 | private import semmle.python.dataflow.new.DataFlow
|
| 11 | +private import semmle.python.dataflow.new.TaintTracking |
11 | 12 | private import semmle.python.Concepts
|
12 | 13 | private import semmle.python.ApiGraphs
|
13 | 14 | private import semmle.python.frameworks.data.ModelsAsData
|
@@ -68,16 +69,7 @@ module Lxml {
|
68 | 69 | */
|
69 | 70 | class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
|
70 | 71 | XPathCall() {
|
71 |
| - exists(API::Node parseResult | |
72 |
| - parseResult = |
73 |
| - etreeRef().getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"]).getReturn() |
74 |
| - or |
75 |
| - // TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text> |
76 |
| - // but we don't really have a way to model that nicely. |
77 |
| - parseResult = etreeRef().getMember("XMLParser").getReturn().getMember("close").getReturn() |
78 |
| - | |
79 |
| - this = parseResult.getMember("xpath").getACall() |
80 |
| - ) |
| 72 | + this.(DataFlow::MethodCallNode).calls([Element::instance(), ElementTree::instance()], "xpath") |
81 | 73 | }
|
82 | 74 |
|
83 | 75 | override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("_path")] }
|
@@ -318,4 +310,145 @@ module Lxml {
|
318 | 310 |
|
319 | 311 | override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
|
320 | 312 | }
|
| 313 | + |
| 314 | + /** Provides models for instances of the `lxml.etree.Element` class. */ |
| 315 | + module Element { |
| 316 | + /** Gets a reference to the `Element` class. */ |
| 317 | + API::Node classRef() { result = etreeRef().getMember("Element") } |
| 318 | + |
| 319 | + abstract class InstanceSource extends DataFlow::LocalSourceNode { } |
| 320 | + |
| 321 | + /** Gets a reference to an `lxml.etree.Element` instance. */ |
| 322 | + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { |
| 323 | + t.start() and |
| 324 | + result instanceof InstanceSource |
| 325 | + or |
| 326 | + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) |
| 327 | + } |
| 328 | + |
| 329 | + /** Gets a reference to an `lxml.etree.Element` instance. */ |
| 330 | + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } |
| 331 | + |
| 332 | + /** An `Element` instantiated directly. */ |
| 333 | + private class ElementInstance extends InstanceSource { |
| 334 | + ElementInstance() { this = classRef().getACall() } |
| 335 | + } |
| 336 | + |
| 337 | + /** The result of a parse operation that returns an `Element`. */ |
| 338 | + private class ParseResult extends InstanceSource, DataFlow::MethodCallNode { |
| 339 | + ParseResult() { |
| 340 | + this.calls(XmlParser::instance(_), "close") |
| 341 | + or |
| 342 | + // TODO: `XMLID` and `parseid` returns a tuple of which the first element is an `Element` |
| 343 | + this = etreeRef().getMember(["XML", "HTML", "fromstring", "fromstringlist"]).getACall() |
| 344 | + } |
| 345 | + } |
| 346 | + |
| 347 | + /** An element index such as `etree.parse(...)[0]` */ |
| 348 | + private class ElementIndex extends InstanceSource, DataFlow::Node { |
| 349 | + ElementIndex() { this.asExpr().(Subscript).getObject() = instance().asExpr() } |
| 350 | + } |
| 351 | + |
| 352 | + /** A call to a method on an `Element` that returns another `Element`. */ |
| 353 | + private class ElementMethod extends InstanceSource, DataFlow::MethodCallNode { |
| 354 | + ElementMethod() { |
| 355 | + // TODO: methods that return iterators of `Element`s - `findall`, `finditer`, `iter`, a few others |
| 356 | + // an `Element` itself can be used as an iterator of its children. |
| 357 | + this.calls(instance(), ["find", "getnext", "getprevious", "getparent"]) |
| 358 | + } |
| 359 | + } |
| 360 | + |
| 361 | + /** A call to a method on an `ElementTree` that returns an `Element`. */ |
| 362 | + private class ElementTreeMethod extends InstanceSource, DataFlow::MethodCallNode { |
| 363 | + ElementTreeMethod() { this.calls(ElementTree::instance(), "getroot") } |
| 364 | + } |
| 365 | + |
| 366 | + /** An additional taint step from an `Element` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase */ |
| 367 | + private class ElementTaintStep extends TaintTracking::AdditionalTaintStep { |
| 368 | + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { |
| 369 | + exists(DataFlow::MethodCallNode call | nodeTo = call and nodeFrom = instance() | |
| 370 | + call.calls(nodeFrom, |
| 371 | + // We don't consider sibling nodes to be tainted (getnext, getprevious, itersiblings) |
| 372 | + [ |
| 373 | + "cssselect", "find", "findall", "findtext", "get", "getchildren", "getiterator", |
| 374 | + "getparent", "getroottree", "items", "iter", "iterancestors", "iterchildren", |
| 375 | + "iterdescendants", "iterfind", "itertext", "keys", "values", "xpath" |
| 376 | + ]) |
| 377 | + ) |
| 378 | + or |
| 379 | + exists(DataFlow::AttrRead attr | nodeTo = attr and nodeFrom = instance() | |
| 380 | + attr.accesses(nodeFrom, ["attrib", "base", "nsmap", "tag", "tail", "text"]) |
| 381 | + ) |
| 382 | + } |
| 383 | + } |
| 384 | + } |
| 385 | + |
| 386 | + /** Provides models for instances of the `lxml.etree.ElementTree` class. */ |
| 387 | + module ElementTree { |
| 388 | + API::Node classRef() { result = etreeRef().getMember("ElementTree") } |
| 389 | + |
| 390 | + abstract class InstanceSource extends DataFlow::LocalSourceNode { } |
| 391 | + |
| 392 | + /** Gets a reference to an `lxml.etree.ElementTree` instance.` */ |
| 393 | + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { |
| 394 | + t.start() and |
| 395 | + result instanceof InstanceSource |
| 396 | + or |
| 397 | + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) |
| 398 | + } |
| 399 | + |
| 400 | + /** Gets a reference to an `lxml.etree.ElementTree` parsers instance. */ |
| 401 | + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } |
| 402 | + |
| 403 | + /** An `ElementTree` instantiated directly. */ |
| 404 | + private class ElementTreeInstance extends InstanceSource { |
| 405 | + ElementTreeInstance() { this = classRef().getACall() } |
| 406 | + } |
| 407 | + |
| 408 | + /** The result of a parst operation that returns an `ElementTree` */ |
| 409 | + private class ParseResult extends InstanceSource, DataFlow::MethodCallNode { |
| 410 | + ParseResult() { this = etreeRef().getMember("parse").getACall() } |
| 411 | + } |
| 412 | + |
| 413 | + /** A call to a method on an `Element` that returns another `Element`. */ |
| 414 | + private class ElementMethod extends InstanceSource, DataFlow::MethodCallNode { |
| 415 | + ElementMethod() { |
| 416 | + // TODO: methods that return iterators of `Element`s - `findall`, `finditer`, `iter`, a few others |
| 417 | + // an `Element` itself can be used as an iterator of its children. |
| 418 | + this.calls(Element::instance(), "getroottree") |
| 419 | + } |
| 420 | + } |
| 421 | + |
| 422 | + /** An additional taint step from an `ElementTree` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree._ElementTree */ |
| 423 | + private class ElementTaintStep extends TaintTracking::AdditionalTaintStep { |
| 424 | + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { |
| 425 | + exists(DataFlow::MethodCallNode call | nodeTo = call and nodeFrom = instance() | |
| 426 | + call.calls(nodeFrom, |
| 427 | + [ |
| 428 | + "find", "findall", "findtext", "get", "getiterator", "getroot", "iter", "iterfind", |
| 429 | + "xpath" |
| 430 | + ]) |
| 431 | + ) |
| 432 | + or |
| 433 | + exists(DataFlow::AttrRead attr | nodeTo = attr and nodeFrom = instance() | |
| 434 | + attr.accesses(nodeFrom, "docinfo") |
| 435 | + ) |
| 436 | + } |
| 437 | + } |
| 438 | + } |
| 439 | + |
| 440 | + /** A call to serialise xml to a string */ |
| 441 | + private class XmlEncoding extends Encoding::Range, DataFlow::CallCfgNode { |
| 442 | + XmlEncoding() { this = etreeRef().getMember("tostring").getACall() } |
| 443 | + |
| 444 | + override DataFlow::Node getAnInput() { |
| 445 | + result = [this.getArg(0), this.getArgByName("element_or_tree")] |
| 446 | + } |
| 447 | + |
| 448 | + override DataFlow::Node getOutput() { result = this } |
| 449 | + |
| 450 | + override string getFormat() { result = "XML" } |
| 451 | + } |
| 452 | + // TODO: ElementTree.write can write to a file-like object; should that be a flow step? |
| 453 | + // It also can accept a filepath which could be a path injection sink. |
321 | 454 | }
|
0 commit comments