@@ -69,6 +69,8 @@ module Lxml {
69
69
*/
70
70
class XPathCall extends XML:: XPathExecution:: Range , DataFlow:: CallCfgNode {
71
71
XPathCall ( ) {
72
+ // TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
73
+ // but we don't really have a way to model that nicely.
72
74
this = [ Element:: instance ( ) , ElementTree:: instance ( ) ] .getMember ( "xpath" ) .getACall ( )
73
75
}
74
76
@@ -201,9 +203,10 @@ module Lxml {
201
203
* A call to either of:
202
204
* - `lxml.etree.fromstring`
203
205
* - `lxml.etree.fromstringlist`
204
- * -
206
+ * - `lxml.etree.HTML`
205
207
* - `lxml.etree.XML`
206
208
* - `lxml.etree.XMLID`
209
+ * - `lxml.etree.XMLDTDID`
207
210
* - `lxml.etree.parse`
208
211
* - `lxml.etree.parseid`
209
212
*
@@ -329,7 +332,7 @@ module Lxml {
329
332
* calls, or a special parameter that will be set when functions are called by an external
330
333
* library.
331
334
*
332
- * Use the predicate `Element::instance()` to get references to instances of `lxml.etree.ElementTree ` instances.
335
+ * Use the predicate `Element::instance()` to get references to `lxml.etree.Element` instances.
333
336
*/
334
337
abstract class InstanceSource instanceof API:: Node {
335
338
/** Gets a textual representation of this element. */
@@ -354,7 +357,8 @@ module Lxml {
354
357
etreeRef ( ) .getMember ( "get_default_parser" ) .getReturn ( )
355
358
] .getMember ( "close" ) .getReturn ( )
356
359
or
357
- // TODO: `XMLID` and `parseid` returns a tuple of which the first element is an `Element`
360
+ // TODO: `XMLID`, `XMLDTDID`, and `parseid` each return a tuple of which the first element is an `Element`.
361
+ // `iterparse` returns an iterator of tuples, each of which has a second element that is an `Element`.
358
362
this = etreeRef ( ) .getMember ( [ "XML" , "HTML" , "fromstring" , "fromstringlist" ] ) .getReturn ( )
359
363
}
360
364
}
@@ -393,15 +397,18 @@ module Lxml {
393
397
}
394
398
}
395
399
396
- /** An additional taint step from an `Element` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase */
400
+ /**
401
+ * An additional taint step from an `Element` instance.
402
+ * See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase.
403
+ */
397
404
private class ElementTaintStep extends TaintTracking:: AdditionalTaintStep {
398
405
override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
399
406
exists ( DataFlow:: MethodCallNode call |
400
407
nodeTo = call and instance ( ) .asSource ( ) .flowsTo ( nodeFrom )
401
408
|
402
409
call .calls ( nodeFrom ,
403
- // We consider a node to be tainted if there could be taint anywhere in the element tree
404
- // So sibling nodes (e.g. `getnext`) are also tainted
410
+ // We consider a node to be tainted if there could be taint anywhere in the element tree,
411
+ // so sibling nodes (e.g. `getnext`) are also tainted.
405
412
// This ensures nodes like `elem[0].getnext()` are tracked.
406
413
[
407
414
"cssselect" , "find" , "findall" , "findtext" , "get" , "getchildren" , "getiterator" ,
@@ -445,7 +452,7 @@ module Lxml {
445
452
ElementTreeInstance ( ) { this = classRef ( ) .getAnInstance ( ) }
446
453
}
447
454
448
- /** The result of a parst operation that returns an `ElementTree` */
455
+ /** The result of a parse operation that returns an `ElementTree`. */
449
456
private class ParseResult extends InstanceSource {
450
457
ParseResult ( ) { this = etreeRef ( ) .getMember ( "parse" ) .getReturn ( ) }
451
458
}
0 commit comments