@@ -69,7 +69,7 @@ module Lxml {
69
69
*/
70
70
class XPathCall extends XML:: XPathExecution:: Range , DataFlow:: CallCfgNode {
71
71
XPathCall ( ) {
72
- this . ( DataFlow :: MethodCallNode ) . calls ( [ Element:: instance ( ) , ElementTree:: instance ( ) ] , "xpath" )
72
+ this = [ Element:: instance ( ) , ElementTree:: instance ( ) ] . getMember ( "xpath" ) . getACall ( )
73
73
}
74
74
75
75
override DataFlow:: Node getXPath ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "_path" ) ] }
@@ -201,6 +201,7 @@ module Lxml {
201
201
* A call to either of:
202
202
* - `lxml.etree.fromstring`
203
203
* - `lxml.etree.fromstringlist`
204
+ * - `lxml.etree.HTML`
204
205
* - `lxml.etree.XML`
205
206
* - `lxml.etree.XMLID`
206
207
* - `lxml.etree.parse`
@@ -209,23 +210,27 @@ module Lxml {
209
210
* See
210
211
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
211
212
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist
213
+ * - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.HTML
212
214
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML
213
215
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XMLID
216
+ * - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLDTDID
214
217
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
215
218
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
216
219
*/
217
220
private class LxmlParsing extends DataFlow:: CallCfgNode , XML:: XmlParsing:: Range {
218
221
string functionName ;
219
222
220
223
LxmlParsing ( ) {
221
- functionName in [ "fromstring" , "fromstringlist" , "XML" , "XMLID" , "parse" , "parseid" ] and
224
+ functionName in [
225
+ "fromstring" , "fromstringlist" , "HTML" , "XML" , "XMLID" , "XMLDTDID" , "parse" , "parseid"
226
+ ] and
222
227
this = etreeRef ( ) .getMember ( functionName ) .getACall ( )
223
228
}
224
229
225
230
override DataFlow:: Node getAnInput ( ) {
226
231
result in [
227
232
this .getArg ( 0 ) ,
228
- // fromstring / XML / XMLID
233
+ // fromstring / HTML / XML / XMLID / XMLDTDID
229
234
this .getArgByName ( "text" ) ,
230
235
// fromstringlist
231
236
this .getArgByName ( "strings" ) ,
@@ -240,7 +245,8 @@ module Lxml {
240
245
this .getParserArg ( ) = XmlParser:: instanceVulnerableTo ( kind )
241
246
or
242
247
kind .isXxe ( ) and
243
- not exists ( this .getParserArg ( ) )
248
+ not exists ( this .getParserArg ( ) ) and
249
+ not functionName = "HTML"
244
250
}
245
251
246
252
override predicate mayExecuteInput ( ) { none ( ) }
@@ -314,78 +320,97 @@ module Lxml {
314
320
/** Provides models for instances of the `lxml.etree.Element` class. */
315
321
module Element {
316
322
/** Gets a reference to the `Element` class. */
317
- API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( "Element" ) }
318
-
319
- abstract class InstanceSource extends DataFlow:: LocalSourceNode { }
323
+ API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( [ "Element" , "_Element" ] ) }
320
324
321
- /** Gets a reference to an `lxml.etree.Element` instance. */
322
- private DataFlow:: TypeTrackingNode instance ( DataFlow:: TypeTracker t ) {
323
- t .start ( ) and
324
- result instanceof InstanceSource
325
- or
326
- exists ( DataFlow:: TypeTracker t2 | result = instance ( t2 ) .track ( t2 , t ) )
325
+ abstract class InstanceSource instanceof API:: Node {
326
+ string toString ( ) { result = super .toString ( ) }
327
327
}
328
328
329
329
/** Gets a reference to an `lxml.etree.Element` instance. */
330
- DataFlow :: Node instance ( ) { instance ( DataFlow :: TypeTracker :: end ( ) ) . flowsTo ( result ) }
330
+ API :: Node instance ( ) { result instanceof InstanceSource }
331
331
332
332
/** An `Element` instantiated directly. */
333
333
private class ElementInstance extends InstanceSource {
334
- ElementInstance ( ) { this = classRef ( ) .getACall ( ) }
334
+ ElementInstance ( ) { this = classRef ( ) .getAnInstance ( ) }
335
335
}
336
336
337
337
/** The result of a parse operation that returns an `Element`. */
338
- private class ParseResult extends InstanceSource , DataFlow :: MethodCallNode {
338
+ private class ParseResult extends InstanceSource {
339
339
ParseResult ( ) {
340
- this .calls ( XmlParser:: instance ( _) , "close" )
340
+ // TODO: The XmlParser module does not currently use API graphs
341
+ this =
342
+ [
343
+ etreeRef ( ) .getMember ( "XMLParser" ) .getAnInstance ( ) ,
344
+ etreeRef ( ) .getMember ( "get_default_parser" ) .getReturn ( )
345
+ ] .getMember ( "close" ) .getReturn ( )
341
346
or
342
347
// TODO: `XMLID` and `parseid` return a tuple of which the first element is an `Element`
343
- this = etreeRef ( ) .getMember ( [ "XML" , "HTML" , "fromstring" , "fromstringlist" ] ) .getACall ( )
348
+ this = etreeRef ( ) .getMember ( [ "XML" , "HTML" , "fromstring" , "fromstringlist" ] ) .getReturn ( )
344
349
}
345
350
}
346
351
347
- /** An element index such as `etree.parse(...)[0]` */
348
- private class ElementIndex extends InstanceSource , DataFlow:: Node {
349
- ElementIndex ( ) { this .asExpr ( ) .( Subscript ) .getObject ( ) = instance ( ) .asExpr ( ) }
350
- }
351
-
352
352
/** A call to a method on an `Element` that returns another `Element`. */
353
- private class ElementMethod extends InstanceSource , DataFlow :: MethodCallNode {
353
+ private class ElementMethod extends InstanceSource {
354
354
ElementMethod ( ) {
355
- // TODO: methods that return iterators of `Element`s - `findall`, `finditer`, `iter`, a few others
356
- // an `Element` itself can be used as an iterator of its children.
357
- this .calls ( instance ( ) , [ "find" , "getnext" , "getprevious" , "getparent" ] )
355
+ // an Element is an iterator of Elements
356
+ this = instance ( ) .getASubscript ( )
357
+ or
358
+ // methods that return an Element
359
+ this = instance ( ) .getMember ( [ "find" , "getnext" , "getprevious" , "getparent" ] ) .getReturn ( )
360
+ or
361
+ // methods that return an iterator of Elements
362
+ this =
363
+ instance ( )
364
+ .getMember ( [
365
+ "cssselect" , "findall" , "getchildren" , "getiterator" , "iter" , "iterancestors" ,
366
+ "iterdescendants" , "iterchildren" , "itersiblings" , "iterfind" , "xpath"
367
+ ] )
368
+ .getReturn ( )
369
+ .getASubscript ( )
358
370
}
359
371
}
360
372
361
373
/** A call to a method on an `ElementTree` that returns an `Element`. */
362
- private class ElementTreeMethod extends InstanceSource , DataFlow:: MethodCallNode {
363
- ElementTreeMethod ( ) { this .calls ( ElementTree:: instance ( ) , "getroot" ) }
374
+ private class ElementTreeMethod extends InstanceSource {
375
+ ElementTreeMethod ( ) {
376
+ this = ElementTree:: instance ( ) .getMember ( [ "getroot" , "find" ] ) .getReturn ( )
377
+ or
378
+ this =
379
+ ElementTree:: instance ( )
380
+ .getMember ( [ "findall" , "getiterator" , "iter" , "iterfind" , "xpath" ] )
381
+ .getReturn ( )
382
+ .getASubscript ( )
383
+ }
364
384
}
365
385
366
386
/** An additional taint step from an `Element` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase */
367
387
private class ElementTaintStep extends TaintTracking:: AdditionalTaintStep {
368
388
override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
369
- exists ( DataFlow:: MethodCallNode call | nodeTo = call and nodeFrom = instance ( ) |
389
+ exists ( DataFlow:: MethodCallNode call |
390
+ nodeTo = call and instance ( ) .asSource ( ) .flowsTo ( nodeFrom )
391
+ |
370
392
call .calls ( nodeFrom ,
371
- // We don't consider sibling nodes to be tainted (getnext, getprevious, itersiblings)
393
+ // We consider a node to be tainted if there could be taint anywhere in the element tree
394
+ // So sibling nodes (e.g. `getnext`) are also tainted
395
+ // This ensures nodes like `elem[0].getnext()` are tracked.
372
396
[
373
397
"cssselect" , "find" , "findall" , "findtext" , "get" , "getchildren" , "getiterator" ,
374
- "getparent" , "getroottree" , "items" , "iter" , "iterancestors" , "iterchildren" ,
375
- "iterdescendants" , "iterfind" , "itertext" , "keys" , "values" , "xpath"
398
+ "getnext" , "getparent" , "getprevious" , "getroottree" , "items" , "iter" ,
399
+ "iterancestors" , "iterchildren" , "iterdescendants" , "itersiblings" , "iterfind" ,
400
+ "itertext" , "keys" , "values" , "xpath"
376
401
] )
377
402
)
378
403
or
379
- exists ( DataFlow:: AttrRead attr | nodeTo = attr and nodeFrom = instance ( ) |
380
- attr .accesses ( nodeFrom , [ "attrib" , "base" , "nsmap" , "tag" , "tail" , "text" ] )
404
+ exists ( DataFlow:: AttrRead attr | nodeTo = attr and instance ( ) . asSource ( ) . flowsTo ( nodeFrom ) |
405
+ attr .accesses ( nodeFrom , [ "attrib" , "base" , "nsmap" , "prefix" , "tag" , "tail" , "text" ] )
381
406
)
382
407
}
383
408
}
384
409
}
385
410
386
411
/** Provides models for instances of the `lxml.etree.ElementTree` class. */
387
412
module ElementTree {
388
- API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( "ElementTree" ) }
413
+ API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( [ "ElementTree" , "_ElementTree" ] ) }
389
414
390
415
/**
391
416
* A source of `lxml.etree.ElementTree` instances; extend this class to model new instances.
@@ -396,50 +421,42 @@ module Lxml {
396
421
*
397
422
* Use the predicate `ElementTree::instance()` to get references to `lxml.etree.ElementTree` instances.
398
423
*/
399
- abstract class InstanceSource extends DataFlow:: LocalSourceNode { }
400
-
401
- /** Gets a reference to an `lxml.etree.ElementTree` instance.` */
402
- private DataFlow:: TypeTrackingNode instance ( DataFlow:: TypeTracker t ) {
403
- t .start ( ) and
404
- result instanceof InstanceSource
405
- or
406
- exists ( DataFlow:: TypeTracker t2 | result = instance ( t2 ) .track ( t2 , t ) )
424
+ abstract class InstanceSource instanceof API:: Node {
425
+ string toString ( ) { result = super .toString ( ) }
407
426
}
408
427
409
428
/** Gets a reference to an `lxml.etree.ElementTree` instance. */
410
- DataFlow :: Node instance ( ) { instance ( DataFlow :: TypeTracker :: end ( ) ) . flowsTo ( result ) }
429
+ API :: Node instance ( ) { result instanceof InstanceSource }
411
430
412
431
/** An `ElementTree` instantiated directly. */
413
432
private class ElementTreeInstance extends InstanceSource {
414
- ElementTreeInstance ( ) { this = classRef ( ) .getACall ( ) }
433
+ ElementTreeInstance ( ) { this = classRef ( ) .getAnInstance ( ) }
415
434
}
416
435
417
436
/** The result of a parse operation that returns an `ElementTree`. */
418
- private class ParseResult extends InstanceSource , DataFlow :: MethodCallNode {
419
- ParseResult ( ) { this = etreeRef ( ) .getMember ( "parse" ) .getACall ( ) }
437
+ private class ParseResult extends InstanceSource {
438
+ ParseResult ( ) { this = etreeRef ( ) .getMember ( "parse" ) .getReturn ( ) }
420
439
}
421
440
422
441
/** A call to a method on an `Element` that returns an `ElementTree`. */
423
- private class ElementMethod extends InstanceSource , DataFlow:: MethodCallNode {
424
- ElementMethod ( ) {
425
- // TODO: methods that return iterators of `Element`s - `findall`, `finditer`, `iter`, a few others
426
- // an `Element` itself can be used as an iterator of its children.
427
- this .calls ( Element:: instance ( ) , "getroottree" )
428
- }
442
+ private class ElementMethod extends InstanceSource {
443
+ ElementMethod ( ) { this = Element:: instance ( ) .getMember ( "getroottree" ) .getReturn ( ) }
429
444
}
430
445
431
446
/** An additional taint step from an `ElementTree` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree._ElementTree */
432
447
private class ElementTaintStep extends TaintTracking:: AdditionalTaintStep {
433
448
override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
434
- exists ( DataFlow:: MethodCallNode call | nodeTo = call and nodeFrom = instance ( ) |
449
+ exists ( DataFlow:: MethodCallNode call |
450
+ nodeTo = call and instance ( ) .asSource ( ) .flowsTo ( nodeFrom )
451
+ |
435
452
call .calls ( nodeFrom ,
436
453
[
437
454
"find" , "findall" , "findtext" , "get" , "getiterator" , "getroot" , "iter" , "iterfind" ,
438
455
"xpath"
439
456
] )
440
457
)
441
458
or
442
- exists ( DataFlow:: AttrRead attr | nodeTo = attr and nodeFrom = instance ( ) |
459
+ exists ( DataFlow:: AttrRead attr | nodeTo = attr and instance ( ) . asSource ( ) . flowsTo ( nodeFrom ) |
443
460
attr .accesses ( nodeFrom , "docinfo" )
444
461
)
445
462
}
0 commit comments