Skip to content

Commit c3f942f

Browse files
committed
Python: Provide internal InstanceTaintStepsHelper
I realized that if you ever wanted to the way taint-steps works again, you would have to go to all the 117 places it has been implemented, and change EVERY ONE OF THEM :( so trying to solve that problem here. Not super happy with the name, but that was just the best I could come up with :D
1 parent 6e9d9fc commit c3f942f

File tree

12 files changed

+448
-294
lines changed

12 files changed

+448
-294
lines changed

python/.vscode/ql.code-snippets

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -199,31 +199,51 @@
199199
" /**",
200200
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
201201
" */",
202+
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
203+
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
204+
" ",
205+
" override DataFlow::Node getInstance() { result = instance() }",
206+
" ",
207+
" override string getAttributeName() { none() }",
208+
" ",
209+
" override string getMethodName() { none() }",
210+
" ",
211+
" override string getAsyncMethodName() { none() }",
212+
" }",
213+
"",
214+
" /**",
215+
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
216+
" */",
202217
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
203218
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
204-
" // normal (non-async) methods",
205-
" nodeFrom = instance() and",
206-
" nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, [\"TODO\"])",
207-
" or",
208-
" // async methods",
209-
" exists(DataFlow::MethodCallNode call, Await await |",
210-
" nodeTo.asExpr() = await and",
211-
" nodeFrom = instance()",
212-
" |",
213-
" await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and",
214-
" call.calls(nodeFrom, [\"TODO\"])",
215-
" )",
216-
" or",
217-
" // Attributes",
218-
" nodeFrom = instance() and",
219-
" nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and",
220-
" nodeTo.(DataFlow::AttrRead).getAttributeName() in [\"TODO\"]",
219+
" // TODO",
220+
" none()",
221221
" }",
222222
" }",
223223
"}",
224224
],
225225
"description": "Type tracking class (select full class path before inserting)",
226226
},
227+
"foo": {
228+
"scope": "ql",
229+
"prefix": "foo",
230+
"body": [
231+
" /**",
232+
" * Taint propagation for `$1`.",
233+
" */",
234+
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
235+
" InstanceTaintSteps() { this = \"$1\" }",
236+
"",
237+
" override DataFlow::Node getInstance() { result = instance() }",
238+
"",
239+
" override string getAttributeName() { none() }",
240+
"",
241+
" override string getMethodName() { none() }",
242+
"",
243+
" override string getAsyncMethodName() { none() }",
244+
" }",
245+
],
246+
},
227247
"API graph .getMember chain": {
228248
"scope": "ql",
229249
"prefix": "api graph .getMember chain",

python/ql/src/semmle/python/frameworks/Aiohttp.qll

Lines changed: 28 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ private import semmle.python.frameworks.internal.PoorMansFunctionResolution
1313
private import semmle.python.frameworks.internal.SelfRefMixin
1414
private import semmle.python.frameworks.Multidict
1515
private import semmle.python.frameworks.Yarl
16+
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
1617

1718
/**
1819
* INTERNAL: Do not use.
@@ -296,33 +297,25 @@ module AiohttpWebModel {
296297

297298
/**
298299
* Taint propagation for `aiohttp.web.Request`.
299-
*
300-
* See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request
301300
*/
302-
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
303-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
304-
// normal (non-async) methods
305-
nodeFrom = Request::instance() and
306-
nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["clone", "get_extra_info"])
307-
or
308-
// async methods
309-
exists(DataFlow::MethodCallNode call, Await await |
310-
nodeTo.asExpr() = await and
311-
nodeFrom = Request::instance()
312-
|
313-
await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and
314-
call.calls(nodeFrom, ["read", "text", "json", "multipart", "post"])
315-
)
316-
or
317-
// Attributes
318-
nodeFrom = Request::instance() and
319-
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
320-
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
301+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
302+
InstanceTaintSteps() { this = "aiohttp.web.Request" }
303+
304+
override DataFlow::Node getInstance() { result = instance() }
305+
306+
override string getAttributeName() {
307+
result in [
321308
"url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query",
322309
"headers", "transport", "cookies", "content", "_payload", "content_type", "charset",
323310
"http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info"
324311
]
325312
}
313+
314+
override string getMethodName() { result in ["clone", "get_extra_info"] }
315+
316+
override string getAsyncMethodName() {
317+
result in ["read", "text", "json", "multipart", "post"]
318+
}
326319
}
327320

328321
/** An attribute read on an `aiohttp.web.Request` that is a `MultiDictProxy` instance. */
@@ -424,24 +417,20 @@ module AiohttpWebModel {
424417
/**
425418
* Taint propagation for `aiohttp.StreamReader`.
426419
*/
427-
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
428-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
429-
// normal (non-async) methods
430-
nodeFrom = instance() and
431-
nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["read_nowait"])
432-
or
433-
// async methods
434-
exists(DataFlow::MethodCallNode call, Await await |
435-
nodeTo.asExpr() = await and
436-
nodeFrom = instance()
437-
|
438-
await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and
439-
call.calls(nodeFrom,
440-
[
441-
"read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any",
442-
"iter_chunks"
443-
])
444-
)
420+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
421+
InstanceTaintSteps() { this = "aiohttp.StreamReader" }
422+
423+
override DataFlow::Node getInstance() { result = instance() }
424+
425+
override string getAttributeName() { none() }
426+
427+
override string getMethodName() { result in ["read_nowait"] }
428+
429+
override string getAsyncMethodName() {
430+
result in [
431+
"read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any",
432+
"iter_chunks"
433+
]
445434
}
446435
}
447436
}

python/ql/src/semmle/python/frameworks/Django.qll

Lines changed: 74 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ private import semmle.python.frameworks.Stdlib
1414
private import semmle.python.regex
1515
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
1616
private import semmle.python.frameworks.internal.SelfRefMixin
17+
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
1718

1819
/**
1920
* Provides models for the `django` PyPI package.
@@ -340,19 +341,30 @@ private module Django {
340341
/**
341342
* Taint propagation for `django.utils.datastructures.MultiValueDict`.
342343
*/
344+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
345+
InstanceTaintSteps() { this = "django.utils.datastructures.MultiValueDict" }
346+
347+
override DataFlow::Node getInstance() { result = instance() }
348+
349+
override string getAttributeName() { none() }
350+
351+
override string getMethodName() {
352+
result in ["getlist", "lists", "popitem", "dict", "urlencode"]
353+
}
354+
355+
override string getAsyncMethodName() { none() }
356+
}
357+
358+
/**
359+
* Extra taint propagation for `django.utils.datastructures.MultiValueDict`, not covered by `InstanceTaintSteps`.
360+
*/
343361
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
344362
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
345363
// class instantiation
346364
exists(ClassInstantiation call |
347365
nodeFrom = call.getArg(0) and
348366
nodeTo = call
349367
)
350-
or
351-
// normal (non-async) methods
352-
nodeFrom = instance() and
353-
nodeTo
354-
.(DataFlow::MethodCallNode)
355-
.calls(nodeFrom, ["getlist", "lists", "popitem", "dict", "urlencode"])
356368
}
357369
}
358370
}
@@ -388,15 +400,20 @@ private module Django {
388400
/**
389401
* Taint propagation for `django.core.files.uploadedfile.UploadedFile`.
390402
*/
391-
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
392-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
393-
// Attributes
394-
nodeFrom = instance() and
395-
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
396-
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
403+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
404+
InstanceTaintSteps() { this = "django.core.files.uploadedfile.UploadedFile" }
405+
406+
override DataFlow::Node getInstance() { result = instance() }
407+
408+
override string getAttributeName() {
409+
result in [
397410
"content_type", "content_type_extra", "content_type_extra", "charset", "name", "file"
398411
]
399412
}
413+
414+
override string getMethodName() { none() }
415+
416+
override string getAsyncMethodName() { none() }
400417
}
401418

402419
/** A file-like object instance that originates from a `UploadedFile`. */
@@ -436,13 +453,16 @@ private module Django {
436453
/**
437454
* Taint propagation for `django.urls.ResolverMatch`.
438455
*/
439-
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
440-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
441-
// Attributes
442-
nodeFrom = instance() and
443-
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
444-
nodeTo.(DataFlow::AttrRead).getAttributeName() in ["args", "kwargs"]
445-
}
456+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
457+
InstanceTaintSteps() { this = "django.urls.ResolverMatch" }
458+
459+
override DataFlow::Node getInstance() { result = instance() }
460+
461+
override string getAttributeName() { result in ["args", "kwargs"] }
462+
463+
override string getMethodName() { none() }
464+
465+
override string getAsyncMethodName() { none() }
446466
}
447467
}
448468
}
@@ -747,15 +767,43 @@ private module PrivateDjango {
747767
/**
748768
* Taint propagation for `django.http.request.HttpRequest`.
749769
*/
770+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
771+
InstanceTaintSteps() { this = "django.http.request.HttpRequest" }
772+
773+
override DataFlow::Node getInstance() { result = instance() }
774+
775+
override string getAttributeName() {
776+
result in [
777+
// str / bytes
778+
"body", "path", "path_info", "method", "encoding", "content_type",
779+
// django.http.QueryDict
780+
"GET", "POST",
781+
// dict[str, str]
782+
"content_params", "COOKIES",
783+
// dict[str, Any]
784+
"META",
785+
// HttpHeaders (case insensitive dict-like)
786+
"headers",
787+
// MultiValueDict[str, UploadedFile]
788+
"FILES",
789+
// django.urls.ResolverMatch
790+
"resolver_match"
791+
]
792+
// TODO: Handle that a HttpRequest is iterable
793+
}
794+
795+
override string getMethodName() {
796+
result in ["get_full_path", "get_full_path_info", "read", "readline", "readlines"]
797+
}
798+
799+
override string getAsyncMethodName() { none() }
800+
}
801+
802+
/**
803+
* Extra taint propagation for `django.http.request.HttpRequest`, not covered by `InstanceTaintSteps`.
804+
*/
750805
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
751806
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
752-
// normal (non-async) methods
753-
nodeFrom = django::http::request::HttpRequest::instance() and
754-
nodeTo
755-
.(DataFlow::MethodCallNode)
756-
.calls(nodeFrom,
757-
["get_full_path", "get_full_path_info", "read", "readline", "readlines"])
758-
or
759807
// special handling of the `build_absolute_uri` method, see
760808
// https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri
761809
exists(DataFlow::AttrRead attr, DataFlow::CallCfgNode call, DataFlow::Node instance |
@@ -775,27 +823,6 @@ private module PrivateDjango {
775823
nodeFrom = call.getArgByName("location")
776824
)
777825
)
778-
or
779-
// Attributes
780-
nodeFrom = django::http::request::HttpRequest::instance() and
781-
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
782-
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
783-
// str / bytes
784-
"body", "path", "path_info", "method", "encoding", "content_type",
785-
// django.http.QueryDict
786-
"GET", "POST",
787-
// dict[str, str]
788-
"content_params", "COOKIES",
789-
// dict[str, Any]
790-
"META",
791-
// HttpHeaders (case insensitive dict-like)
792-
"headers",
793-
// MultiValueDict[str, UploadedFile]
794-
"FILES",
795-
// django.urls.ResolverMatch
796-
"resolver_match"
797-
]
798-
// TODO: Handle that a HttpRequest is iterable
799826
}
800827
}
801828

0 commit comments

Comments
 (0)