Skip to content

Commit fc71a64

Browse files
authored
Merge pull request github#6092 from RasmusWL/markupsafe-modeling
Python: Add `MarkupSafe` model
2 parents d8b017e + c270817 commit fc71a64

File tree

12 files changed

+367
-0
lines changed

12 files changed

+367
-0
lines changed

docs/codeql/support/reusables/frameworks.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,3 +178,4 @@ Python built-in support
178178
pycryptodome, Cryptography library
179179
pycryptodomex, Cryptography library
180180
rsa, Cryptography library
181+
MarkupSafe, Escaping Library
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of the PyPI package `MarkupSafe`.

python/ql/src/semmle/python/Concepts.qll

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,78 @@ module SqlExecution {
293293
}
294294
}
295295

296+
/**
297+
* A data-flow node that escapes meta-characters, which could be used to prevent
298+
* injection attacks.
299+
*
300+
* Extend this class to refine existing API models. If you want to model new APIs,
301+
* extend `Escaping::Range` instead.
302+
*/
303+
class Escaping extends DataFlow::Node {
304+
Escaping::Range range;
305+
306+
Escaping() {
307+
this = range and
308+
// escapes that don't have _both_ input/output defined are not valid
309+
exists(range.getAnInput()) and
310+
exists(range.getOutput())
311+
}
312+
313+
/** Gets an input that will be escaped. */
314+
DataFlow::Node getAnInput() { result = range.getAnInput() }
315+
316+
/** Gets the output that contains the escaped data. */
317+
DataFlow::Node getOutput() { result = range.getOutput() }
318+
319+
/**
320+
* Gets the context that this function escapes for, such as `html`, or `url`.
321+
*/
322+
string getKind() { result = range.getKind() }
323+
}
324+
325+
/** Provides a class for modeling new escaping APIs. */
326+
module Escaping {
327+
/**
328+
* A data-flow node that escapes meta-characters, which could be used to prevent
329+
* injection attacks.
330+
*
331+
* Extend this class to model new APIs. If you want to refine existing API models,
332+
* extend `Escaping` instead.
333+
*/
334+
abstract class Range extends DataFlow::Node {
335+
/** Gets an input that will be escaped. */
336+
abstract DataFlow::Node getAnInput();
337+
338+
/** Gets the output that contains the escaped data. */
339+
abstract DataFlow::Node getOutput();
340+
341+
/**
342+
* Gets the context that this function escapes for.
343+
*
344+
* While kinds are represented as strings, this should not be relied upon. Use the
345+
* predicates in the `Escaping` module, such as `getHtmlKind`.
346+
*/
347+
abstract string getKind();
348+
}
349+
350+
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
351+
string getHtmlKind() { result = "html" }
352+
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
353+
//
354+
// Technically it claims to escape for both HTML and XML, but for now we don't have
355+
// anything that relies on XML escaping, so I'm going to defer deciding whether they
356+
// should be the same kind, or whether they deserve to be treated differently.
357+
}
358+
359+
/**
360+
* An escape of a string so it can be safely included in
361+
* the body of an HTML element, for example, replacing `{}` in
362+
* `<p>{}</p>`.
363+
*/
364+
class HtmlEscaping extends Escaping {
365+
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
366+
}
367+
296368
/** Provides classes for modeling HTTP-related APIs. */
297369
module HTTP {
298370
import semmle.python.web.HttpConstants

python/ql/src/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ private import semmle.python.frameworks.Flask
1616
private import semmle.python.frameworks.Idna
1717
private import semmle.python.frameworks.Invoke
1818
private import semmle.python.frameworks.Jmespath
19+
private import semmle.python.frameworks.MarkupSafe
1920
private import semmle.python.frameworks.Multidict
2021
private import semmle.python.frameworks.Mysql
2122
private import semmle.python.frameworks.MySQLdb
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `MarkupSafe` PyPI package.
3+
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.TaintTracking
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `MarkupSafe` PyPI package.
14+
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
15+
*/
16+
private module MarkupSafeModel {
17+
/**
18+
* Provides models for the `markupsafe.Markup` class
19+
*
20+
* See https://markupsafe.palletsprojects.com/en/2.0.x/escaping/#markupsafe.Markup.
21+
*/
22+
module Markup {
23+
/** Gets a reference to the `markupsafe.Markup` class. */
24+
API::Node classRef() {
25+
result = API::moduleImport("markupsafe").getMember("Markup")
26+
or
27+
result = API::moduleImport("flask").getMember("Markup")
28+
}
29+
30+
/**
31+
* A source of instances of `markupsafe.Markup`, extend this class to model new instances.
32+
*
33+
* This can include instantiations of the class, return values from function
34+
* calls, or a special parameter that will be set when functions are called by an external
35+
* library.
36+
*
37+
* Use the predicate `Markup::instance()` to get references to instances of `markupsafe.Markup`.
38+
*/
39+
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
40+
41+
/** A direct instantiation of `markupsafe.Markup`. */
42+
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
43+
override CallNode node;
44+
45+
ClassInstantiation() { this = classRef().getACall() }
46+
}
47+
48+
/** Gets a reference to an instance of `markupsafe.Markup`. */
49+
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
50+
t.start() and
51+
result instanceof InstanceSource
52+
or
53+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
54+
}
55+
56+
/** Gets a reference to an instance of `markupsafe.Markup`. */
57+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
58+
59+
/** A string concatenation with a `markupsafe.Markup` involved. */
60+
class StringConcat extends Markup::InstanceSource, DataFlow::CfgNode {
61+
override BinaryExprNode node;
62+
63+
StringConcat() {
64+
node.getOp() instanceof Add and
65+
instance().asCfgNode() in [node.getLeft(), node.getRight()]
66+
}
67+
}
68+
69+
/** A string format with `markupsafe.Markup` as the format string. */
70+
class StringFormat extends Markup::InstanceSource, DataFlow::MethodCallNode {
71+
StringFormat() { this.calls(instance(), "format") }
72+
}
73+
74+
/** A %-style string format with `markupsafe.Markup` as the format string. */
75+
class PercentStringFormat extends Markup::InstanceSource, DataFlow::CfgNode {
76+
override BinaryExprNode node;
77+
78+
PercentStringFormat() {
79+
node.getOp() instanceof Mod and
80+
instance().asCfgNode() = node.getLeft()
81+
}
82+
}
83+
84+
/** Taint propagation for `markupsafe.Markup`. */
85+
class AddtionalTaintSteps extends TaintTracking::AdditionalTaintStep {
86+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
87+
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
88+
}
89+
}
90+
}
91+
92+
/** Any escaping performed via the `markupsafe` package. */
93+
abstract private class MarkupSafeEscape extends Escaping::Range {
94+
override string getKind() {
95+
// TODO: this package claims to escape for both HTML and XML, but for now we don't
96+
// model XML.
97+
result = Escaping::getHtmlKind()
98+
}
99+
}
100+
101+
/** A call to any of the escaping functions in `markupsafe` */
102+
private class MarkupSafeEscapeCall extends Markup::InstanceSource, MarkupSafeEscape,
103+
DataFlow::CallCfgNode {
104+
MarkupSafeEscapeCall() {
105+
this = API::moduleImport("markupsafe").getMember(["escape", "escape_silent"]).getACall()
106+
or
107+
this = Markup::classRef().getMember("escape").getACall()
108+
or
109+
this = API::moduleImport("flask").getMember("escape").getACall()
110+
}
111+
112+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
113+
114+
override DataFlow::Node getOutput() { result = this }
115+
}
116+
117+
/**
118+
* An escape from string concatenation with a `markupsafe.Markup` involved.
119+
*
120+
* Only things that are not already a `markupsafe.Markup` instances will be escaped.
121+
*/
122+
private class MarkupEscapeFromStringConcat extends MarkupSafeEscape, Markup::StringConcat {
123+
override DataFlow::Node getAnInput() {
124+
result.asCfgNode() in [node.getLeft(), node.getRight()] and
125+
not result = Markup::instance()
126+
}
127+
128+
override DataFlow::Node getOutput() { result = this }
129+
}
130+
131+
/** A escape from string format with `markupsafe.Markup` as the format string. */
132+
private class MarkupEscapeFromStringFormat extends MarkupSafeEscape, Markup::StringFormat {
133+
override DataFlow::Node getAnInput() {
134+
result in [this.getArg(_), this.getArgByName(_)] and
135+
not result = Markup::instance()
136+
}
137+
138+
override DataFlow::Node getOutput() { result = this }
139+
}
140+
141+
/** A escape from %-style string format with `markupsafe.Markup` as the format string. */
142+
private class MarkupEscapeFromPercentStringFormat extends MarkupSafeEscape,
143+
Markup::PercentStringFormat {
144+
override DataFlow::Node getAnInput() {
145+
result.asCfgNode() = node.getRight() and
146+
not result = Markup::instance()
147+
}
148+
149+
override DataFlow::Node getOutput() { result = this }
150+
}
151+
}

python/ql/src/semmle/python/security/dataflow/ReflectedXSS.qll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,13 @@ class ReflectedXssConfiguration extends TaintTracking::Configuration {
2929
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
3030
guard instanceof StringConstCompare
3131
}
32+
33+
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
34+
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
35+
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
36+
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
37+
//
38+
// However, it is better than `getAnInput()`. Due to use-use flow, that would remove
39+
// the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
40+
override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
3241
}

python/ql/test/experimental/meta/ConceptsTest.qll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,38 @@ class SqlExecutionTest extends InlineExpectationsTest {
129129
}
130130
}
131131

132+
class EscapingTest extends InlineExpectationsTest {
133+
EscapingTest() { this = "EscapingTest" }
134+
135+
override string getARelevantTag() { result in ["escapeInput", "escapeOutput", "escapeKind"] }
136+
137+
override predicate hasActualResult(Location location, string element, string tag, string value) {
138+
exists(location.getFile().getRelativePath()) and
139+
exists(Escaping esc |
140+
exists(DataFlow::Node data |
141+
location = data.getLocation() and
142+
element = data.toString() and
143+
value = prettyNodeForInlineTest(data) and
144+
(
145+
data = esc.getAnInput() and
146+
tag = "escapeInput"
147+
or
148+
data = esc.getOutput() and
149+
tag = "escapeOutput"
150+
)
151+
)
152+
or
153+
exists(string format |
154+
location = esc.getLocation() and
155+
element = format and
156+
value = format and
157+
format = esc.getKind() and
158+
tag = "escapeKind"
159+
)
160+
)
161+
}
162+
}
163+
132164
class HttpServerRouteSetupTest extends InlineExpectationsTest {
133165
HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }
134166

python/ql/test/library-tests/frameworks/markupsafe/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
argumentToEnsureNotTaintedNotMarkedAsSpurious
2+
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
3+
failures

0 commit comments

Comments
 (0)