Skip to content

Commit c5b471c

Browse files
authored
Semgrep defused xml codemod (#705)
* do not short circuit on entire tre * new semgrep use defusdxml * update docs * fix description
1 parent 0105c7d commit c5b471c

File tree

6 files changed

+136
-6
lines changed

6 files changed

+136
-6
lines changed

src/codemodder/codemods/import_modifier_codemod.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,6 @@ def mapping(self) -> Mapping[str, str]:
5252
pass
5353

5454
def transform_module_impl(self, tree: cst.Module) -> cst.Module:
55-
if not self.node_is_selected(tree):
56-
return tree
57-
5855
visitor = MappingImportedCallModifier(
5956
self.context,
6057
self.file_context,

src/codemodder/scripts/generate_docs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ class DocMetadata:
325325
SEMGREP_CODEMOD_NAMES = [
326326
"enable-jinja2-autoescape",
327327
"jwt-decode-verify",
328+
"use-defusedxml",
328329
]
329330
SEMGREP_CODEMODS = {
330331
name: DocMetadata(

src/core_codemods/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
from .secure_random import SecureRandom
5757
from .semgrep.semgrep_enable_jinja2_autoescape import SemgrepEnableJinja2Autoescape
5858
from .semgrep.semgrep_jwt_decode_verify import SemgrepJwtDecodeVerify
59+
from .semgrep.semgrep_use_defused_xml import SemgrepUseDefusedXml
5960
from .sonar.sonar_break_or_continue_out_of_loop import SonarBreakOrContinueOutOfLoop
6061
from .sonar.sonar_disable_graphql_introspection import SonarDisableGraphQLIntrospection
6162
from .sonar.sonar_django_json_response_type import SonarDjangoJsonResponseType
@@ -200,5 +201,6 @@
200201
codemods=[
201202
SemgrepEnableJinja2Autoescape,
202203
SemgrepJwtDecodeVerify,
204+
SemgrepUseDefusedXml,
203205
],
204206
)

src/core_codemods/semgrep/api.py

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
1+
from codemodder.codemods.api import SimpleCodemod
12
from codemodder.codemods.base_codemod import Metadata, Reference, ToolMetadata, ToolRule
23
from codemodder.codemods.base_transformer import BaseTransformerPipeline
4+
from codemodder.codemods.libcst_transformer import LibcstTransformerPipeline
35
from codemodder.codemods.semgrep import SemgrepSarifFileDetector
46
from core_codemods.api.core_codemod import CoreCodemod, SASTCodemod
57

68

9+
def semgrep_url_from_id(rule_id: str) -> str:
10+
return f"https://semgrep.dev/r?q={rule_id}"
11+
12+
713
class SemgrepCodemod(SASTCodemod):
814
@property
915
def origin(self):
@@ -18,14 +24,18 @@ def from_core_codemod(
1824
rule_name: str,
1925
transformer: BaseTransformerPipeline | None = None,
2026
):
21-
rule_url = f"https://semgrep.dev/r?q={rule_id}"
2227
return SemgrepCodemod(
2328
metadata=Metadata(
2429
name=name,
2530
summary=other.summary,
2631
review_guidance=other._metadata.review_guidance,
2732
references=(
28-
other.references + [Reference(url=rule_url, description=rule_name)]
33+
other.references
34+
+ [
35+
Reference(
36+
url=semgrep_url_from_id(rule_id), description=rule_name
37+
)
38+
]
2939
),
3040
description=other.description,
3141
tool=ToolMetadata(
@@ -34,7 +44,7 @@ def from_core_codemod(
3444
ToolRule(
3545
id=rule_id,
3646
name=rule_name,
37-
url=rule_url,
47+
url=semgrep_url_from_id(rule_id),
3848
)
3949
],
4050
),
@@ -43,3 +53,42 @@ def from_core_codemod(
4353
detector=SemgrepSarifFileDetector(),
4454
requested_rules=[rule_id],
4555
)
56+
57+
@classmethod
58+
def from_import_modifier_codemod(
59+
cls,
60+
name: str,
61+
other: type[SimpleCodemod],
62+
rule_id: str,
63+
rule_name: str,
64+
):
65+
metadata = other.metadata
66+
return SemgrepCodemod(
67+
metadata=Metadata(
68+
name=name,
69+
summary=metadata.summary,
70+
review_guidance=metadata.review_guidance,
71+
references=(
72+
metadata.references
73+
+ [
74+
Reference(
75+
url=semgrep_url_from_id(rule_id), description=rule_name
76+
)
77+
]
78+
),
79+
description=other.change_description,
80+
tool=ToolMetadata(
81+
name="Semgrep",
82+
rules=[
83+
ToolRule(
84+
id=rule_id,
85+
name=rule_name,
86+
url=semgrep_url_from_id(rule_id),
87+
)
88+
],
89+
),
90+
),
91+
transformer=LibcstTransformerPipeline(other),
92+
detector=SemgrepSarifFileDetector(),
93+
requested_rules=[rule_id],
94+
)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from core_codemods.semgrep.api import SemgrepCodemod
2+
from core_codemods.use_defused_xml import UseDefusedXml
3+
4+
SemgrepUseDefusedXml = SemgrepCodemod.from_import_modifier_codemod(
5+
name="use-defusedxml",
6+
other=UseDefusedXml,
7+
rule_id="python.lang.security.use-defused-xml-parse.use-defused-xml-parse",
8+
rule_name="use-defused-xml-parse",
9+
)
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import json
2+
3+
import mock
4+
5+
from codemodder.codemods.test import BaseSASTCodemodTest
6+
from codemodder.dependency import DefusedXML
7+
from core_codemods.semgrep.semgrep_use_defused_xml import SemgrepUseDefusedXml
8+
9+
10+
class TestSemgrepUseDefusedXml(BaseSASTCodemodTest):
11+
codemod = SemgrepUseDefusedXml
12+
tool = "semgrep"
13+
14+
def test_name(self):
15+
assert self.codemod.name == "use-defusedxml"
16+
17+
@mock.patch("codemodder.codemods.api.FileContext.add_dependency")
18+
def test_etree_parse(self, add_dependency, tmpdir):
19+
original_code = """\
20+
from xml.etree.ElementTree import parse
21+
22+
et = parse(user_input)
23+
"""
24+
25+
new_code = """\
26+
import defusedxml.ElementTree
27+
28+
et = defusedxml.ElementTree.parse(user_input)
29+
"""
30+
31+
results = {
32+
"runs": [
33+
{
34+
"results": [
35+
{
36+
"fingerprints": {"matchBasedId/v1": "123"},
37+
"locations": [
38+
{
39+
"physicalLocation": {
40+
"artifactLocation": {
41+
"uri": "code.py",
42+
"uriBaseId": "%SRCROOT%",
43+
},
44+
"region": {
45+
"endColumn": 23,
46+
"endLine": 3,
47+
"snippet": {
48+
"text": "et = parse(user_input)"
49+
},
50+
"startColumn": 6,
51+
"startLine": 3,
52+
},
53+
}
54+
}
55+
],
56+
"message": {
57+
"text": 'The native Python `xml` library is vulnerable to XML External Entity (XXE) attacks. These attacks can leak confidential data and "XML bombs" can cause denial of service. Do not use this library to parse untrusted input. Instead the Python documentation recommends using `defusedxml`.'
58+
},
59+
"ruleId": "python.lang.security.use-defused-xml-parse.use-defused-xml-parse",
60+
}
61+
]
62+
}
63+
]
64+
}
65+
66+
self.run_and_assert(
67+
tmpdir,
68+
original_code,
69+
new_code,
70+
results=json.dumps(results),
71+
)
72+
add_dependency.assert_called_once_with(DefusedXML)

0 commit comments

Comments
 (0)