Skip to content

Commit fb1badd

Browse files
clavedeluna, drdavella, and pixeebot[bot]
authored
Semgrep Nan Injection codemod (#758)
* Semgrep Nan Injection codemod
* report change for each line added
* Apply suggestions from code review
  Co-authored-by: Dan D'Avella <[email protected]>
* add unit tests for all 3 funcs
* test more and handle nested cases
* Hardening suggestions for codemodder-python / semgrep-nan-inj (#766)
  Use Assignment Expression (Walrus) In Conditional
  Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
* handle more generic cases
* handle binop case
---------
Co-authored-by: Dan D'Avella <[email protected]>
Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
1 parent 1a6596b commit fb1badd

File tree

7 files changed

+640
-28
lines changed

7 files changed

+640
-28
lines changed

src/codemodder/codemods/libcst_transformer.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from codemodder.codemods.base_transformer import BaseTransformerPipeline
1010
from codemodder.codemods.base_visitor import BaseTransformer
1111
from codemodder.codemods.utils import get_call_name
12-
from codemodder.codetf import Change, ChangeSet
12+
from codemodder.codetf import Change, ChangeSet, Finding
1313
from codemodder.context import CodemodExecutionContext
1414
from codemodder.dependency import Dependency
1515
from codemodder.diff import create_diff_from_tree
@@ -103,14 +103,8 @@ def add_change(self, node, description: str, start: bool = True):
103103
def add_change_from_position(
104104
self, position: CodeRange, description: str, start: bool = True
105105
):
106-
lineno = position.start.line if start else position.end.line
107-
self.file_context.codemod_changes.append(
108-
Change(
109-
lineNumber=lineno,
110-
description=description,
111-
findings=self.file_context.get_findings_for_location(lineno),
112-
)
113-
)
106+
line_number = position.start.line if start else position.end.line
107+
self.report_change_for_line(line_number, description)
114108

115109
def lineno_for_node(self, node):
116110
return self.node_position(node).start.line
@@ -120,11 +114,20 @@ def add_dependency(self, dependency: Dependency):
120114

121115
def report_change(self, original_node, description: str | None = None):
122116
line_number = self.lineno_for_node(original_node)
117+
self.report_change_for_line(line_number, description)
118+
119+
def report_change_for_line(
120+
self,
121+
line_number,
122+
description: str | None = None,
123+
findings: list[Finding] | None = None,
124+
):
123125
self.file_context.codemod_changes.append(
124126
Change(
125127
lineNumber=line_number,
126128
description=description or self.change_description,
127-
findings=self.file_context.get_findings_for_location(line_number),
129+
findings=findings
130+
or self.file_context.get_findings_for_location(line_number),
128131
)
129132
)
130133

src/codemodder/scripts/generate_docs.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,11 @@ class DocMetadata:
352352
guidance_explained="This codemod removes the `@csrf_exempt` decorator from a Django view to ensure it's protected against CSRF attacks. However, there are valid cases for using this decorator so make sure to review your application to determine if this is the case.",
353353
need_sarif="Yes (Semgrep)",
354354
),
355+
"nan-injection": DocMetadata(
356+
importance="Medium",
357+
guidance_explained="We believe that this codemod fixes an unsafe typecast call and that the changes are safe and reliable.",
358+
need_sarif="Yes (Semgrep)",
359+
),
355360
}
356361
ALL_CODEMODS_METADATA = (
357362
CORE_CODEMODS | DEFECTDOJO_CODEMODS | SONAR_CODEMODS | SEMGREP_CODEMODS

src/core_codemods/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from .semgrep.semgrep_enable_jinja2_autoescape import SemgrepEnableJinja2Autoescape
5959
from .semgrep.semgrep_harden_pyyaml import SemgrepHardenPyyaml
6060
from .semgrep.semgrep_jwt_decode_verify import SemgrepJwtDecodeVerify
61+
from .semgrep.semgrep_nan_injection import SemgrepNanInjection
6162
from .semgrep.semgrep_no_csrf_exempt import SemgrepNoCsrfExempt
6263
from .semgrep.semgrep_rsa_key_size import SemgrepRsaKeySize
6364
from .semgrep.semgrep_sql_parameterization import SemgrepSQLParameterization
@@ -216,5 +217,6 @@
216217
SemgrepHardenPyyaml,
217218
SemgrepRsaKeySize,
218219
SemgrepSQLParameterization,
220+
SemgrepNanInjection,
219221
],
220222
)

src/core_codemods/fix_assert_tuple.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
LibcstTransformerPipeline,
88
)
99
from codemodder.codemods.utils_mixin import NameResolutionMixin
10-
from codemodder.codetf import Change
1110
from core_codemods.api import Metadata, ReviewGuidance
1211
from core_codemods.api.core_codemod import CoreCodemod
1312

@@ -47,14 +46,7 @@ def _report_new_lines(
4746
):
4847
start_line = self.node_position(original_node).start.line
4948
for idx in range(newlines_count):
50-
self.file_context.codemod_changes.append(
51-
Change(
52-
lineNumber=(line_number := start_line + idx),
53-
description=self.change_description,
54-
# For now we can only link the finding to the first line changed
55-
findings=self.file_context.get_findings_for_location(line_number),
56-
)
57-
)
49+
self.report_change_for_line(start_line + idx)
5850

5951

6052
FixAssertTuple = CoreCodemod(
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
from textwrap import dedent
2+
3+
import libcst as cst
4+
from libcst.codemod import ContextAwareVisitor
5+
6+
from codemodder.codemods.base_codemod import (
7+
Metadata,
8+
ReviewGuidance,
9+
ToolMetadata,
10+
ToolRule,
11+
)
12+
from codemodder.codemods.base_visitor import UtilsMixin
13+
from codemodder.codemods.libcst_transformer import (
14+
LibcstResultTransformer,
15+
LibcstTransformerPipeline,
16+
)
17+
from codemodder.codemods.semgrep import SemgrepSarifFileDetector
18+
from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id
19+
20+
21+
class NanInjectionTransformer(LibcstResultTransformer):
22+
change_description = "Add validation to untrusted numerical input to disallow `nan`"
23+
24+
def leave_SimpleStatementLine(
25+
self,
26+
original_node: cst.SimpleStatementLine,
27+
updated_node: cst.SimpleStatementLine,
28+
):
29+
30+
visitor = MatchNodesInLineVisitor(
31+
self.context, file_context=self.file_context, results=self.results
32+
)
33+
original_node.body[0].visit(visitor)
34+
if visitor.matched_nodes:
35+
# For now only handle one matched Call node in a line
36+
return self.replace_with_if_else(
37+
visitor.matched_nodes[0], original_node, updated_node
38+
)
39+
return original_node
40+
41+
def replace_with_if_else(
42+
self,
43+
node: cst.Call,
44+
original_node: cst.SimpleStatementLine,
45+
updated_node: cst.SimpleStatementLine,
46+
):
47+
if not (target_node := self._get_target_in_call(node)):
48+
return original_node
49+
50+
code = dedent(
51+
f"""\
52+
if {self.code(target_node).strip()}.lower() == "nan":
53+
raise ValueError
54+
else:
55+
{self.code(original_node).strip()}
56+
"""
57+
)
58+
self._report_new_lines(original_node)
59+
new_statement = cst.parse_statement(code)
60+
return new_statement.with_changes(leading_lines=updated_node.leading_lines)
61+
62+
def _get_target_in_call(self, node: cst.Call) -> cst.CSTNode:
63+
match (wrapped_node := node.args[0].value):
64+
case cst.Name():
65+
# float(var)
66+
return wrapped_node
67+
68+
case cst.Call(
69+
func=cst.Name("float") | cst.Name("bool") | cst.Name("complex")
70+
):
71+
# bool(float(var)), complex(float(var)), bool(float(var)), etc
72+
return self._get_target_in_call(wrapped_node)
73+
case cst.Call() | cst.BinaryOperation():
74+
return wrapped_node
75+
76+
def _report_new_lines(self, original_node: cst.SimpleStatementLine):
77+
self.report_change(original_node)
78+
line_number = self.lineno_for_node(original_node)
79+
findings = self.file_context.get_findings_for_location(line_number)
80+
for lineno in range(line_number + 1, line_number + 4):
81+
self.report_change_for_line(lineno, findings=findings)
82+
83+
84+
class MatchNodesInLineVisitor(ContextAwareVisitor, UtilsMixin):
85+
"""Visit Call nodes and match if node location matches results."""
86+
87+
def __init__(
88+
self,
89+
context,
90+
file_context,
91+
results,
92+
) -> None:
93+
self.file_context = file_context
94+
ContextAwareVisitor.__init__(self, context)
95+
UtilsMixin.__init__(
96+
self,
97+
results=results,
98+
line_include=file_context.line_include,
99+
line_exclude=file_context.line_exclude,
100+
)
101+
102+
self.matched_nodes: list[cst.Call] = []
103+
104+
def visit_Call(self, node: cst.Call) -> None:
105+
if self.node_is_selected(node):
106+
self.matched_nodes.append(node)
107+
108+
109+
SemgrepNanInjection = SemgrepCodemod(
110+
metadata=Metadata(
111+
name="nan-injection",
112+
summary=NanInjectionTransformer.change_description.title(),
113+
description=NanInjectionTransformer.change_description.title(),
114+
review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
115+
tool=ToolMetadata(
116+
name="Semgrep",
117+
rules=[
118+
ToolRule(
119+
id=(
120+
rule_id := "python.django.security.nan-injection.nan-injection"
121+
),
122+
name="nan-injection",
123+
url=semgrep_url_from_id(rule_id),
124+
)
125+
],
126+
),
127+
references=[],
128+
),
129+
transformer=LibcstTransformerPipeline(NanInjectionTransformer),
130+
detector=SemgrepSarifFileDetector(),
131+
requested_rules=[rule_id],
132+
)

src/core_codemods/sql_parameterization.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
infer_expression_type,
3232
)
3333
from codemodder.codemods.utils_mixin import NameAndAncestorResolutionMixin
34-
from codemodder.codetf import Change
3534
from codemodder.utils.clean_code import (
3635
NormalizeFStrings,
3736
RemoveEmptyExpressionsFormatting,
@@ -249,15 +248,10 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module:
249248
result = tree.visit(ReplaceNodes(new_changed_nodes))
250249
self.changed_nodes = {}
251250
line_number = self.get_metadata(PositionProvider, call).start.line
252-
self.file_context.codemod_changes.append(
253-
Change(
254-
lineNumber=line_number,
255-
description=SQLQueryParameterizationTransformer.change_description,
256-
findings=self.file_context.get_findings_for_location(
257-
line_number
258-
),
259-
)
251+
self.report_change_for_line(
252+
line_number, SQLQueryParameterizationTransformer.change_description
260253
)
254+
261255
# Normalization and cleanup
262256
result = CleanCode(self.context).transform_module(result)
263257

0 commit comments

Comments
 (0)