Skip to content

Commit cbedd77

Browse files
gnufedeavara1986
andauthored
fix(iast): ast patching side effect (#7638) [backport 2.0] (#7657)
IAST: This fix resolves an issue where, at AST patching to replace code with IAST aspects, the original function/method was passed as an extra parameter for accurate patching (i.e. check the function inside the aspect), and this strategy lead to unintentionally triggers side effects in methods obtained from an expression (like ``decode`` in ``file.read(n).decode()``), resulting in unexpected multiple calls to the expression (``file.read(n)`` in the example). - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Title is accurate. - [ ] No unnecessary changes are introduced. - [ ] Description motivates each change. - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [ ] Testing strategy adequately addresses listed risk(s). - [ ] Change is maintainable (easy to change, telemetry, documentation). - [ ] Release note makes sense to a user of the library. - [ ] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [ ] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [ ] This PR doesn't touch any of that. --------- Co-authored-by: Alberto Vara <[email protected]> (cherry picked from commit 601fd6a)
1 parent a7decf4 commit cbedd77

File tree

9 files changed

+374
-60
lines changed

9 files changed

+374
-60
lines changed

ddtrace/appsec/_iast/_ast/visitor.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,9 @@ def visit_Call(self, call_node): # type: (ast.Call) -> Any
443443
# Send 1 as flag_added_args value
444444
call_node.args.insert(0, self._int_constant(call_node, 1))
445445

446-
# Insert original method as first parameter (a.b.c.method)
447-
call_node.args = self._add_original_function_as_arg(call_node, False)
446+
# Insert None as first parameter instead of a.b.c.method
447+
# to avoid unexpected side effects such as a.b.read(4).method
448+
call_node.args.insert(0, self._none_constant(call_node))
448449

449450
# Create a new Name node for the replacement and set it as node.func
450451
call_node.func = self._attr_node(call_node, aspect)

ddtrace/appsec/_iast/_taint_tracking/aspects.py

Lines changed: 100 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,16 @@ def add_aspect(op1, op2):
5252

5353

5454
def str_aspect(orig_function, flag_added_args, *args, **kwargs):
55-
# type: (Callable, int, Any, Any) -> str
56-
if orig_function != builtin_str:
57-
if flag_added_args > 0:
58-
args = args[flag_added_args:]
59-
return orig_function(*args, **kwargs)
55+
# type: (Optional[Callable], int, Any, Any) -> str
56+
if orig_function:
57+
if orig_function != builtin_str:
58+
if flag_added_args > 0:
59+
args = args[flag_added_args:]
60+
return orig_function(*args, **kwargs)
61+
result = builtin_str(*args, **kwargs)
62+
else:
63+
result = args[0].str(*args[1:], **kwargs)
6064

61-
result = builtin_str(*args, **kwargs)
6265
if isinstance(args[0], TEXT_TYPES) and is_pyobject_tainted(args[0]):
6366
try:
6467
if isinstance(args[0], (bytes, bytearray)):
@@ -75,13 +78,16 @@ def str_aspect(orig_function, flag_added_args, *args, **kwargs):
7578

7679

7780
def bytes_aspect(orig_function, flag_added_args, *args, **kwargs):
78-
# type: (Callable, int, Any, Any) -> bytes
79-
if orig_function != builtin_bytes:
80-
if flag_added_args > 0:
81-
args = args[flag_added_args:]
82-
return orig_function(*args, **kwargs)
81+
# type: (Optional[Callable], int, Any, Any) -> bytes
82+
if orig_function:
83+
if orig_function != builtin_bytes:
84+
if flag_added_args > 0:
85+
args = args[flag_added_args:]
86+
return orig_function(*args, **kwargs)
87+
result = builtin_bytes(*args, **kwargs)
88+
else:
89+
result = args[0].bytes(*args[1:], **kwargs)
8390

84-
result = builtin_bytes(*args, **kwargs)
8591
if isinstance(args[0], TEXT_TYPES) and is_pyobject_tainted(args[0]):
8692
try:
8793
taint_pyobject_with_ranges(result, tuple(get_ranges(args[0])))
@@ -91,13 +97,16 @@ def bytes_aspect(orig_function, flag_added_args, *args, **kwargs):
9197

9298

9399
def bytearray_aspect(orig_function, flag_added_args, *args, **kwargs):
94-
# type: (Callable, int, Any, Any) -> bytearray
95-
if orig_function != builtin_bytearray:
96-
if flag_added_args > 0:
97-
args = args[flag_added_args:]
98-
return orig_function(*args, **kwargs)
100+
# type: (Optional[Callable], int, Any, Any) -> bytearray
101+
if orig_function:
102+
if orig_function != builtin_bytearray:
103+
if flag_added_args > 0:
104+
args = args[flag_added_args:]
105+
return orig_function(*args, **kwargs)
106+
result = builtin_bytearray(*args, **kwargs)
107+
else:
108+
result = args[0].bytearray(*args[1:], **kwargs)
99109

100-
result = builtin_bytearray(*args, **kwargs)
101110
if isinstance(args[0], TEXT_TYPES) and is_pyobject_tainted(args[0]):
102111
try:
103112
taint_pyobject_with_ranges(result, tuple(get_ranges(args[0])))
@@ -107,7 +116,9 @@ def bytearray_aspect(orig_function, flag_added_args, *args, **kwargs):
107116

108117

109118
def join_aspect(orig_function, flag_added_args, *args, **kwargs):
110-
# type: (Callable, int, Any, Any) -> Any
119+
# type: (Optional[Callable], int, Any, Any) -> Any
120+
if not orig_function:
121+
orig_function = args[0].join
111122
if not isinstance(orig_function, BuiltinFunctionType):
112123
if flag_added_args > 0:
113124
args = args[flag_added_args:]
@@ -125,16 +136,22 @@ def join_aspect(orig_function, flag_added_args, *args, **kwargs):
125136

126137

127138
def bytearray_extend_aspect(orig_function, flag_added_args, *args, **kwargs):
128-
# type: (Callable, int, Any, Any) -> Any
129-
if not isinstance(orig_function, BuiltinFunctionType):
139+
# type: (Optional[Callable], int, Any, Any) -> Any
140+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
130141
if flag_added_args > 0:
131142
args = args[flag_added_args:]
132143
return orig_function(*args, **kwargs)
133144

145+
if len(args) < 2:
146+
# If we're not receiving at least 2 arguments, means the call was
147+
# ``x.extend()`` and not ``x.extend(y)``
148+
# so either not the extend we're looking for, or no changes in taint ranges.
149+
return args[0].extend(*args[1:], **kwargs)
150+
134151
op1 = args[0]
135152
op2 = args[1]
136153
if not isinstance(op1, bytearray) or not isinstance(op2, (bytearray, bytes)):
137-
return op1.extend(op2)
154+
return op1.extend(*args[1:], **kwargs)
138155
try:
139156
return _extend_aspect(op1, op2)
140157
except Exception as e:
@@ -184,7 +201,9 @@ def build_string_aspect(*args): # type: (List[Any]) -> str
184201

185202

186203
def ljust_aspect(orig_function, flag_added_args, *args, **kwargs):
187-
# type: (Callable, int, Any, Any) -> Union[str, bytes, bytearray]
204+
# type: (Optional[Callable], int, Any, Any) -> Union[str, bytes, bytearray]
205+
if not orig_function:
206+
orig_function = args[0].ljust
188207
if not isinstance(orig_function, BuiltinFunctionType):
189208
if flag_added_args > 0:
190209
args = args[flag_added_args:]
@@ -217,8 +236,8 @@ def ljust_aspect(orig_function, flag_added_args, *args, **kwargs):
217236

218237

219238
def zfill_aspect(orig_function, flag_added_args, *args, **kwargs):
220-
# type: (Callable, int, Any, Any) -> Any
221-
if not isinstance(orig_function, BuiltinFunctionType):
239+
# type: (Optional[Callable], int, Any, Any) -> Any
240+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
222241
if flag_added_args > 0:
223242
args = args[flag_added_args:]
224243
return orig_function(*args, **kwargs)
@@ -260,11 +279,14 @@ def zfill_aspect(orig_function, flag_added_args, *args, **kwargs):
260279

261280

262281
def format_aspect(
263-
orig_function, # type: Callable
282+
orig_function, # type: Optional[Callable]
264283
flag_added_args, # type: int
265284
*args, # type: Any
266285
**kwargs, # type: Dict[str, Any]
267286
): # type: (...) -> str
287+
if not orig_function:
288+
orig_function = args[0].format
289+
268290
if not isinstance(orig_function, BuiltinFunctionType):
269291
if flag_added_args > 0:
270292
args = args[flag_added_args:]
@@ -310,8 +332,10 @@ def format_aspect(
310332
return result
311333

312334

313-
def format_map_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> str
314-
if not isinstance(orig_function, BuiltinFunctionType):
335+
def format_map_aspect(
336+
orig_function, flag_added_args, *args, **kwargs
337+
): # type: (Optional[Callable], int, Any, Any) -> str
338+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
315339
if flag_added_args > 0:
316340
args = args[flag_added_args:]
317341
return orig_function(*args, **kwargs)
@@ -455,12 +479,14 @@ def incremental_translation(self, incr_coder, funcode, empty):
455479

456480

457481
def decode_aspect(orig_function, flag_added_args, *args, **kwargs):
458-
if flag_added_args > 0:
459-
self = args[0]
460-
args = args[flag_added_args:]
461-
else:
482+
if orig_function and not flag_added_args:
483+
# This patch is unexpected, so we fallback
484+
# to executing the original function
462485
return orig_function(*args, **kwargs)
463486

487+
self = args[0]
488+
args = args[(flag_added_args or 1) :]
489+
# Assume we call decode method of the first argument
464490
result = self.decode(*args, **kwargs)
465491

466492
if not is_pyobject_tainted(self) or not isinstance(self, bytes):
@@ -475,12 +501,14 @@ def decode_aspect(orig_function, flag_added_args, *args, **kwargs):
475501

476502

477503
def encode_aspect(orig_function, flag_added_args, *args, **kwargs):
478-
if flag_added_args > 0:
479-
self = args[0]
480-
args = args[flag_added_args:]
481-
else:
504+
if orig_function and not flag_added_args:
505+
# This patch is unexpected, so we fallback
506+
# to executing the original function
482507
return orig_function(*args, **kwargs)
483508

509+
self = args[0]
510+
args = args[(flag_added_args or 1) :]
511+
# Assume we call encode method of the first argument
484512
result = self.encode(*args, **kwargs)
485513

486514
if not is_pyobject_tainted(self) or not isinstance(self, str):
@@ -494,8 +522,10 @@ def encode_aspect(orig_function, flag_added_args, *args, **kwargs):
494522
return result
495523

496524

497-
def upper_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
498-
if not isinstance(orig_function, BuiltinFunctionType):
525+
def upper_aspect(
526+
orig_function, flag_added_args, *args, **kwargs
527+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
528+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
499529
if flag_added_args > 0:
500530
args = args[flag_added_args:]
501531
return orig_function(*args, **kwargs)
@@ -512,17 +542,17 @@ def upper_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Cal
512542
return candidate_text.upper(*args, **kwargs)
513543

514544

515-
def lower_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
516-
if not isinstance(orig_function, BuiltinFunctionType):
545+
def lower_aspect(
546+
orig_function, flag_added_args, *args, **kwargs
547+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
548+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
517549
if flag_added_args > 0:
518550
args = args[flag_added_args:]
519551
return orig_function(*args, **kwargs)
520552

521553
candidate_text = args[0]
522554
args = args[flag_added_args:]
523555
if not isinstance(candidate_text, TEXT_TYPES):
524-
if flag_added_args > 0:
525-
args = args[flag_added_args:]
526556
return candidate_text.lower(*args, **kwargs)
527557

528558
try:
@@ -532,8 +562,10 @@ def lower_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Cal
532562
return candidate_text.lower(*args, **kwargs)
533563

534564

535-
def swapcase_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
536-
if not isinstance(orig_function, BuiltinFunctionType):
565+
def swapcase_aspect(
566+
orig_function, flag_added_args, *args, **kwargs
567+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
568+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
537569
if flag_added_args > 0:
538570
args = args[flag_added_args:]
539571
return orig_function(*args, **kwargs)
@@ -549,8 +581,10 @@ def swapcase_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (
549581
return candidate_text.swapcase(*args, **kwargs)
550582

551583

552-
def title_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
553-
if not isinstance(orig_function, BuiltinFunctionType):
584+
def title_aspect(
585+
orig_function, flag_added_args, *args, **kwargs
586+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
587+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
554588
if flag_added_args > 0:
555589
args = args[flag_added_args:]
556590
return orig_function(*args, **kwargs)
@@ -566,8 +600,10 @@ def title_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Cal
566600
return candidate_text.title(*args, **kwargs)
567601

568602

569-
def capitalize_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
570-
if not isinstance(orig_function, BuiltinFunctionType):
603+
def capitalize_aspect(
604+
orig_function, flag_added_args, *args, **kwargs
605+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
606+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
571607
if flag_added_args > 0:
572608
args = args[flag_added_args:]
573609
return orig_function(*args, **kwargs)
@@ -584,16 +620,21 @@ def capitalize_aspect(orig_function, flag_added_args, *args, **kwargs): # type:
584620
return candidate_text.capitalize(*args, **kwargs)
585621

586622

587-
def casefold_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
588-
if not isinstance(orig_function, BuiltinFunctionType):
589-
if flag_added_args > 0:
590-
args = args[flag_added_args:]
591-
return orig_function(*args, **kwargs)
623+
def casefold_aspect(
624+
orig_function, flag_added_args, *args, **kwargs
625+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
626+
if orig_function:
627+
if not isinstance(orig_function, BuiltinFunctionType):
628+
if flag_added_args > 0:
629+
args = args[flag_added_args:]
630+
return orig_function(*args, **kwargs)
631+
else:
632+
orig_function = getattr(args[0], "casefold", None)
592633

593-
if orig_function.__qualname__ not in ("str.casefold", "bytes.casefold", "bytearray.casefold"):
634+
if orig_function and orig_function.__qualname__ not in ("str.casefold", "bytes.casefold", "bytearray.casefold"):
594635
if flag_added_args > 0:
595636
args = args[flag_added_args:]
596-
return orig_function(args, **kwargs)
637+
return orig_function(*args, **kwargs)
597638

598639
candidate_text = args[0]
599640
args = args[flag_added_args:]
@@ -608,8 +649,10 @@ def casefold_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (
608649
return candidate_text.casefold(*args, **kwargs) # type: ignore[union-attr]
609650

610651

611-
def translate_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
612-
if not isinstance(orig_function, BuiltinFunctionType):
652+
def translate_aspect(
653+
orig_function, flag_added_args, *args, **kwargs
654+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
655+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
613656
if flag_added_args > 0:
614657
args = args[flag_added_args:]
615658
return orig_function(*args, **kwargs)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
Vulnerability Management for Code-level (IAST): This fix resolves an issue where, at AST patching to replace code with IAST aspects, passing the original function/method as an extra parameter for accurate patching unintentionally triggers side effects in methods obtained from an expression (like ``decode`` in ``file.read(n).decode()``), resulting in unexpected multiple calls to the expression (``file.read(n)`` in the example).
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python3
2+
3+
import tests.appsec.iast.fixtures.aspects.callees as callees_module
4+
5+
6+
class FakeStr(str):
7+
def __init__(self, *args, **kwargs):
8+
for i in [x for x in dir(callees_module) if not x.startswith(("_", "@"))]:
9+
setattr(self, i, getattr(callees_module, i))
10+
11+
def call_ljust(self, *args, **kwargs):
12+
return self.ljust(*args, **kwargs)
13+
14+
def call_join(self, *args, **kwargs):
15+
return self.join(*args, **kwargs)
16+
17+
def call_zfill(self, *args, **kwargs):
18+
return self.zfill(*args, **kwargs)
19+
20+
def call_format(self, *args, **kwargs):
21+
return self.format(*args, **kwargs)
22+
23+
def call_format_map(self, *args, **kwargs):
24+
return self.format_map(*args, **kwargs)
25+
26+
def call_repr(self, *args, **kwargs):
27+
return self.repr(*args, **kwargs)
28+
29+
def call_upper(self, *args, **kwargs):
30+
return self.upper(*args, **kwargs)
31+
32+
def call_lower(self, *args, **kwargs):
33+
return self.lower(*args, **kwargs)
34+
35+
def call_casefold(self, *args, **kwargs):
36+
return self.casefold(*args, **kwargs)
37+
38+
def call_capitalize(self, *args, **kwargs):
39+
return self.capitalize(*args, **kwargs)
40+
41+
def call_title(self, *args, **kwargs):
42+
return self.title(*args, **kwargs)
43+
44+
def call_swapcase(self, *args, **kwargs):
45+
return self.swapcase(*args, **kwargs)
46+
47+
def call_translate(self, *args, **kwargs):
48+
return self.translate(*args, **kwargs)
49+
50+
def call_extend(self, *args, **kwargs):
51+
return self.extend(*args, **kwargs)
52+
53+
def call_encode(self, *args, **kwargs):
54+
return self.encode(*args, **kwargs)
55+
56+
def call_decode(self, *args, **kwargs):
57+
return self.decode(*args, **kwargs)
58+
59+
def call_str(self, *args, **kwargs):
60+
return self.str(*args, **kwargs)
61+
62+
def call_bytes(self, *args, **kwargs):
63+
return self.bytes(*args, **kwargs)
64+
65+
def call_bytearray(self, *args, **kwargs):
66+
return self.bytearray(*args, **kwargs)

0 commit comments

Comments
 (0)