Skip to content

Commit 7cb7db6

Browse files
gnufedeavara1986
andauthored
fix(iast): ast patching side effect (#7638) [backport 1.20] (#7655)
IAST: This fix resolves an issue where, at AST patching to replace code with IAST aspects, the original function/method was passed as an extra parameter for accurate patching (i.e. check the function inside the aspect), and this strategy lead to unintentionally triggers side effects in methods obtained from an expression (like ``decode`` in ``file.read(n).decode()``), resulting in unexpected multiple calls to the expression (``file.read(n)`` in the example). - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [ ] Title is accurate. - [ ] No unnecessary changes are introduced. - [ ] Description motivates each change. - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [ ] Testing strategy adequately addresses listed risk(s). - [ ] Change is maintainable (easy to change, telemetry, documentation). - [ ] Release note makes sense to a user of the library. - [ ] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [ ] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [ ] This PR doesn't touch any of that. --------- Co-authored-by: Alberto Vara <[email protected]> (cherry picked from commit 601fd6a)
1 parent 0f0e0c2 commit 7cb7db6

File tree

9 files changed

+374
-61
lines changed

9 files changed

+374
-61
lines changed

ddtrace/appsec/iast/_ast/visitor.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,9 @@ def visit_Call(self, call_node): # type: (ast.Call) -> Any
393393
# Send 1 as flag_added_args value
394394
call_node.args.insert(0, self._int_constant(call_node, 1))
395395

396-
# Insert original method as first parameter (a.b.c.method)
397-
call_node.args = self._add_original_function_as_arg(call_node, False)
396+
# Insert None as first parameter instead of a.b.c.method
397+
# to avoid unexpected side effects such as a.b.read(4).method
398+
call_node.args.insert(0, self._none_constant(call_node))
398399

399400
# Create a new Name node for the replacement and set it as node.func
400401
call_node.func = self._attr_node(call_node, aspect)

ddtrace/appsec/iast/_taint_tracking/aspects.py

Lines changed: 100 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ def add_aspect(op1, op2):
5151

5252

5353
def str_aspect(orig_function, flag_added_args, *args, **kwargs):
54-
# type: (Callable, int, Any, Any) -> str
55-
if orig_function != builtin_str:
56-
if flag_added_args > 0:
57-
args = args[flag_added_args:]
58-
return orig_function(*args, **kwargs)
59-
60-
result = builtin_str(*args, **kwargs)
54+
# type: (Optional[Callable], int, Any, Any) -> str
55+
if orig_function:
56+
if orig_function != builtin_str:
57+
if flag_added_args > 0:
58+
args = args[flag_added_args:]
59+
return orig_function(*args, **kwargs)
60+
result = builtin_str(*args, **kwargs)
61+
else:
62+
result = args[0].str(*args[1:], **kwargs)
6163

6264
if isinstance(args[0], TEXT_TYPES) and is_pyobject_tainted(args[0]):
6365
try:
@@ -75,13 +77,16 @@ def str_aspect(orig_function, flag_added_args, *args, **kwargs):
7577

7678

7779
def bytes_aspect(orig_function, flag_added_args, *args, **kwargs):
78-
# type: (Callable, int, Any, Any) -> bytes
79-
if orig_function != builtin_bytes:
80-
if flag_added_args > 0:
81-
args = args[flag_added_args:]
82-
return orig_function(*args, **kwargs)
80+
# type: (Optional[Callable], int, Any, Any) -> bytes
81+
if orig_function:
82+
if orig_function != builtin_bytes:
83+
if flag_added_args > 0:
84+
args = args[flag_added_args:]
85+
return orig_function(*args, **kwargs)
86+
result = builtin_bytes(*args, **kwargs)
87+
else:
88+
result = args[0].bytes(*args[1:], **kwargs)
8389

84-
result = builtin_bytes(*args, **kwargs)
8590
if isinstance(args[0], TEXT_TYPES) and is_pyobject_tainted(args[0]):
8691
try:
8792
taint_pyobject_with_ranges(result, tuple(get_ranges(args[0])))
@@ -91,13 +96,16 @@ def bytes_aspect(orig_function, flag_added_args, *args, **kwargs):
9196

9297

9398
def bytearray_aspect(orig_function, flag_added_args, *args, **kwargs):
94-
# type: (Callable, int, Any, Any) -> bytearray
95-
if orig_function != builtin_bytearray:
96-
if flag_added_args > 0:
97-
args = args[flag_added_args:]
98-
return orig_function(*args, **kwargs)
99+
# type: (Optional[Callable], int, Any, Any) -> bytearray
100+
if orig_function:
101+
if orig_function != builtin_bytearray:
102+
if flag_added_args > 0:
103+
args = args[flag_added_args:]
104+
return orig_function(*args, **kwargs)
105+
result = builtin_bytearray(*args, **kwargs)
106+
else:
107+
result = args[0].bytearray(*args[1:], **kwargs)
99108

100-
result = builtin_bytearray(*args, **kwargs)
101109
if isinstance(args[0], TEXT_TYPES) and is_pyobject_tainted(args[0]):
102110
try:
103111
taint_pyobject_with_ranges(result, tuple(get_ranges(args[0])))
@@ -107,7 +115,9 @@ def bytearray_aspect(orig_function, flag_added_args, *args, **kwargs):
107115

108116

109117
def join_aspect(orig_function, flag_added_args, *args, **kwargs):
110-
# type: (Callable, int, Any, Any) -> Any
118+
# type: (Optional[Callable], int, Any, Any) -> Any
119+
if not orig_function:
120+
orig_function = args[0].join
111121
if not isinstance(orig_function, BuiltinFunctionType):
112122
if flag_added_args > 0:
113123
args = args[flag_added_args:]
@@ -125,16 +135,22 @@ def join_aspect(orig_function, flag_added_args, *args, **kwargs):
125135

126136

127137
def bytearray_extend_aspect(orig_function, flag_added_args, *args, **kwargs):
128-
# type: (Callable, int, Any, Any) -> Any
129-
if not isinstance(orig_function, BuiltinFunctionType):
138+
# type: (Optional[Callable], int, Any, Any) -> Any
139+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
130140
if flag_added_args > 0:
131141
args = args[flag_added_args:]
132142
return orig_function(*args, **kwargs)
133143

144+
if len(args) < 2:
145+
# If we're not receiving at least 2 arguments, means the call was
146+
# ``x.extend()`` and not ``x.extend(y)``
147+
# so either not the extend we're looking for, or no changes in taint ranges.
148+
return args[0].extend(*args[1:], **kwargs)
149+
134150
op1 = args[0]
135151
op2 = args[1]
136152
if not isinstance(op1, bytearray) or not isinstance(op2, (bytearray, bytes)):
137-
return op1.extend(op2)
153+
return op1.extend(*args[1:], **kwargs)
138154
try:
139155
return _extend_aspect(op1, op2)
140156
except Exception as e:
@@ -184,7 +200,9 @@ def build_string_aspect(*args): # type: (List[Any]) -> str
184200

185201

186202
def ljust_aspect(orig_function, flag_added_args, *args, **kwargs):
187-
# type: (Callable, int, Any, Any) -> Union[str, bytes, bytearray]
203+
# type: (Optional[Callable], int, Any, Any) -> Union[str, bytes, bytearray]
204+
if not orig_function:
205+
orig_function = args[0].ljust
188206
if not isinstance(orig_function, BuiltinFunctionType):
189207
if flag_added_args > 0:
190208
args = args[flag_added_args:]
@@ -219,8 +237,8 @@ def ljust_aspect(orig_function, flag_added_args, *args, **kwargs):
219237

220238

221239
def zfill_aspect(orig_function, flag_added_args, *args, **kwargs):
222-
# type: (Callable, int, Any, Any) -> Any
223-
if not isinstance(orig_function, BuiltinFunctionType):
240+
# type: (Optional[Callable], int, Any, Any) -> Any
241+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
224242
if flag_added_args > 0:
225243
args = args[flag_added_args:]
226244
return orig_function(*args, **kwargs)
@@ -261,11 +279,14 @@ def zfill_aspect(orig_function, flag_added_args, *args, **kwargs):
261279

262280

263281
def format_aspect(
264-
orig_function, # type: Callable
282+
orig_function, # type: Optional[Callable]
265283
flag_added_args, # type: int
266284
*args, # type: Any
267285
**kwargs # type: Dict[str, Any]
268286
): # type: (...) -> str
287+
if not orig_function:
288+
orig_function = args[0].format
289+
269290
if not isinstance(orig_function, BuiltinFunctionType):
270291
if flag_added_args > 0:
271292
args = args[flag_added_args:]
@@ -312,8 +333,10 @@ def format_aspect(
312333
return result
313334

314335

315-
def format_map_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> str
316-
if not isinstance(orig_function, BuiltinFunctionType):
336+
def format_map_aspect(
337+
orig_function, flag_added_args, *args, **kwargs
338+
): # type: (Optional[Callable], int, Any, Any) -> str
339+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
317340
if flag_added_args > 0:
318341
args = args[flag_added_args:]
319342
return orig_function(*args, **kwargs)
@@ -460,12 +483,14 @@ def incremental_translation(self, incr_coder, funcode, empty):
460483

461484

462485
def decode_aspect(orig_function, flag_added_args, *args, **kwargs):
463-
if flag_added_args > 0:
464-
self = args[0]
465-
args = args[flag_added_args:]
466-
else:
486+
if orig_function and not flag_added_args:
487+
# This patch is unexpected, so we fallback
488+
# to executing the original function
467489
return orig_function(*args, **kwargs)
468490

491+
self = args[0]
492+
args = args[(flag_added_args or 1) :]
493+
# Assume we call decode method of the first argument
469494
result = self.decode(*args, **kwargs)
470495

471496
if not is_pyobject_tainted(self) or not isinstance(self, bytes):
@@ -481,12 +506,14 @@ def decode_aspect(orig_function, flag_added_args, *args, **kwargs):
481506

482507

483508
def encode_aspect(orig_function, flag_added_args, *args, **kwargs):
484-
if flag_added_args > 0:
485-
self = args[0]
486-
args = args[flag_added_args:]
487-
else:
509+
if orig_function and not flag_added_args:
510+
# This patch is unexpected, so we fallback
511+
# to executing the original function
488512
return orig_function(*args, **kwargs)
489513

514+
self = args[0]
515+
args = args[(flag_added_args or 1) :]
516+
# Assume we call encode method of the first argument
490517
result = self.encode(*args, **kwargs)
491518

492519
if not is_pyobject_tainted(self) or not isinstance(self, str):
@@ -501,8 +528,10 @@ def encode_aspect(orig_function, flag_added_args, *args, **kwargs):
501528
return result
502529

503530

504-
def upper_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
505-
if not isinstance(orig_function, BuiltinFunctionType):
531+
def upper_aspect(
532+
orig_function, flag_added_args, *args, **kwargs
533+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
534+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
506535
if flag_added_args > 0:
507536
args = args[flag_added_args:]
508537
return orig_function(*args, **kwargs)
@@ -519,17 +548,17 @@ def upper_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Cal
519548
return candidate_text.upper(*args, **kwargs)
520549

521550

522-
def lower_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
523-
if not isinstance(orig_function, BuiltinFunctionType):
551+
def lower_aspect(
552+
orig_function, flag_added_args, *args, **kwargs
553+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
554+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
524555
if flag_added_args > 0:
525556
args = args[flag_added_args:]
526557
return orig_function(*args, **kwargs)
527558

528559
candidate_text = args[0]
529560
args = args[flag_added_args:]
530561
if not isinstance(candidate_text, TEXT_TYPES):
531-
if flag_added_args > 0:
532-
args = args[flag_added_args:]
533562
return candidate_text.lower(*args, **kwargs)
534563

535564
try:
@@ -539,8 +568,10 @@ def lower_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Cal
539568
return candidate_text.lower(*args, **kwargs)
540569

541570

542-
def swapcase_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
543-
if not isinstance(orig_function, BuiltinFunctionType):
571+
def swapcase_aspect(
572+
orig_function, flag_added_args, *args, **kwargs
573+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
574+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
544575
if flag_added_args > 0:
545576
args = args[flag_added_args:]
546577
return orig_function(*args, **kwargs)
@@ -556,8 +587,10 @@ def swapcase_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (
556587
return candidate_text.swapcase(*args, **kwargs)
557588

558589

559-
def title_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
560-
if not isinstance(orig_function, BuiltinFunctionType):
590+
def title_aspect(
591+
orig_function, flag_added_args, *args, **kwargs
592+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
593+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
561594
if flag_added_args > 0:
562595
args = args[flag_added_args:]
563596
return orig_function(*args, **kwargs)
@@ -573,8 +606,10 @@ def title_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Cal
573606
return candidate_text.title(*args, **kwargs)
574607

575608

576-
def capitalize_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
577-
if not isinstance(orig_function, BuiltinFunctionType):
609+
def capitalize_aspect(
610+
orig_function, flag_added_args, *args, **kwargs
611+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
612+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
578613
if flag_added_args > 0:
579614
args = args[flag_added_args:]
580615
return orig_function(*args, **kwargs)
@@ -591,16 +626,21 @@ def capitalize_aspect(orig_function, flag_added_args, *args, **kwargs): # type:
591626
return candidate_text.capitalize(*args, **kwargs)
592627

593628

594-
def casefold_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
595-
if not isinstance(orig_function, BuiltinFunctionType):
596-
if flag_added_args > 0:
597-
args = args[flag_added_args:]
598-
return orig_function(*args, **kwargs)
629+
def casefold_aspect(
630+
orig_function, flag_added_args, *args, **kwargs
631+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
632+
if orig_function:
633+
if not isinstance(orig_function, BuiltinFunctionType):
634+
if flag_added_args > 0:
635+
args = args[flag_added_args:]
636+
return orig_function(*args, **kwargs)
637+
else:
638+
orig_function = getattr(args[0], "casefold", None)
599639

600-
if orig_function.__qualname__ not in ("str.casefold", "bytes.casefold", "bytearray.casefold"):
640+
if orig_function and orig_function.__qualname__ not in ("str.casefold", "bytes.casefold", "bytearray.casefold"):
601641
if flag_added_args > 0:
602642
args = args[flag_added_args:]
603-
return orig_function(args, **kwargs)
643+
return orig_function(*args, **kwargs)
604644

605645
candidate_text = args[0]
606646
args = args[flag_added_args:]
@@ -615,8 +655,10 @@ def casefold_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (
615655
return candidate_text.casefold(*args, **kwargs) # type: ignore[union-attr]
616656

617657

618-
def translate_aspect(orig_function, flag_added_args, *args, **kwargs): # type: (Callable, int, Any, Any) -> TEXT_TYPE
619-
if not isinstance(orig_function, BuiltinFunctionType):
658+
def translate_aspect(
659+
orig_function, flag_added_args, *args, **kwargs
660+
): # type: (Optional[Callable], int, Any, Any) -> TEXT_TYPE
661+
if orig_function and not isinstance(orig_function, BuiltinFunctionType):
620662
if flag_added_args > 0:
621663
args = args[flag_added_args:]
622664
return orig_function(*args, **kwargs)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
Vulnerability Management for Code-level (IAST): This fix resolves an issue where, at AST patching to replace code with IAST aspects, passing the original function/method as an extra parameter for accurate patching unintentionally triggers side effects in methods obtained from an expression (like ``decode`` in ``file.read(n).decode()``), resulting in unexpected multiple calls to the expression (``file.read(n)`` in the example).
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python3
2+
3+
import tests.appsec.iast.fixtures.aspects.callees as callees_module
4+
5+
6+
class FakeStr(str):
7+
def __init__(self, *args, **kwargs):
8+
for i in [x for x in dir(callees_module) if not x.startswith(("_", "@"))]:
9+
setattr(self, i, getattr(callees_module, i))
10+
11+
def call_ljust(self, *args, **kwargs):
12+
return self.ljust(*args, **kwargs)
13+
14+
def call_join(self, *args, **kwargs):
15+
return self.join(*args, **kwargs)
16+
17+
def call_zfill(self, *args, **kwargs):
18+
return self.zfill(*args, **kwargs)
19+
20+
def call_format(self, *args, **kwargs):
21+
return self.format(*args, **kwargs)
22+
23+
def call_format_map(self, *args, **kwargs):
24+
return self.format_map(*args, **kwargs)
25+
26+
def call_repr(self, *args, **kwargs):
27+
return self.repr(*args, **kwargs)
28+
29+
def call_upper(self, *args, **kwargs):
30+
return self.upper(*args, **kwargs)
31+
32+
def call_lower(self, *args, **kwargs):
33+
return self.lower(*args, **kwargs)
34+
35+
def call_casefold(self, *args, **kwargs):
36+
return self.casefold(*args, **kwargs)
37+
38+
def call_capitalize(self, *args, **kwargs):
39+
return self.capitalize(*args, **kwargs)
40+
41+
def call_title(self, *args, **kwargs):
42+
return self.title(*args, **kwargs)
43+
44+
def call_swapcase(self, *args, **kwargs):
45+
return self.swapcase(*args, **kwargs)
46+
47+
def call_translate(self, *args, **kwargs):
48+
return self.translate(*args, **kwargs)
49+
50+
def call_extend(self, *args, **kwargs):
51+
return self.extend(*args, **kwargs)
52+
53+
def call_encode(self, *args, **kwargs):
54+
return self.encode(*args, **kwargs)
55+
56+
def call_decode(self, *args, **kwargs):
57+
return self.decode(*args, **kwargs)
58+
59+
def call_str(self, *args, **kwargs):
60+
return self.str(*args, **kwargs)
61+
62+
def call_bytes(self, *args, **kwargs):
63+
return self.bytes(*args, **kwargs)
64+
65+
def call_bytearray(self, *args, **kwargs):
66+
return self.bytearray(*args, **kwargs)

0 commit comments

Comments
 (0)