Skip to content

Commit bf7c7ef

Browse files
authored
Add memory pre-checks to padding, replace and f-string operations (#238)
1 parent 4ee3fd7 commit bf7c7ef

File tree

5 files changed

+288
-6
lines changed

5 files changed

+288
-6
lines changed

crates/monty/src/bytecode/vm/format.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
defer_drop,
66
exception_private::{ExcType, RunError, SimpleException},
77
fstring::{ParsedFormatSpec, ascii_escape, decode_format_spec, format_string, format_with_spec},
8-
resource::ResourceTracker,
8+
resource::{ResourceTracker, check_repeat_size},
99
types::{PyTrait, str::allocate_string},
1010
value::Value,
1111
};
@@ -61,6 +61,10 @@ impl<T: ResourceTracker> VM<'_, '_, T> {
6161

6262
let spec = this.get_format_spec(spec_value, value)?;
6363

64+
// Pre-check: reject format specs with huge width before pad_string
65+
// allocates an untracked Rust String.
66+
check_repeat_size(spec.width, spec.fill.len_utf8(), this.heap.tracker())?;
67+
6468
match conversion {
6569
// No conversion - format original value
6670
0 => format_with_spec(value, &spec, this.heap, this.interns)?,

crates/monty/src/resource.rs

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@ use crate::{
1717
/// the allocation check can catch them.
1818
pub const LARGE_RESULT_THRESHOLD: usize = 100_000;
1919

20-
/// Pre-checks that a sequence repeat won't exceed resource limits before allocating.
20+
/// Pre-checks that an operation producing `item_len * count` bytes won't exceed resource limits.
2121
///
22-
/// This prevents DoS via expressions like `'x' * 999_999_999` or `b'ab' * huge_int`
23-
/// by estimating the result size and checking against the resource tracker.
22+
/// Used for sequence repeats (`'x' * 999_999_999`), padding operations
23+
/// (`str.ljust`, `str.center`, `str.zfill`, etc.), and any other operation
24+
/// where the result size is a simple product of two known values.
2425
pub fn check_repeat_size(item_len: usize, count: usize, tracker: &impl ResourceTracker) -> Result<(), ResourceError> {
2526
check_estimated_size(item_len.saturating_mul(count), tracker)
2627
}
@@ -77,6 +78,41 @@ pub fn check_div_size(dividend_bits: u64, tracker: &impl ResourceTracker) -> Res
7778
check_estimated_size(estimate_bits_to_bytes(dividend_bits), tracker)
7879
}
7980

81+
/// Pre-checks that a string/bytes replace won't exceed resource limits before allocating.
82+
///
83+
/// This prevents DoS via expressions like `('a' * 1000).replace('a', 'b' * 10_000_000)`
84+
/// where a small tracked input is amplified into a huge untracked Rust `String`/`Vec`
85+
/// by `String::replace()` before `allocate_string()` can check the result.
86+
///
87+
/// The upper bound on result size is: if `old` is non-empty, at most `input_len / old_len`
88+
/// replacements can occur, each producing `new_len` bytes instead of `old_len`. When `count`
89+
/// is specified, replacements are capped to that value.
90+
pub fn check_replace_size(
91+
input_len: usize,
92+
old_len: usize,
93+
new_len: usize,
94+
count: i64,
95+
tracker: &impl ResourceTracker,
96+
) -> Result<(), ResourceError> {
97+
// Empty pattern (old_len == 0): inserts before each element + after the last = input_len + 1
98+
let max_replacements = input_len
99+
.checked_div(old_len)
100+
.unwrap_or_else(|| input_len.saturating_add(1));
101+
102+
let replacements = if count < 0 {
103+
max_replacements
104+
} else {
105+
max_replacements.min(usize::try_from(count).unwrap_or(usize::MAX))
106+
};
107+
108+
// Result = input_len - (replacements * old_len) + (replacements * new_len)
109+
let removed = replacements.saturating_mul(old_len);
110+
let added = replacements.saturating_mul(new_len);
111+
let estimated = input_len.saturating_sub(removed).saturating_add(added);
112+
113+
check_estimated_size(estimated, tracker)
114+
}
115+
80116
/// Checks an estimated result size against the resource tracker.
81117
///
82118
/// Only calls the tracker when the estimate exceeds `LARGE_RESULT_THRESHOLD`

crates/monty/src/types/bytes.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ use crate::{
7979
exception_private::{ExcType, RunResult, SimpleException},
8080
heap::{DropWithHeap, Heap, HeapData, HeapGuard, HeapId},
8181
intern::{Interns, StaticStrings, StringId},
82-
resource::{ResourceError, ResourceTracker},
82+
resource::{ResourceError, ResourceTracker, check_repeat_size, check_replace_size},
8383
types::List,
8484
value::{EitherStr, Value},
8585
};
@@ -1774,6 +1774,8 @@ fn bytes_replace(
17741774
) -> RunResult<Value> {
17751775
let (old, new, count) = parse_bytes_replace_args("bytes.replace", args, heap, interns)?;
17761776

1777+
check_replace_size(bytes.len(), old.len(), new.len(), count, heap.tracker())?;
1778+
17771779
let result = if count < 0 {
17781780
bytes_replace_all(bytes, &old, &new, heap)?
17791781
} else {
@@ -1949,6 +1951,7 @@ fn bytes_center(
19491951
let result = if width <= len {
19501952
bytes.to_vec()
19511953
} else {
1954+
check_repeat_size(width, 1, heap.tracker())?;
19521955
let total_pad = width - len;
19531956
let left_pad = total_pad / 2;
19541957
let right_pad = total_pad - left_pad;
@@ -1981,6 +1984,7 @@ fn bytes_ljust(
19811984
let result = if width <= len {
19821985
bytes.to_vec()
19831986
} else {
1987+
check_repeat_size(width, 1, heap.tracker())?;
19841988
let pad = width - len;
19851989
let mut result = Vec::with_capacity(width);
19861990
result.extend_from_slice(bytes);
@@ -2008,6 +2012,7 @@ fn bytes_rjust(
20082012
let result = if width <= len {
20092013
bytes.to_vec()
20102014
} else {
2015+
check_repeat_size(width, 1, heap.tracker())?;
20112016
let pad = width - len;
20122017
let mut result = Vec::with_capacity(width);
20132018
for _ in 0..pad {
@@ -2076,6 +2081,7 @@ fn bytes_zfill(bytes: &[u8], args: ArgValues, heap: &mut Heap<impl ResourceTrack
20762081
let result = if width <= len {
20772082
bytes.to_vec()
20782083
} else {
2084+
check_repeat_size(width, 1, heap.tracker())?;
20792085
let pad = width - len;
20802086
let mut result = Vec::with_capacity(width);
20812087

crates/monty/src/types/str.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::{
1616
exception_private::{ExcType, RunResult},
1717
heap::{DropWithHeap, Heap, HeapData, HeapGuard, HeapId},
1818
intern::{Interns, StaticStrings, StringId},
19-
resource::{ResourceError, ResourceTracker},
19+
resource::{ResourceError, ResourceTracker, check_repeat_size, check_replace_size},
2020
types::Type,
2121
value::{EitherStr, Value},
2222
};
@@ -1728,6 +1728,8 @@ fn str_rpartition(
17281728
fn str_replace(s: &str, args: ArgValues, heap: &mut Heap<impl ResourceTracker>, interns: &Interns) -> RunResult<Value> {
17291729
let (old, new, count) = parse_replace_args("str.replace", args, heap, interns)?;
17301730

1731+
check_replace_size(s.len(), old.len(), new.len(), count, heap.tracker())?;
1732+
17311733
let result = if count < 0 {
17321734
s.replace(&old, &new)
17331735
} else {
@@ -1820,6 +1822,7 @@ fn str_center(s: &str, args: ArgValues, heap: &mut Heap<impl ResourceTracker>, i
18201822
let result = if width <= len {
18211823
s.to_owned()
18221824
} else {
1825+
check_repeat_size(width, fillchar.len_utf8(), heap.tracker())?;
18231826
let total_pad = width - len;
18241827
let left_pad = total_pad / 2;
18251828
let right_pad = total_pad - left_pad;
@@ -1847,6 +1850,7 @@ fn str_ljust(s: &str, args: ArgValues, heap: &mut Heap<impl ResourceTracker>, in
18471850
let result = if width <= len {
18481851
s.to_owned()
18491852
} else {
1853+
check_repeat_size(width, fillchar.len_utf8(), heap.tracker())?;
18501854
let pad = width - len;
18511855
let mut result = String::with_capacity(width);
18521856
result.push_str(s);
@@ -1869,6 +1873,7 @@ fn str_rjust(s: &str, args: ArgValues, heap: &mut Heap<impl ResourceTracker>, in
18691873
let result = if width <= len {
18701874
s.to_owned()
18711875
} else {
1876+
check_repeat_size(width, fillchar.len_utf8(), heap.tracker())?;
18721877
let pad = width - len;
18731878
let mut result = String::with_capacity(width);
18741879
for _ in 0..pad {
@@ -1936,6 +1941,8 @@ fn str_zfill(s: &str, args: ArgValues, heap: &mut Heap<impl ResourceTracker>) ->
19361941
let result = if width <= len {
19371942
s.to_owned()
19381943
} else {
1944+
// zfill always pads with ASCII '0' (1 byte)
1945+
check_repeat_size(width, 1, heap.tracker())?;
19391946
let pad = width - len;
19401947
let mut chars = s.chars();
19411948
let first = chars.next();

0 commit comments

Comments
 (0)