Skip to content

Commit ce565cd

Browse files
authored
[Bug #20653] Fix memory leak in String#start_with? when regexp times out (ruby#11255)
Fix memory leak in String#start_with? when regexp times out [Bug #20653] This commit refactors how Onigmo handles timeout. Instead of raising a timeout error, onig_search will return a ONIGERR_TIMEOUT which the caller can free memory, and then raise a timeout error. This fixes a memory leak in String#start_with when the regexp times out. For example: regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001) str = "a" * 1000000 + "x" 10.times do 100.times do str.start_with?(regex) rescue end puts `ps -o rss= -p #{$$}` end Before: 33216 51936 71152 81728 97152 103248 120384 133392 133520 133616 After: 14912 15376 15824 15824 16128 16128 16144 16144 16160 16160
1 parent 6d74483 commit ce565cd

File tree

4 files changed

+32
-54
lines changed

4 files changed

+32
-54
lines changed

re.c

Lines changed: 15 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,10 +1671,16 @@ rb_reg_onig_match(VALUE re, VALUE str,
16711671
if (result < 0) {
16721672
onig_region_free(regs, 0);
16731673

1674-
if (result != ONIG_MISMATCH) {
1674+
switch (result) {
1675+
case ONIG_MISMATCH:
1676+
break;
1677+
case ONIGERR_TIMEOUT:
1678+
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
1679+
default: {
16751680
onig_errmsg_buffer err = "";
16761681
onig_error_code_to_str((UChar*)err, (int)result);
16771682
rb_reg_raise(err, re);
1683+
}
16781684
}
16791685
}
16801686

@@ -1735,23 +1741,6 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p
17351741
ONIG_OPTION_NONE);
17361742
}
17371743

1738-
struct rb_reg_onig_match_args {
1739-
VALUE re;
1740-
VALUE str;
1741-
struct reg_onig_search_args args;
1742-
struct re_registers regs;
1743-
1744-
OnigPosition result;
1745-
};
1746-
1747-
static VALUE
1748-
rb_reg_onig_match_try(VALUE value_args)
1749-
{
1750-
struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args;
1751-
args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs);
1752-
return Qnil;
1753-
}
1754-
17551744
/* returns byte offset */
17561745
static long
17571746
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
@@ -1762,38 +1751,22 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
17621751
return -1;
17631752
}
17641753

1765-
struct rb_reg_onig_match_args args = {
1766-
.re = re,
1767-
.str = str,
1768-
.args = {
1769-
.pos = pos,
1770-
.range = reverse ? 0 : len,
1771-
},
1772-
.regs = {0}
1754+
struct reg_onig_search_args args = {
1755+
.pos = pos,
1756+
.range = reverse ? 0 : len,
17731757
};
1758+
struct re_registers regs = {0};
17741759

1775-
/* If there is a timeout set, then rb_reg_onig_match could raise a
1776-
* Regexp::TimeoutError so we want to protect it from leaking memory. */
1777-
if (rb_reg_match_time_limit) {
1778-
int state;
1779-
rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state);
1780-
if (state) {
1781-
onig_region_free(&args.regs, false);
1782-
rb_jump_tag(state);
1783-
}
1784-
}
1785-
else {
1786-
rb_reg_onig_match_try((VALUE)&args);
1787-
}
1760+
OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
17881761

1789-
if (args.result == ONIG_MISMATCH) {
1762+
if (result == ONIG_MISMATCH) {
17901763
rb_backref_set(Qnil);
17911764
return ONIG_MISMATCH;
17921765
}
17931766

17941767
VALUE match = match_alloc(rb_cMatch);
17951768
rb_matchext_t *rm = RMATCH_EXT(match);
1796-
rm->regs = args.regs;
1769+
rm->regs = regs;
17971770

17981771
if (set_backref_str) {
17991772
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
@@ -1810,7 +1783,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
18101783
rb_backref_set(match);
18111784
if (set_match) *set_match = match;
18121785

1813-
return args.result;
1786+
return result;
18141787
}
18151788

18161789
long
@@ -4672,12 +4645,6 @@ rb_reg_timeout_p(regex_t *reg, void *end_time_)
46724645
return false;
46734646
}
46744647

4675-
void
4676-
rb_reg_raise_timeout(void)
4677-
{
4678-
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
4679-
}
4680-
46814648
/*
46824649
* call-seq:
46834650
* Regexp.timeout -> float or nil

regexec.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5575,8 +5575,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
55755575

55765576
timeout:
55775577
MATCH_ARG_FREE(msa);
5578-
onig_region_free(region, false);
5579-
HANDLE_REG_TIMEOUT_IN_MATCH_AT;
5578+
return ONIGERR_TIMEOUT;
55805579
}
55815580

55825581
extern OnigPosition

regint.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,6 @@
163163
rb_thread_check_ints(); \
164164
} \
165165
} while(0)
166-
# define HANDLE_REG_TIMEOUT_IN_MATCH_AT do { \
167-
rb_reg_raise_timeout(); \
168-
} while (0)
169166
# define onig_st_init_table st_init_table
170167
# define onig_st_init_table_with_size st_init_table_with_size
171168
# define onig_st_init_numtable st_init_numtable
@@ -1002,7 +999,6 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c
1002999
extern size_t onig_memsize(const regex_t *reg);
10031000
extern size_t onig_region_memsize(const struct re_registers *regs);
10041001
bool rb_reg_timeout_p(regex_t *reg, void *end_time);
1005-
NORETURN(void rb_reg_raise_timeout(void));
10061002
#endif
10071003

10081004
RUBY_SYMBOL_EXPORT_END

test/ruby/test_string.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,6 +2010,22 @@ def test_start_with_regexp
20102010
assert_nil($&)
20112011
end
20122012

2013+
def test_start_with_timeout_memory_leak
2014+
assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~'end;'}", "[Bug #20653]", rss: true)
2015+
regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
2016+
str = "a" * 1_000_000 + "x"
2017+
2018+
code = proc do
2019+
str.start_with?(regex)
2020+
rescue
2021+
end
2022+
2023+
10.times(&code)
2024+
begin;
2025+
1_000.times(&code)
2026+
end;
2027+
end
2028+
20132029
def test_strip
20142030
assert_equal(S("x"), S(" x ").strip)
20152031
assert_equal(S("x"), S(" \n\r\t x \t\r\n\n ").strip)

0 commit comments

Comments
 (0)