Skip to content

Commit c296882

Browse files
authored
fix: Escape regex symbols in like operator (#22)
1 parent 93c26ef commit c296882

File tree

1 file changed

+37
-23
lines changed

1 file changed

+37
-23
lines changed

arrow/src/compute/kernels/comparison.rs

Lines changed: 37 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -261,29 +261,43 @@ fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
261261
let re = if let Some(ref regex) = map.get(pat) {
262262
regex
263263
} else {
264-
let mut prev_char = None;
265-
let mut re_pattern = pat
266-
.replace(
267-
|c| {
268-
let res = c == '%' && prev_char != Some('\\');
269-
prev_char = Some(c);
270-
res
271-
},
272-
".*",
273-
)
274-
.replace("\\%", "%");
275-
276-
let mut prev_char = None;
277-
re_pattern = re_pattern
278-
.replace(
279-
|c| {
280-
let res = c == '_' && prev_char != Some('\\');
281-
prev_char = Some(c);
282-
res
283-
},
284-
".",
285-
)
286-
.replace("\\_", "_");
264+
let mut is_escaped = false;
265+
let mut re_pattern = String::new();
266+
let regex_chars = "-[]{}()*+?.,^$|#";
267+
for c in pat.chars() {
268+
if is_escaped {
269+
is_escaped = false;
270+
if c == '%' {
271+
re_pattern.push('%');
272+
continue;
273+
} else if c == '_' {
274+
re_pattern.push('_');
275+
continue;
276+
} else if c == '\\' {
277+
re_pattern.push_str("\\\\");
278+
continue;
279+
}
280+
}
281+
282+
if regex_chars.find(c).is_some() {
283+
re_pattern.push('\\');
284+
re_pattern.push(c);
285+
} else if c == '%' {
286+
re_pattern.push_str(".*");
287+
} else if c == '_' {
288+
re_pattern.push('.');
289+
} else if c == '\\' {
290+
is_escaped = true;
291+
} else {
292+
re_pattern.push(c);
293+
}
294+
}
295+
if is_escaped {
296+
return Err(ArrowError::InvalidArgumentError(format!(
297+
"LIKE pattern must not end with escape character. Pattern {}",
298+
pat
299+
)));
300+
}
287301
let re = RegexBuilder::new(&format!("^{}$", re_pattern))
288302
.case_insensitive(!case_sensitive)
289303
.build()

0 commit comments

Comments
 (0)