Skip to content

Commit 1247477

Browse files
authored
fix: Escape regex symbols in all versions of like operator (#25)
1 parent c296882 commit 1247477

File tree

1 file changed

+43
-60
lines changed

1 file changed

+43
-60
lines changed

arrow/src/compute/kernels/comparison.rs

Lines changed: 43 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,47 @@ pub fn ilike_utf8<OffsetSize: StringOffsetSizeTrait>(
237237
like_utf8_impl(left, right, false, false)
238238
}
239239

240+
fn like_to_regex(pat: &str) -> Result<String> {
241+
let mut is_escaped = false;
242+
let mut re_pattern = String::new();
243+
let regex_chars = "-[]{}()*+?.,^$|#";
244+
for c in pat.chars() {
245+
if is_escaped {
246+
is_escaped = false;
247+
if c == '%' {
248+
re_pattern.push('%');
249+
continue;
250+
} else if c == '_' {
251+
re_pattern.push('_');
252+
continue;
253+
} else if c == '\\' {
254+
re_pattern.push_str("\\\\");
255+
continue;
256+
}
257+
}
258+
259+
if regex_chars.find(c).is_some() {
260+
re_pattern.push('\\');
261+
re_pattern.push(c);
262+
} else if c == '%' {
263+
re_pattern.push_str(".*");
264+
} else if c == '_' {
265+
re_pattern.push('.');
266+
} else if c == '\\' {
267+
is_escaped = true;
268+
} else {
269+
re_pattern.push(c);
270+
}
271+
}
272+
if is_escaped {
273+
return Err(ArrowError::InvalidArgumentError(format!(
274+
"LIKE pattern must not end with escape character. Pattern {}",
275+
pat
276+
)));
277+
}
278+
Ok(re_pattern)
279+
}
280+
240281
fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
241282
left: &GenericStringArray<OffsetSize>,
242283
right: &GenericStringArray<OffsetSize>,
@@ -261,43 +302,7 @@ fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
261302
let re = if let Some(ref regex) = map.get(pat) {
262303
regex
263304
} else {
264-
let mut is_escaped = false;
265-
let mut re_pattern = String::new();
266-
let regex_chars = "-[]{}()*+?.,^$|#";
267-
for c in pat.chars() {
268-
if is_escaped {
269-
is_escaped = false;
270-
if c == '%' {
271-
re_pattern.push('%');
272-
continue;
273-
} else if c == '_' {
274-
re_pattern.push('_');
275-
continue;
276-
} else if c == '\\' {
277-
re_pattern.push_str("\\\\");
278-
continue;
279-
}
280-
}
281-
282-
if regex_chars.find(c).is_some() {
283-
re_pattern.push('\\');
284-
re_pattern.push(c);
285-
} else if c == '%' {
286-
re_pattern.push_str(".*");
287-
} else if c == '_' {
288-
re_pattern.push('.');
289-
} else if c == '\\' {
290-
is_escaped = true;
291-
} else {
292-
re_pattern.push(c);
293-
}
294-
}
295-
if is_escaped {
296-
return Err(ArrowError::InvalidArgumentError(format!(
297-
"LIKE pattern must not end with escape character. Pattern {}",
298-
pat
299-
)));
300-
}
305+
let re_pattern = like_to_regex(pat)?;
301306
let re = RegexBuilder::new(&format!("^{}$", re_pattern))
302307
.case_insensitive(!case_sensitive)
303308
.build()
@@ -406,29 +411,7 @@ fn like_utf8_scalar_impl<OffsetSize: StringOffsetSizeTrait>(
406411
}
407412
}
408413
} else {
409-
let mut prev_char = None;
410-
let mut re_pattern = right
411-
.replace(
412-
|c| {
413-
let res = c == '%' && prev_char != Some('\\');
414-
prev_char = Some(c);
415-
res
416-
},
417-
".*",
418-
)
419-
.replace("\\%", "%");
420-
421-
let mut prev_char = None;
422-
re_pattern = re_pattern
423-
.replace(
424-
|c| {
425-
let res = c == '_' && prev_char != Some('\\');
426-
prev_char = Some(c);
427-
res
428-
},
429-
".",
430-
)
431-
.replace("\\_", "_");
414+
let re_pattern = like_to_regex(right)?;
432415
let re = RegexBuilder::new(&format!("^{}$", re_pattern))
433416
.case_insensitive(!case_sensitive)
434417
.build()

0 commit comments

Comments
 (0)