Skip to content

Commit 021d07b

Browse files
Kixunil and mxinden authored
Replace regex with memchr (#385)
`regex` was used only in four trivial cases that could be implemented more simply, either naively or using `memchr`, without losing performance. As such, the dependency needlessly increases build time, binary size, and attack surface. This change replaces `regex` with naive or `memchr`-based implementations. Signed-off-by: Martin Habovstiak <[email protected]> Co-authored-by: Max Inden <[email protected]>
1 parent e1b197b commit 021d07b

File tree

3 files changed

+39
-31
lines changed

3 files changed

+39
-31
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ lazy_static = "^1.4"
3131
libc = { version = "^0.2", optional = true }
3232
parking_lot = "^0.11"
3333
protobuf = { version = "^2.0", optional = true }
34-
regex = "^1.3"
34+
memchr = "^2.3"
3535
reqwest = { version = "^0.11", features = ["blocking"], optional = true }
3636
thiserror = "^1.0"
3737

src/desc.rs

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,44 @@ use std::collections::{BTreeSet, HashMap};
55
use std::hash::Hasher;
66

77
use fnv::FnvHasher;
8-
use regex::Regex;
98

109
use crate::errors::{Error, Result};
1110
use crate::metrics::SEPARATOR_BYTE;
1211
use crate::proto::LabelPair;
1312

14-
// Details of required format are at
15-
// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
16-
fn is_valid_metric_name(name: &str) -> bool {
17-
lazy_static! {
18-
static ref VALIDATOR: Regex =
19-
Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid.");
20-
}
/// Returns `true` if `c` belongs to the character class `[a-zA-Z_]`.
///
/// Only ASCII letters are accepted; non-ASCII alphabetic characters such as
/// `é` are rejected.
fn matches_charset_without_colon(c: char) -> bool {
    c.is_ascii_alphabetic() || c == '_'
}
2117

22-
VALIDATOR.is_match(name)
18+
// Returns true when `c` is in the character class [a-zA-Z_:], i.e. anything
// accepted by `matches_charset_without_colon` plus the ':' character.
19+
fn matches_charset_with_colon(c: char) -> bool {
20+
matches_charset_without_colon(c) || c == ':'
2321
}
2422

25-
fn is_valid_label_name(name: &str) -> bool {
26-
lazy_static! {
27-
static ref VALIDATOR: Regex =
28-
Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid.");
29-
}
/// Checks whether `input` is a valid identifier over the character set
/// described by `charset_validator`.
///
/// Equivalent to matching the regex `^[S][S0-9]*$`, where `S` is the set of
/// characters for which `charset_validator` returns `true`:
///
/// * the first character must be in `S` (so a leading digit is rejected
///   unless `S` itself contains it);
/// * every following character must be in `S` or be an ASCII digit `0-9`;
/// * the empty string is never valid.
fn is_valid_ident<F: FnMut(char) -> bool>(input: &str, mut charset_validator: F) -> bool {
    let mut chars = input.chars();
    match chars.next() {
        // The first character passed; the remainder may additionally contain digits.
        Some(first) if charset_validator(first) => {
            chars.all(|c| charset_validator(c) || c.is_ascii_digit())
        }
        // Empty input, or a first character outside the allowed set.
        _ => false,
    }
}
37+
38+
// Details of required format are at
39+
// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
40+
pub(super) fn is_valid_metric_name(name: &str) -> bool {
41+
is_valid_ident(name, matches_charset_with_colon)
42+
}
3043

31-
VALIDATOR.is_match(name)
44+
// Returns true when `name` passes `is_valid_ident` with the colon-free
// character set (`matches_charset_without_colon`), i.e. a leading [a-zA-Z_]
// followed by characters from that set or digits.
pub(super) fn is_valid_label_name(name: &str) -> bool {
45+
is_valid_ident(name, matches_charset_without_colon)
3246
}
3347

3448
/// The descriptor used by every Prometheus [`Metric`](crate::core::Metric). It is essentially

src/encoder/text.rs

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0.
22

3-
use regex::{Match, Regex};
43
use std::borrow::Cow;
54
use std::io::Write;
65

@@ -216,26 +215,21 @@ fn label_pairs_to_text(
216215
Ok(())
217216
}
218217

218+
fn find_first_occurence(v: &str, include_double_quote: bool) -> Option<usize> {
219+
if include_double_quote {
220+
memchr::memchr3(b'\\', b'\n', b'\"', v.as_bytes())
221+
} else {
222+
memchr::memchr2(b'\\', b'\n', v.as_bytes())
223+
}
224+
}
225+
219226
/// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
220227
/// `include_double_quote` is true.
221228
///
222229
/// Implementation adapted from
223230
/// https://lise-henry.github.io/articles/optimising_strings.html
224231
fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
225-
// Regex compilation is expensive. Use `lazy_static` to compile the regexes
226-
// once per process lifetime and not once per function invocation.
227-
lazy_static! {
228-
static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid.");
229-
static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid.");
230-
}
231-
232-
let first_occurence = if include_double_quote {
233-
QUOTED_ESCAPER.find(v)
234-
} else {
235-
ESCAPER.find(v)
236-
}
237-
.as_ref()
238-
.map(Match::start);
232+
let first_occurence = find_first_occurence(v, include_double_quote);
239233

240234
if let Some(first) = first_occurence {
241235
let mut escaped = String::with_capacity(v.len() * 2);

0 commit comments

Comments
 (0)