Skip to content

Commit cf4b37f

Browse files
Use simdutf8
1 parent 6621924 commit cf4b37f

File tree

9 files changed: 47 additions and 20 deletions.

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ regex-automata = "0.4.14"
3535
serde = { version = "1.0.228", features = ["derive"] }
3636
serde_json = "1.0.149"
3737
serde-wasm-bindgen = "0.6.5"
38+
simdutf8 = "0.1.5"
3839
sliceslice = "0.4.3"
3940
thiserror = "2.0.18"
4041
wasm-bindgen = { version = "0.2.108", features = ["serde-serialize"] }

engine/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ memchr.workspace = true
3030
rand.workspace = true
3131
regex-automata = { workspace = true, optional = true }
3232
serde.workspace = true
33+
simdutf8.workspace = true
3334
sliceslice.workspace = true
3435
thiserror.workspace = true
3536
wildcard.workspace = true

engine/src/ast/index_expr.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,10 @@ mod tests {
936936
LhsValue::Bytes(bytes) => bytes,
937937
_ => unreachable!(),
938938
};
939-
assert_eq!(std::str::from_utf8(&bytes).unwrap(), format!("[{i}][{j}]"));
939+
assert_eq!(
940+
simdutf8::basic::from_utf8(&bytes).unwrap(),
941+
format!("[{i}][{j}]")
942+
);
940943
}
941944

942945
let indexes = [FieldIndex::MapEach, FieldIndex::ArrayIndex(i)];
@@ -948,7 +951,10 @@ mod tests {
948951
LhsValue::Bytes(bytes) => bytes,
949952
_ => unreachable!(),
950953
};
951-
assert_eq!(std::str::from_utf8(&bytes).unwrap(), format!("[{j}][{i}]"));
954+
assert_eq!(
955+
simdutf8::basic::from_utf8(&bytes).unwrap(),
956+
format!("[{j}][{i}]")
957+
);
952958
}
953959
}
954960

@@ -963,7 +969,10 @@ mod tests {
963969
LhsValue::Bytes(bytes) => bytes,
964970
_ => unreachable!(),
965971
};
966-
assert_eq!(std::str::from_utf8(&bytes).unwrap(), format!("[{i}][{j}]"));
972+
assert_eq!(
973+
simdutf8::basic::from_utf8(&bytes).unwrap(),
974+
format!("[{i}][{j}]")
975+
);
967976
j = (j + 1) % 10;
968977
i += (j == 0) as u32;
969978
}

engine/src/lhs_types/bytes.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ impl Serialize for Bytes<'_> {
254254
where
255255
S: Serializer,
256256
{
257-
if let Ok(s) = std::str::from_utf8(self) {
257+
if let Ok(s) = simdutf8::basic::from_utf8(self) {
258258
serializer.serialize_str(s)
259259
} else {
260260
serializer.serialize_bytes(self)

engine/src/lhs_types/map.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,12 +339,12 @@ impl Serialize for Map<'_> {
339339
let to_map = self
340340
.data
341341
.iter()
342-
.all(|(key, _)| std::str::from_utf8(key).is_ok());
342+
.all(|(key, _)| simdutf8::basic::from_utf8(key).is_ok());
343343

344344
if to_map {
345345
let mut map = serializer.serialize_map(Some(self.len()))?;
346346
for (k, v) in self.data.iter() {
347-
map.serialize_entry(std::str::from_utf8(k).unwrap(), v)?;
347+
map.serialize_entry(simdutf8::basic::from_utf8(k).unwrap(), v)?;
348348
}
349349
map.end()
350350
} else {

engine/src/rhs_types/bytes.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ use serde::{Serialize, Serializer};
44
use std::fmt::{self, Debug, Formatter};
55
use std::hash::{Hash, Hasher};
66
use std::ops::Deref;
7-
use std::str;
87

98
/// BytesFormat describes the format in which the string was expressed
109
#[derive(PartialEq, Eq, Copy, Clone)]
@@ -48,10 +47,12 @@ impl Serialize for BytesExpr {
4847
S: Serializer,
4948
{
5049
match self.format() {
51-
BytesFormat::Quoted | BytesFormat::Raw(_) => match std::str::from_utf8(&self.data) {
52-
Ok(s) => s.serialize(serializer),
53-
Err(_) => self.data.serialize(serializer),
54-
},
50+
BytesFormat::Quoted | BytesFormat::Raw(_) => {
51+
match simdutf8::basic::from_utf8(&self.data) {
52+
Ok(s) => s.serialize(serializer),
53+
Err(_) => self.data.serialize(serializer),
54+
}
55+
}
5556
BytesFormat::Byte => self.data.serialize(serializer),
5657
}
5758
}
@@ -117,10 +118,12 @@ impl Debug for BytesExpr {
117118
}
118119

119120
match self.format {
120-
BytesFormat::Quoted | BytesFormat::Raw(_) => match std::str::from_utf8(&self.data) {
121-
Ok(s) => s.fmt(f),
122-
Err(_) => fmt_raw(&self.data, f),
123-
},
121+
BytesFormat::Quoted | BytesFormat::Raw(_) => {
122+
match simdutf8::basic::from_utf8(&self.data) {
123+
Ok(s) => s.fmt(f),
124+
Err(_) => fmt_raw(&self.data, f),
125+
}
126+
}
124127
BytesFormat::Byte => fmt_raw(&self.data, f),
125128
}
126129
}

engine/src/scheme.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,16 @@ impl<'i> Lex<'i> for FieldIndex {
7676
input,
7777
)),
7878
},
79-
RhsValue::Bytes(b) => match String::from_utf8(b.into()) {
80-
Ok(s) => Ok((FieldIndex::MapKey(s), rest)),
81-
Err(_) => Err((LexErrorKind::ExpectedLiteral("expected utf8 string"), input)),
82-
},
79+
RhsValue::Bytes(b) => {
80+
match simdutf8::basic::from_utf8(&b) {
81+
Ok(_) => {
82+
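// SAFETY: `simdutf8::basic::from_utf8(&b)` returned `Ok` just above, so the bytes of `b` are valid UTF-8; this relies on `b.into()` yielding those same bytes unchanged (NOTE(review): confirm against the `Into` impl).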
83+
let s = unsafe { String::from_utf8_unchecked(b.into()) };
84+
Ok((FieldIndex::MapKey(s), rest))
85+
}
86+
Err(_) => Err((LexErrorKind::ExpectedLiteral("expected utf8 string"), input)),
87+
}
88+
}
8389
_ => unreachable!(),
8490
}
8591
}

engine/src/types.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,7 @@ impl Serialize for LhsValue<'_> {
785785
match self {
786786
LhsValue::Ip(ip) => ip.serialize(serializer),
787787
LhsValue::Bytes(bytes) => {
788-
if let Ok(s) = std::str::from_utf8(bytes) {
788+
if let Ok(s) = simdutf8::basic::from_utf8(bytes) {
789789
serializer.serialize_str(s)
790790
} else {
791791
serializer.serialize_bytes(bytes)

0 commit comments

Comments
 (0)