Skip to content

Commit cbe264d

Browse files
committed
Add tests
1 parent 33da962 commit cbe264d

File tree

3 files changed

+198
-17
lines changed

3 files changed

+198
-17
lines changed

.github/workflows/CI.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,29 @@ on:
88
- main
99

1010
jobs:
11+
test:
12+
strategy:
13+
matrix:
14+
settings:
15+
- target: x86_64-unknown-linux-gnu
16+
os: ubuntu-latest
17+
- target: x86_64-pc-windows-msvc
18+
os: windows-latest
19+
- target: aarch64-unknown-linux-gnu
20+
os: ubuntu-24.04-arm
21+
- target: aarch64-apple-darwin
22+
os: macos-latest
23+
fail-fast: false
24+
runs-on: ${{ matrix.settings.os }}
25+
steps:
26+
- uses: actions/checkout@v5
27+
- name: Setup Rust
28+
uses: dtolnay/rust-toolchain@stable
29+
with:
30+
targets: ${{ matrix.settings.target }}
31+
- name: Run tests
32+
run: cargo test
33+
1134
bench:
1235
strategy:
1336
matrix:

src/lib.rs

Lines changed: 175 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
#![cfg_attr(feature = "nightly", feature(test))]
22

3-
#[cfg(target_arch = "x86_64")]
4-
pub use x86::encode_str;
5-
63
#[cfg(target_arch = "x86_64")]
74
mod x86;
85

@@ -134,8 +131,20 @@ pub fn encode_str_fallback<S: AsRef<str>>(input: S) -> String {
134131
unsafe { String::from_utf8_unchecked(result) }
135132
}
136133

137-
#[cfg(not(target_arch = "x86_64"))]
134+
/// Main entry point for JSON string escaping with SIMD acceleration
135+
#[inline]
138136
pub fn encode_str<S: AsRef<str>>(input: S) -> String {
137+
#[cfg(target_arch = "x86_64")]
138+
{
139+
// Runtime CPU feature detection for x86_64
140+
if is_x86_feature_detected!("avx2") {
141+
unsafe { return x86::encode_str_avx2(input) }
142+
} else if is_x86_feature_detected!("sse2") {
143+
unsafe { return x86::encode_str_sse2(input) }
144+
}
145+
}
146+
147+
// Fallback to optimized scalar implementation
139148
encode_str_fallback(input)
140149
}
141150

@@ -170,3 +179,165 @@ fn test_escape_json_string() {
170179
fixture
171180
);
172181
}
182+
183+
// Test cases for various string sizes to cover different SIMD paths
184+
185+
#[test]
186+
fn test_empty_string() {
187+
assert_eq!(encode_str(""), r#""""#);
188+
}
189+
190+
#[test]
191+
fn test_very_small_strings() {
192+
// Less than 16 bytes (SSE register size)
193+
assert_eq!(encode_str("a"), r#""a""#);
194+
assert_eq!(encode_str("ab"), r#""ab""#);
195+
assert_eq!(encode_str("hello"), r#""hello""#);
196+
assert_eq!(encode_str("hello\n"), r#""hello\n""#);
197+
assert_eq!(encode_str("\""), r#""\"""#);
198+
assert_eq!(encode_str("\\"), r#""\\""#);
199+
assert_eq!(encode_str("\t"), r#""\t""#);
200+
assert_eq!(encode_str("\r\n"), r#""\r\n""#);
201+
}
202+
203+
#[test]
204+
fn test_small_strings_16_bytes() {
205+
// Exactly 16 bytes - SSE register boundary
206+
let s16 = "0123456789abcdef";
207+
assert_eq!(s16.len(), 16);
208+
assert_eq!(encode_str(s16), serde_json::to_string(s16).unwrap());
209+
210+
// 15 bytes with one escape - just below the SSE register boundary
211+
let s16_esc = "01234567\t9abcde";
212+
assert_eq!(s16_esc.len(), 15); // \t is 1 byte
213+
assert_eq!(encode_str(s16_esc), serde_json::to_string(s16_esc).unwrap());
214+
}
215+
216+
#[test]
217+
fn test_medium_strings_32_bytes() {
218+
// Exactly 32 bytes - AVX2 register boundary
219+
let s32 = "0123456789abcdef0123456789abcdef";
220+
assert_eq!(s32.len(), 32);
221+
assert_eq!(encode_str(s32), serde_json::to_string(s32).unwrap());
222+
223+
// 31 bytes (just below the AVX2 register boundary) with one escape at byte index 15
224+
let s32_esc = "0123456789abcde\"0123456789abcde";
225+
assert_eq!(encode_str(s32_esc), serde_json::to_string(s32_esc).unwrap());
226+
}
227+
228+
#[test]
229+
fn test_large_strings_128_bytes() {
230+
// Exactly 128 bytes - main loop size
231+
let s128 = "0123456789abcdef".repeat(8);
232+
assert_eq!(s128.len(), 128);
233+
assert_eq!(encode_str(&s128), serde_json::to_string(&s128).unwrap());
234+
235+
// 120 bytes (8 chunks of 15 bytes) with an escape ending each chunk
236+
let mut s128_esc = String::new();
237+
for i in 0..8 {
238+
if i % 2 == 0 {
239+
s128_esc.push_str("0123456789abcd\n");
240+
} else {
241+
s128_esc.push_str("0123456789abcd\"");
242+
}
243+
}
244+
assert_eq!(encode_str(&s128_esc), serde_json::to_string(&s128_esc).unwrap());
245+
}
246+
247+
#[test]
248+
fn test_unaligned_data() {
249+
// Test strings that start at various alignments
250+
for offset in 0..32 {
251+
let padding = " ".repeat(offset);
252+
let test_str = format!("{}{}", padding, "test\nstring\"with\\escapes");
253+
let result = encode_str(&test_str[offset..]);
254+
let expected = serde_json::to_string(&test_str[offset..]).unwrap();
255+
assert_eq!(result, expected, "Failed at offset {}", offset);
256+
}
257+
}
258+
259+
#[test]
260+
fn test_sparse_escapes() {
261+
// Large string with escapes only at the beginning and end
262+
let mut s = String::new();
263+
s.push('"');
264+
s.push_str(&"a".repeat(500));
265+
s.push('\\');
266+
assert_eq!(encode_str(&s), serde_json::to_string(&s).unwrap());
267+
}
268+
269+
#[test]
270+
fn test_dense_escapes() {
271+
// String with many escapes
272+
let s = "\"\\\"\\\"\\\"\\".repeat(50);
273+
assert_eq!(encode_str(&s), serde_json::to_string(&s).unwrap());
274+
275+
// All control characters
276+
let mut ctrl = String::new();
277+
for _ in 0..10 {
278+
for i in 0u8..32 {
279+
ctrl.push(i as char);
280+
}
281+
}
282+
assert_eq!(encode_str(&ctrl), serde_json::to_string(&ctrl).unwrap());
283+
}
284+
285+
#[test]
286+
fn test_boundary_conditions() {
287+
// Test around 256 byte boundary (common cache line multiple)
288+
for size in 250..260 {
289+
let s = "a".repeat(size);
290+
assert_eq!(encode_str(&s), serde_json::to_string(&s).unwrap());
291+
292+
// With escape at the end
293+
let mut s_esc = "a".repeat(size - 1);
294+
s_esc.push('"');
295+
assert_eq!(encode_str(&s_esc), serde_json::to_string(&s_esc).unwrap());
296+
}
297+
}
298+
299+
#[test]
300+
fn test_all_escape_types() {
301+
// Test each escape type individually
302+
assert_eq!(encode_str("\x00"), r#""\u0000""#);
303+
assert_eq!(encode_str("\x08"), r#""\b""#);
304+
assert_eq!(encode_str("\x09"), r#""\t""#);
305+
assert_eq!(encode_str("\x0A"), r#""\n""#);
306+
assert_eq!(encode_str("\x0C"), r#""\f""#);
307+
assert_eq!(encode_str("\x0D"), r#""\r""#);
308+
assert_eq!(encode_str("\x1F"), r#""\u001f""#);
309+
assert_eq!(encode_str("\""), r#""\"""#);
310+
assert_eq!(encode_str("\\"), r#""\\""#);
311+
312+
// Test all control characters
313+
for i in 0u8..32 {
314+
let s = String::from_utf8(vec![i]).unwrap();
315+
let result = encode_str(&s);
316+
let expected = serde_json::to_string(&s).unwrap();
317+
assert_eq!(result, expected, "Failed for byte 0x{:02x}", i);
318+
}
319+
}
320+
321+
#[test]
322+
fn test_mixed_content() {
323+
// Mix of ASCII, escapes, and multi-byte UTF-8
324+
let mixed = r#"Hello "World"!
325+
Tab: Here
326+
Emoji: 😀 Chinese: 中文
327+
Math: ∑∫∂ Music: 𝄞
328+
Escape: \" \\ \n \r \t"#;
329+
assert_eq!(encode_str(mixed), serde_json::to_string(mixed).unwrap());
330+
}
331+
332+
#[test]
333+
fn test_repeated_patterns() {
334+
// Patterns that might benefit from or confuse SIMD operations
335+
let pattern1 = "abcd".repeat(100);
336+
assert_eq!(encode_str(&pattern1), serde_json::to_string(&pattern1).unwrap());
337+
338+
let pattern2 = "a\"b\"".repeat(100);
339+
assert_eq!(encode_str(&pattern2), serde_json::to_string(&pattern2).unwrap());
340+
341+
let pattern3 = "\t\n".repeat(100);
342+
assert_eq!(encode_str(&pattern3), serde_json::to_string(&pattern3).unwrap());
343+
}

src/x86.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -448,16 +448,3 @@ fn write_escape(result: &mut Vec<u8>, escape_byte: u8, c: u8) {
448448
}
449449
}
450450

451-
// Public entry point that does runtime CPU detection
452-
#[inline]
453-
pub fn encode_str<S: AsRef<str>>(input: S) -> String {
454-
// Runtime CPU feature detection
455-
if is_x86_feature_detected!("avx2") {
456-
unsafe { encode_str_avx2(input) }
457-
} else if is_x86_feature_detected!("sse2") {
458-
unsafe { encode_str_sse2(input) }
459-
} else {
460-
// Fallback to scalar implementation
461-
crate::encode_str_fallback(input)
462-
}
463-
}

0 commit comments

Comments
 (0)