|
1 | 1 | #![cfg_attr(feature = "nightly", feature(test))]
|
2 | 2 |
|
3 |
| -#[cfg(target_arch = "x86_64")] |
4 |
| -pub use x86::encode_str; |
5 |
| - |
6 | 3 | #[cfg(target_arch = "x86_64")]
|
7 | 4 | mod x86;
|
8 | 5 |
|
@@ -134,8 +131,20 @@ pub fn encode_str_fallback<S: AsRef<str>>(input: S) -> String {
|
134 | 131 | unsafe { String::from_utf8_unchecked(result) }
|
135 | 132 | }
|
136 | 133 |
|
137 |
| -#[cfg(not(target_arch = "x86_64"))] |
| 134 | +/// Main entry point for JSON string escaping with SIMD acceleration |
| 135 | +#[inline] |
138 | 136 | pub fn encode_str<S: AsRef<str>>(input: S) -> String {
|
| 137 | + #[cfg(target_arch = "x86_64")] |
| 138 | + { |
| 139 | + // Runtime CPU feature detection for x86_64 |
| 140 | + if is_x86_feature_detected!("avx2") { |
| 141 | + unsafe { return x86::encode_str_avx2(input) } |
| 142 | + } else if is_x86_feature_detected!("sse2") { |
| 143 | + unsafe { return x86::encode_str_sse2(input) } |
| 144 | + } |
| 145 | + } |
| 146 | + |
| 147 | + // Fallback to optimized scalar implementation |
139 | 148 | encode_str_fallback(input)
|
140 | 149 | }
|
141 | 150 |
|
@@ -170,3 +179,165 @@ fn test_escape_json_string() {
|
170 | 179 | fixture
|
171 | 180 | );
|
172 | 181 | }
|
| 182 | + |
| 183 | +// Test cases for various string sizes to cover different SIMD paths |
| 184 | + |
/// An empty input must encode to just the two surrounding quotes.
#[test]
fn test_empty_string() {
    let encoded = encode_str("");
    assert_eq!(encoded, r#""""#);
}
| 189 | + |
/// Inputs shorter than 16 bytes (one SSE register), with and without escapes.
#[test]
fn test_very_small_strings() {
    // (input, expected JSON encoding)
    let cases = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(encode_str(input), expected);
    }
}
| 202 | + |
/// Inputs that are exactly 16 bytes long (one SSE register), both without
/// and with a character that needs escaping.
#[test]
fn test_small_strings_16_bytes() {
    // Exactly 16 bytes - SSE register boundary
    let s16 = "0123456789abcdef";
    assert_eq!(s16.len(), 16);
    assert_eq!(encode_str(s16), serde_json::to_string(s16).unwrap());

    // Exactly 16 bytes including one escape. `\t` is a single byte in the
    // input, so the layout is 8 + 1 + 7 bytes. (The previous literal was only
    // 15 bytes long and therefore never hit the 16-byte boundary.)
    let s16_esc = "01234567\t9abcdef";
    assert_eq!(s16_esc.len(), 16);
    assert_eq!(encode_str(s16_esc), serde_json::to_string(s16_esc).unwrap());
}
| 215 | + |
/// Inputs that are exactly 32 bytes long (one AVX2 register), both without
/// and with a character that needs escaping.
#[test]
fn test_medium_strings_32_bytes() {
    // Exactly 32 bytes - AVX2 register boundary
    let s32 = "0123456789abcdef0123456789abcdef";
    assert_eq!(s32.len(), 32);
    assert_eq!(encode_str(s32), serde_json::to_string(s32).unwrap());

    // Exactly 32 bytes with a quote at byte index 15: 15 + 1 + 16 bytes.
    // (The previous literal was 31 bytes long and its length was unchecked.)
    let s32_esc = "0123456789abcde\"0123456789abcdef";
    assert_eq!(s32_esc.len(), 32);
    assert_eq!(encode_str(s32_esc), serde_json::to_string(s32_esc).unwrap());
}
| 227 | + |
/// Inputs that are exactly 128 bytes long, both escape-free and with an
/// escape closing every 16-byte chunk.
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes - main loop size
    let s128 = "0123456789abcdef".repeat(8);
    assert_eq!(s128.len(), 128);
    assert_eq!(encode_str(&s128), serde_json::to_string(&s128).unwrap());

    // 128 bytes with escapes spread throughout: eight 16-byte chunks
    // (15 plain bytes + 1 escaped byte), alternating newline and quote.
    // (The previous chunks were 15 bytes each, giving 120 bytes in total.)
    let mut s128_esc = String::with_capacity(128);
    for i in 0..8 {
        if i % 2 == 0 {
            s128_esc.push_str("0123456789abcde\n");
        } else {
            s128_esc.push_str("0123456789abcde\"");
        }
    }
    assert_eq!(s128_esc.len(), 128);
    assert_eq!(encode_str(&s128_esc), serde_json::to_string(&s128_esc).unwrap());
}
| 246 | + |
/// Encodes the same payload starting at byte offsets 0..32 inside a larger
/// buffer, so the input slice begins at varying positions within it.
#[test]
fn test_unaligned_data() {
    const PAYLOAD: &str = "test\nstring\"with\\escapes";

    for offset in 0..32 {
        // `offset` spaces of padding followed by the payload.
        let mut buffer = " ".repeat(offset);
        buffer.push_str(PAYLOAD);

        // Skip the padding so the slice starts `offset` bytes into the buffer.
        let shifted = &buffer[offset..];
        let result = encode_str(shifted);
        let expected = serde_json::to_string(shifted).unwrap();
        assert_eq!(result, expected, "Failed at offset {}", offset);
    }
}
| 258 | + |
/// A long run of plain bytes with an escape only at the very start (a quote)
/// and at the very end (a backslash).
#[test]
fn test_sparse_escapes() {
    // `"` + 500 x `a` + `\`
    let s = format!("\"{}\\", "a".repeat(500));
    assert_eq!(encode_str(&s), serde_json::to_string(&s).unwrap());
}
| 268 | + |
/// Inputs in which every byte needs escaping.
#[test]
fn test_dense_escapes() {
    // Alternating quotes and backslashes.
    let quotes_and_slashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        encode_str(&quotes_and_slashes),
        serde_json::to_string(&quotes_and_slashes).unwrap()
    );

    // Ten repetitions of every control character 0x00..=0x1F.
    let ctrl: String = (0..10)
        .flat_map(|_| (0u8..32).map(char::from))
        .collect();
    assert_eq!(encode_str(&ctrl), serde_json::to_string(&ctrl).unwrap());
}
| 284 | + |
/// Sweeps input lengths 250..260 (around the 256-byte mark), once with plain
/// content and once with a quote as the final byte.
#[test]
fn test_boundary_conditions() {
    for size in 250..260 {
        let plain = "a".repeat(size);
        assert_eq!(encode_str(&plain), serde_json::to_string(&plain).unwrap());

        // Same total length, but the last byte is a quote that must be escaped.
        let quoted_tail = format!("{}\"", "a".repeat(size - 1));
        assert_eq!(
            encode_str(&quoted_tail),
            serde_json::to_string(&quoted_tail).unwrap()
        );
    }
}
| 298 | + |
/// Checks each kind of escape against a literal expectation, then compares
/// every control byte 0x00..=0x1F with serde_json's encoding.
#[test]
fn test_all_escape_types() {
    // (single-character input, expected JSON encoding)
    let literal_cases = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for &(input, expected) in literal_cases.iter() {
        assert_eq!(encode_str(input), expected);
    }

    // Every control character, cross-checked against serde_json.
    for byte in 0u8..32 {
        let s = char::from(byte).to_string();
        let result = encode_str(&s);
        let expected = serde_json::to_string(&s).unwrap();
        assert_eq!(result, expected, "Failed for byte 0x{:02x}", byte);
    }
}
| 320 | + |
/// Encodes one multi-line input mixing ASCII, quotes, backslashes and
/// multi-byte UTF-8, and compares the result with serde_json's encoding.
#[test]
fn test_mixed_content() {
    // Mix of ASCII, escapes, and multi-byte UTF-8
    // The literal is a raw string, so `\"`, `\\`, `\n`, `\r`, `\t` on its last
    // line are literal backslash sequences, while the line breaks inside the
    // literal are real newline bytes.
    // NOTE(review): the "Tab:" line presumably contains a literal tab
    // character between "Tab:" and "Here" - confirm against the original file.
    let mixed = r#"Hello "World"!
    Tab: Here
    Emoji: 😀 Chinese: 中文
    Math: ∑∫∂ Music: 𝄞
    Escape: \" \\ \n \r \t"#;
    assert_eq!(encode_str(mixed), serde_json::to_string(mixed).unwrap());
}
| 331 | + |
/// Short units repeated 100 times: one escape-free, one with interleaved
/// quotes, and one made only of control characters.
#[test]
fn test_repeated_patterns() {
    // Patterns that might benefit from or confuse SIMD operations
    let units = ["abcd", "a\"b\"", "\t\n"];

    for unit in units.iter() {
        let repeated = unit.repeat(100);
        assert_eq!(
            encode_str(&repeated),
            serde_json::to_string(&repeated).unwrap()
        );
    }
}
0 commit comments