Skip to content

Commit 09ea496

Browse files
authored
Merge pull request #92 from RAprogramm/codex/refactor-contains_nocase-for-performance
Optimize Turnkey classifier case-insensitive search
2 parents abcddf3 + 370b342 commit 09ea496

File tree

2 files changed

+52
-12
lines changed

2 files changed

+52
-12
lines changed

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,20 @@ All notable changes to this project will be documented in this file.
33

44
## [Unreleased]
55

6-
## [0.10.7] - 2025-09-22
6+
## [0.10.7] - 2025-10-24
7+
8+
### Tests
9+
- Added regression coverage for long classifier needles to exercise the
10+
heap-allocation fallback.
711

812
### Changed
913
- Added an owning `From<AppError>` conversion for `ErrorResponse` and updated the
1014
Axum adapter to use it, eliminating redundant clones when building HTTP error
1115
bodies.
16+
- Precomputed lowercase Turnkey classifier needles with a stack-backed buffer
17+
to remove repeated transformations while keeping the common zero-allocation
18+
path for short patterns.
19+
1220

1321
## [0.10.6] - 2025-09-21
1422

src/turnkey/classifier.rs

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
use super::domain::TurnkeyErrorKind;
22

3+
const STACK_NEEDLE_INLINE_CAP: usize = 64;
4+
35
/// Heuristic classifier for raw SDK/provider messages (ASCII case-insensitive).
46
///
5-
/// This helper **does not allocate**; it performs case-insensitive `contains`
6-
/// checks over the input string to map common upstream texts to stable kinds.
7+
/// This helper keeps allocations to a minimum; it performs case-insensitive
8+
/// `contains` checks over the input string to map common upstream texts to
9+
/// stable kinds while using per-call stack buffers for the short ASCII patterns we
10+
/// match.
711
///
812
/// The classifier is intentionally minimal; providers can and will change
913
/// messages. Prefer returning structured errors from adapters whenever
@@ -55,20 +59,41 @@ pub fn classify_turnkey_error(msg: &str) -> TurnkeyErrorKind {
5559
}
5660

5761
/// Returns true if `haystack` contains `needle` ignoring ASCII case.
58-
/// Performs the search without allocating.
62+
///
63+
/// The search avoids heap allocations for needles up to
64+
/// `STACK_NEEDLE_INLINE_CAP` bytes by lowercasing them into a stack buffer. Longer needles
65+
/// allocate once to store their lowercased representation.
5966
#[inline]
6067
fn contains_nocase(haystack: &str, needle: &str) -> bool {
6168
// Fast path: empty needle always matches.
6269
if needle.is_empty() {
6370
return true;
6471
}
65-
// Walk haystack windows and compare ASCII case-insensitively.
66-
haystack.as_bytes().windows(needle.len()).any(|w| {
67-
w.iter()
68-
.copied()
69-
.map(ascii_lower)
70-
.eq(needle.as_bytes().iter().copied().map(ascii_lower))
71-
})
72+
let haystack_bytes = haystack.as_bytes();
73+
let needle_bytes = needle.as_bytes();
74+
75+
let search = |needle_lower: &[u8]| {
76+
haystack_bytes.windows(needle_lower.len()).any(|window| {
77+
window
78+
.iter()
79+
.zip(needle_lower.iter())
80+
.all(|(hay, lower_needle)| ascii_lower(*hay) == *lower_needle)
81+
})
82+
};
83+
84+
if needle_bytes.len() <= STACK_NEEDLE_INLINE_CAP {
85+
let mut inline = [0u8; STACK_NEEDLE_INLINE_CAP];
86+
for (idx, byte) in needle_bytes.iter().enumerate() {
87+
inline[idx] = ascii_lower(*byte);
88+
}
89+
search(&inline[..needle_bytes.len()])
90+
} else {
91+
let mut lowercased = Vec::with_capacity(needle_bytes.len());
92+
for byte in needle_bytes {
93+
lowercased.push(ascii_lower(*byte));
94+
}
95+
search(lowercased.as_slice())
96+
}
7297
}
7398

7499
/// Check whether `haystack` contains any of the `needles` (ASCII
@@ -90,10 +115,17 @@ pub(super) mod internal_tests {
90115
use super::*;
91116

92117
#[test]
93-
fn contains_nocase_works_without_alloc() {
118+
fn contains_nocase_matches_ascii_case_insensitively() {
94119
assert!(contains_nocase("ABCdef", "cDe"));
95120
assert!(contains_any_nocase("hello world", &["nope", "WORLD"]));
96121
assert!(!contains_nocase("rustacean", "python"));
97122
assert!(contains_nocase("", ""));
98123
}
124+
125+
#[test]
126+
fn contains_nocase_handles_long_needles() {
127+
let haystack = "prefixed".to_owned() + &"A".repeat(128) + "suffix";
128+
let needle = "a".repeat(128);
129+
assert!(contains_nocase(&haystack, &needle));
130+
}
99131
}

0 commit comments

Comments
 (0)