Skip to content

Commit ce5e297

Browse files
authored
chore: add normalization benchmarks with additional workload scenarios (#31)
1 parent 8788c4f commit ce5e297

File tree

2 files changed

+195
-2
lines changed

2 files changed

+195
-2
lines changed

benches/fixtures.rs

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,110 @@ pub static ABSOLUTE_PATHS: &[&str] = &[
7979
"/home/user/workspace/projects/company/backend/microservices/auth-service/src/controllers/v2/handlers/login.js",
8080
"/var/log/applications/production/cluster-01/node-03/services/api-gateway/2024/01/15/access.log",
8181
];
82+
83+
/// Paths that are already in normalized form on Unix.
///
/// Invariants upheld by every entry: no `.` or `..` components, no `//`
/// doubled separators, and no trailing `/` (the bare root `/` is the only
/// entry that ends in a separator).
///
/// Used to benchmark the zero-allocation fast path.
///
/// Component counts quoted in the group comments below count named
/// components only; the leading root of an absolute path is not included.
// Only imported by the benches on non-Windows targets, so it is unused when
// compiling for Windows.
#[allow(dead_code)]
pub static ALREADY_NORMALIZED_UNIX: &[&str] = &[
    // Single-component paths
    "foo",
    "file.txt",
    "/single",
    // Short relative paths (2-3 components)
    "foo/bar",
    "foo/bar/baz",
    "src/main.rs",
    // Short absolute paths
    "/",
    "/foo",
    "/foo/bar",
    "/usr/local/bin",
    // Medium paths (3-5 components) with dots in filenames
    "src/lib/utils/helpers.rs",
    "/home/user/.config/settings.json",
    "/var/log/app.2024.01.15.log",
    "node_modules/@scope/package/dist/index.js",
    "src/.hidden/file.rs",
    // Tricky: dots in component names that are NOT . or ..
    "..foo/bar",
    "foo..bar/baz",
    ".../.../foo",
    // At SmallVec boundary (8 components)
    "a/b/c/d/e/f/g/h",
    "/level1/level2/level3/level4/level5/level6/level7/level8",
    // Over SmallVec boundary (9-10 components)
    "a/b/c/d/e/f/g/h/i/j",
    "/usr/local/share/doc/packages/example/tutorials/advanced/chapter1/section2",
    "/var/log/applications/production/cluster-01/node-03/services/api-gateway/access.log",
    // Very deep paths (15-20 components)
    "a/b/c/d/e/f/g/h/i/j/k/l/m/n/o",
    "/home/user/projects/company/backend/services/api/controllers/v2/handlers/auth/login/validate/token/refresh/generate/key/store/cache/data",
    // --- Additional workload ---
    // Single-component paths
    "bar",
    "image.png",
    "/root",
    // Short relative paths (2-3 components)
    "baz/qux",
    "baz/qux/corge",
    "tests/unit.rs",
    // Short absolute paths
    "/tmp",
    "/bar/baz",
    "/opt/bin/tool",
    // Medium paths (3-5 components) with dots in filenames
    "lib/core/math/vector.rs",
    "/opt/app/.env.production",
    "/tmp/data/report.2024.q1.csv",
    "packages/@company/sdk/lib/index.mjs",
    "config/.secrets/keys.pem",
    // Tricky dot names (three or more dots are ordinary names, not `.` / `..`)
    "..bar/baz",
    "baz..qux/corge",
    ".../..../bar",
    // At SmallVec boundary (8 components)
    "p/q/r/s/t/u/v/w",
    "/alpha/bravo/charlie/delta/echo/foxtrot/golf/hotel",
    // At/over SmallVec boundary (8-10 components; the last entry has 8 names
    // and only exceeds 8 if the leading root is counted)
    "p/q/r/s/t/u/v/w/x/y",
    "/opt/data/warehouse/etl/pipelines/transforms/staging/output/validated/reports",
    "/srv/apps/production/cluster-02/node-05/services/graphql-gateway/access.log",
    // Very deep paths (15-20 components)
    "p/q/r/s/t/u/v/w/x/y/z/aa/bb/cc/dd",
    "/srv/data/projects/org/team/repo/packages/core/src/modules/auth/handlers/v3/internal/process/queue/worker/task",
];
155+
156+
/// Paths that are already in normalized form on Windows.
///
/// Every entry uses `\` as its only separator, contains no `.` or `..`
/// components, and has no doubled separators apart from the leading `\\` of
/// a UNC prefix. A trailing `\` appears only on drive roots (`C:\`) and UNC
/// share roots (`\\server\share\`).
// Only imported by the benches on Windows targets, so it is unused when
// compiling for any other platform.
#[allow(dead_code)]
pub static ALREADY_NORMALIZED_WINDOWS: &[&str] = &[
    // Drive roots, short and medium drive-letter paths, and UNC paths
    "C:\\",
    "C:\\foo",
    "C:\\foo\\bar",
    "C:\\Users\\Admin\\Documents\\file.txt",
    "D:\\Projects\\rust\\src\\main.rs",
    "\\\\server\\share\\",
    "\\\\server\\share\\folder\\document.doc",
    "C:\\Windows\\System32\\drivers\\etc\\hosts",
    "C:\\Program Files\\Application\\bin\\app.exe",
    // At SmallVec boundary
    "C:\\level1\\level2\\level3\\level4\\level5\\level6\\level7\\level8",
    // Deep paths
    "C:\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q\\r\\s\\t",
    "D:\\home\\user\\workspace\\projects\\company\\backend\\microservices\\auth-service\\src\\controllers\\v2\\handlers",
    // --- Additional workload ---
    "E:\\",
    "E:\\bar",
    "E:\\bar\\baz",
    "C:\\Users\\Deploy\\.profile\\config.toml",
    "F:\\Games\\Steam\\steamapps\\common\\app.exe",
    "\\\\nas\\backup\\",
    "\\\\nas\\backup\\archives\\2024\\data.zip",
    "C:\\ProgramData\\Docker\\volumes\\db\\data",
    "D:\\workspace\\monorepo\\packages\\core\\dist\\index.cjs",
    // At SmallVec boundary
    "E:\\alpha\\bravo\\charlie\\delta\\echo\\foxtrot\\golf\\hotel",
    // Deep paths
    "E:\\p\\q\\r\\s\\t\\u\\v\\w\\x\\y\\z\\aa\\bb\\cc\\dd\\ee\\ff\\gg\\hh\\ii",
    "F:\\srv\\data\\projects\\org\\team\\repo\\packages\\core\\src\\modules\\auth\\handlers",
];

benches/normalize.rs

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,102 @@ use sugar_path::SugarPath;
66

77
mod fixtures;
88

9-
use fixtures::FIXTURES;
9+
#[cfg(not(target_family = "windows"))]
10+
use fixtures::ALREADY_NORMALIZED_UNIX;
11+
#[cfg(target_family = "windows")]
12+
use fixtures::ALREADY_NORMALIZED_WINDOWS;
13+
use fixtures::{ABSOLUTE_PATHS, FIXTURES};
1014

1115
fn criterion_benchmark(c: &mut Criterion) {
12-
c.bench_function("normalize", |b| {
16+
// Paths that need normalization (existing behavior baseline)
17+
c.bench_function("normalize_needs_work", |b| {
1318
b.iter(|| {
1419
for fixture in FIXTURES {
1520
black_box(Path::new(fixture).normalize());
1621
}
1722
})
1823
});
24+
25+
// Paths already in normal form (the Cow::Borrowed fast path target)
26+
c.bench_function("normalize_already_clean", |b| {
27+
#[cfg(not(target_family = "windows"))]
28+
let paths = ALREADY_NORMALIZED_UNIX;
29+
#[cfg(target_family = "windows")]
30+
let paths = ALREADY_NORMALIZED_WINDOWS;
31+
32+
b.iter(|| {
33+
for fixture in paths {
34+
black_box(Path::new(fixture).normalize());
35+
}
36+
})
37+
});
38+
39+
// Already-normalized absolute paths (reuses existing ABSOLUTE_PATHS)
40+
c.bench_function("normalize_already_clean_absolute", |b| {
41+
b.iter(|| {
42+
for fixture in ABSOLUTE_PATHS {
43+
black_box(Path::new(fixture).normalize());
44+
}
45+
})
46+
});
47+
48+
// Mixed workload: interleaved clean and needs-work paths
49+
c.bench_function("normalize_mixed_workload", |b| {
50+
#[cfg(not(target_family = "windows"))]
51+
let clean = ALREADY_NORMALIZED_UNIX;
52+
#[cfg(target_family = "windows")]
53+
let clean = ALREADY_NORMALIZED_WINDOWS;
54+
55+
let mixed: Vec<&str> = clean.iter().zip(FIXTURES.iter()).flat_map(|(c, d)| [*c, *d]).collect();
56+
57+
b.iter(|| {
58+
for fixture in &mixed {
59+
black_box(Path::new(fixture).normalize());
60+
}
61+
})
62+
});
63+
64+
// Short clean paths (isolate fixed-overhead savings)
65+
c.bench_function("normalize_short_clean", |b| {
66+
let short_paths = [
67+
"foo",
68+
"foo/bar",
69+
"/foo",
70+
"/foo/bar",
71+
"src/main.rs",
72+
"file.txt",
73+
"bar",
74+
"baz/qux",
75+
"/bar",
76+
"/bar/baz",
77+
"tests/unit.rs",
78+
"image.png",
79+
];
80+
b.iter(|| {
81+
for fixture in &short_paths {
82+
black_box(Path::new(fixture).normalize());
83+
}
84+
})
85+
});
86+
87+
// Deep clean paths (isolate memchr scan cost scaling)
88+
c.bench_function("normalize_deep_clean", |b| {
89+
let deep_paths = [
90+
"a/b/c/d/e/f/g/h/i/j",
91+
"/usr/local/share/doc/packages/example/tutorials/advanced/chapter1/section2",
92+
"/home/user/projects/company/backend/services/api/controllers/v2/handlers/auth/login/validate/token/refresh/generate/key/store/cache/data",
93+
"/level1/level2/level3/level4/level5/level6/level7/level8/level9/level10/level11/level12",
94+
"p/q/r/s/t/u/v/w/x/y",
95+
"/opt/data/warehouse/etl/pipelines/transforms/staging/output/validated/reports",
96+
"/srv/data/projects/org/team/repo/packages/core/src/modules/auth/handlers/v3/internal/process/queue/worker/task",
97+
"/alpha/bravo/charlie/delta/echo/foxtrot/golf/hotel/india/juliet/kilo/lima",
98+
];
99+
b.iter(|| {
100+
for fixture in &deep_paths {
101+
black_box(Path::new(fixture).normalize());
102+
}
103+
})
104+
});
19105
}
20106

21107
criterion_group!(benches, criterion_benchmark);

0 commit comments

Comments
 (0)