Skip to content

Commit 07537aa

Browse files
zakcutner authored and marmeladema committed
Update benchmarks to use different needle and haystack sizes
1 parent f51c5e3 commit 07537aa

File tree

3 files changed

+111
-267
lines changed

3 files changed

+111
-267
lines changed

benches/avx2_anysize.rs

Lines changed: 109 additions & 267 deletions
Original file line numberDiff line numberDiff line change
@@ -1,276 +1,118 @@
1-
use criterion::{criterion_group, criterion_main, Criterion};
1+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
22
use memmem::{Searcher, TwoWaySearcher};
3-
use std::fs::File;
4-
use std::io::{self, BufRead, Read};
3+
use std::str;
54
use strstr::avx2::*;
65

76
fn criterion_benchmark(c: &mut Criterion) {
8-
let mut f = File::open("./data/i386.txt").unwrap();
9-
let mut buffer = Vec::new();
10-
f.read_to_end(&mut buffer).unwrap();
11-
let content = String::from_utf8_lossy(&buffer);
12-
13-
let file = File::open("./data/words").unwrap();
14-
let mut words: Vec<String> = io::BufReader::new(file)
15-
.lines()
16-
.filter_map(|line| line.ok())
17-
.filter(|word| word.len() > 1)
18-
.collect();
19-
// Sort all words by length then lexicographically
20-
words.sort_by(|a, b| {
21-
if a.len() != b.len() {
22-
a.len().partial_cmp(&b.len()).unwrap()
23-
} else {
24-
a.partial_cmp(b).unwrap()
7+
let haystack = include_bytes!("../data/haystack");
8+
let needle = include_bytes!("../data/needle");
9+
10+
let sizes = [1, 5, 10, 20, 50, 100, 1000];
11+
12+
for (i, &size) in sizes.iter().enumerate() {
13+
let mut group = c.benchmark_group(format!("{}-byte needle", size));
14+
let needle = &needle[..size];
15+
16+
for &size in &sizes[i..] {
17+
let parameter = &format!("{}-byte haystack", size);
18+
let haystack = &haystack[..size];
19+
20+
group.bench_with_input(
21+
BenchmarkId::new("String::find", parameter),
22+
&size,
23+
|b, _| {
24+
let haystack = str::from_utf8(haystack).unwrap();
25+
let needle = str::from_utf8(needle).unwrap();
26+
b.iter(|| haystack.find(needle));
27+
},
28+
);
29+
30+
group.bench_with_input(
31+
BenchmarkId::new("TwoWaySearcher::search_in", parameter),
32+
&size,
33+
|b, _| {
34+
let searcher = TwoWaySearcher::new(needle);
35+
b.iter(|| black_box(searcher.search_in(haystack)));
36+
},
37+
);
38+
39+
group.bench_with_input(
40+
BenchmarkId::new("twoway::find_bytes", parameter),
41+
&size,
42+
|b, _| {
43+
b.iter(|| black_box(twoway::find_bytes(haystack, needle)));
44+
},
45+
);
46+
47+
group.bench_with_input(
48+
BenchmarkId::new("strstr_avx2_original", parameter),
49+
&size,
50+
|b, _| {
51+
b.iter(|| black_box(unsafe { strstr_avx2_original(haystack, needle) }));
52+
},
53+
);
54+
55+
group.bench_with_input(
56+
BenchmarkId::new("strstr_avx2_rust_simple", parameter),
57+
&size,
58+
|b, _| {
59+
b.iter(|| black_box(unsafe { strstr_avx2_rust_simple(haystack, needle) }));
60+
},
61+
);
62+
63+
group.bench_with_input(
64+
BenchmarkId::new("strstr_avx2_rust_simple_2", parameter),
65+
&size,
66+
|b, _| {
67+
b.iter(|| black_box(unsafe { strstr_avx2_rust_simple_2(haystack, needle) }));
68+
},
69+
);
70+
71+
group.bench_with_input(
72+
BenchmarkId::new("strstr_avx2_rust_fast", parameter),
73+
&size,
74+
|b, _| {
75+
b.iter(|| black_box(unsafe { strstr_avx2_rust_fast(haystack, needle) }));
76+
},
77+
);
78+
79+
group.bench_with_input(
80+
BenchmarkId::new("strstr_avx2_rust_fast_2", parameter),
81+
&size,
82+
|b, _| {
83+
b.iter(|| black_box(strstr_avx2_rust_fast_2(haystack, needle)));
84+
},
85+
);
86+
87+
group.bench_with_input(
88+
BenchmarkId::new("strstr_avx2_rust_aligned", parameter),
89+
&size,
90+
|b, _| {
91+
b.iter(|| black_box(unsafe { strstr_avx2_rust_aligned(haystack, needle) }));
92+
},
93+
);
94+
95+
group.bench_with_input(
96+
BenchmarkId::new("StrStrAVX2Searcher::search_in", parameter),
97+
&size,
98+
|b, _| {
99+
let searcher = StrStrAVX2Searcher::new(needle);
100+
b.iter(|| black_box(searcher.search_in(haystack)));
101+
},
102+
);
103+
104+
group.bench_with_input(
105+
BenchmarkId::new("DynamicAvx2Searcher::search_in", parameter),
106+
&size,
107+
|b, _| {
108+
let searcher = DynamicAvx2Searcher::new(needle.to_owned().into_boxed_slice());
109+
b.iter(|| black_box(searcher.search_in(haystack)));
110+
},
111+
);
25112
}
26-
});
27-
let words = words;
28-
29-
let twoway_words: Vec<TwoWaySearcher<'_>> = words
30-
.iter()
31-
.map(|word| TwoWaySearcher::new(word.as_bytes()))
32-
.collect();
33-
34-
let avx2_words: Vec<StrStrAVX2Searcher> = words
35-
.iter()
36-
.map(|word| StrStrAVX2Searcher::new(word.as_bytes()))
37-
.collect();
38-
39-
let dynamic_avx2_searchers: Vec<DynamicAvx2Searcher> = words
40-
.iter()
41-
.map(|word| DynamicAvx2Searcher::new(word.clone().into_boxed_str().into()))
42-
.collect();
43-
44-
// Benchmarks against long haystacks
45-
46-
c.bench_function("String::find with long haystack", |b| {
47-
b.iter(|| {
48-
for word in &words {
49-
content.find(word);
50-
}
51-
})
52-
});
53-
54-
c.bench_function(
55-
"memmem::TwoWaySearcher::search_in with long haystack",
56-
|b| {
57-
b.iter(|| {
58-
for twoway_word in &twoway_words {
59-
twoway_word.search_in(content.as_bytes());
60-
}
61-
})
62-
},
63-
);
64-
65-
c.bench_function("twoway::find_bytes with long haystack", |b| {
66-
b.iter(|| {
67-
for word in &words {
68-
twoway::find_bytes(content.as_bytes(), word.as_bytes());
69-
}
70-
})
71-
});
72-
73-
c.bench_function("strstr_avx2_original with long haystack", |b| {
74-
b.iter(|| {
75-
for word in &words {
76-
unsafe {
77-
strstr_avx2_original(content.as_bytes(), word.as_bytes());
78-
}
79-
}
80-
})
81-
});
82-
83-
c.bench_function("strstr_avx2_rust_simple with long haystack", |b| {
84-
b.iter(|| {
85-
for word in &words {
86-
unsafe {
87-
strstr_avx2_rust_simple(content.as_bytes(), word.as_bytes());
88-
}
89-
}
90-
})
91-
});
92-
93-
c.bench_function("strstr_avx2_rust_simple_2 with long haystack", |b| {
94-
b.iter(|| {
95-
for word in &words {
96-
unsafe {
97-
strstr_avx2_rust_simple_2(content.as_bytes(), word.as_bytes());
98-
}
99-
}
100-
})
101-
});
102-
103-
c.bench_function("strstr_avx2_rust_fast with long haystack", |b| {
104-
b.iter(|| {
105-
for word in &words {
106-
unsafe {
107-
strstr_avx2_rust_fast(content.as_bytes(), word.as_bytes());
108-
}
109-
}
110-
})
111-
});
112-
113-
c.bench_function("strstr_avx2_rust_fast_2 with long haystack", |b| {
114-
b.iter(|| {
115-
for word in &words {
116-
strstr_avx2_rust_fast_2(content.as_bytes(), word.as_bytes());
117-
}
118-
})
119-
});
120-
121-
c.bench_function("strstr_avx2_rust_aligned with long haystack", |b| {
122-
b.iter(|| {
123-
for word in &words {
124-
unsafe {
125-
strstr_avx2_rust_aligned(content.as_bytes(), word.as_bytes());
126-
}
127-
}
128-
})
129-
});
130-
131-
c.bench_function("StrStrAVX2Searcher::search_in with long haystack", |b| {
132-
b.iter(|| {
133-
for avx2_word in &avx2_words {
134-
avx2_word.search_in(content.as_bytes());
135-
}
136-
})
137-
});
138-
139-
c.bench_function("DynamicAvx2Searcher::search_in with long haystack", |b| {
140-
b.iter(|| {
141-
for searcher in &dynamic_avx2_searchers {
142-
searcher.search_in(content.as_bytes());
143-
}
144-
})
145-
});
146-
147-
// Benchmarks against short haystacks
148-
//
149-
// Since words are ordered by length, pick a word as needle
150-
// and use bigger words as haystacks.
151-
152-
c.bench_function("String::find with short haystack", |b| {
153-
b.iter(|| {
154-
for (i, word) in words.iter().enumerate() {
155-
for content in &words[(i + 1)..] {
156-
content.find(word);
157-
}
158-
}
159-
})
160-
});
161-
162-
c.bench_function(
163-
"memmem::TwoWaySearcher::search_in with short haystack",
164-
|b| {
165-
b.iter(|| {
166-
for (i, word) in twoway_words.iter().enumerate() {
167-
for content in &words[(i + 1)..] {
168-
word.search_in(content.as_bytes());
169-
}
170-
}
171-
})
172-
},
173-
);
174-
175-
c.bench_function("twoway::find_bytes with short haystack", |b| {
176-
b.iter(|| {
177-
for (i, word) in words.iter().enumerate() {
178-
for content in &words[(i + 1)..] {
179-
twoway::find_bytes(content.as_bytes(), word.as_bytes());
180-
}
181-
}
182-
})
183-
});
184-
185-
c.bench_function("strstr_avx2_original with short haystack", |b| {
186-
b.iter(|| {
187-
for (i, word) in words.iter().enumerate() {
188-
for content in &words[(i + 1)..] {
189-
unsafe {
190-
strstr_avx2_original(content.as_bytes(), word.as_bytes());
191-
}
192-
}
193-
}
194-
})
195-
});
196-
197-
c.bench_function("strstr_avx2_rust_simple with short haystack", |b| {
198-
b.iter(|| {
199-
for (i, word) in words.iter().enumerate() {
200-
for content in &words[(i + 1)..] {
201-
unsafe {
202-
strstr_avx2_rust_simple(content.as_bytes(), word.as_bytes());
203-
}
204-
}
205-
}
206-
})
207-
});
208-
209-
c.bench_function("strstr_avx2_rust_simple_2 with short haystack", |b| {
210-
b.iter(|| {
211-
for (i, word) in words.iter().enumerate() {
212-
for content in &words[(i + 1)..] {
213-
unsafe {
214-
strstr_avx2_rust_simple_2(content.as_bytes(), word.as_bytes());
215-
}
216-
}
217-
}
218-
})
219-
});
220-
221-
c.bench_function("strstr_avx2_rust_fast with short haystack", |b| {
222-
b.iter(|| {
223-
for (i, word) in words.iter().enumerate() {
224-
for content in &words[(i + 1)..] {
225-
unsafe {
226-
strstr_avx2_rust_fast(content.as_bytes(), word.as_bytes());
227-
}
228-
}
229-
}
230-
})
231-
});
232-
233-
c.bench_function("strstr_avx2_rust_fast_2 with short haystack", |b| {
234-
b.iter(|| {
235-
for (i, word) in words.iter().enumerate() {
236-
for content in &words[(i + 1)..] {
237-
strstr_avx2_rust_fast_2(content.as_bytes(), word.as_bytes());
238-
}
239-
}
240-
})
241-
});
242-
243-
c.bench_function("strstr_avx2_rust_aligned with short haystack", |b| {
244-
b.iter(|| {
245-
for (i, word) in words.iter().enumerate() {
246-
for content in &words[(i + 1)..] {
247-
unsafe {
248-
strstr_avx2_rust_aligned(content.as_bytes(), word.as_bytes());
249-
}
250-
}
251-
}
252-
})
253-
});
254-
255-
c.bench_function("StrStrAVX2Searcher::search_in with short haystack", |b| {
256-
b.iter(|| {
257-
for (i, word) in avx2_words.iter().enumerate() {
258-
for content in &words[(i + 1)..] {
259-
word.search_in(content.as_bytes());
260-
}
261-
}
262-
})
263-
});
264113

265-
c.bench_function("DynamicAvx2Searcher::search_in with short haystack", |b| {
266-
b.iter(|| {
267-
for (i, searcher) in dynamic_avx2_searchers.iter().enumerate() {
268-
for content in &words[(i + 1)..] {
269-
searcher.search_in(content.as_bytes());
270-
}
271-
}
272-
})
273-
});
114+
group.finish();
115+
}
274116
}
275117

276118
criterion_group!(benches, criterion_benchmark);

data/haystack

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
I4BCeTWkpm1G1StV1WP8HKMX1IaRTslq88MOEg4nx7kWLaWYDRDYZrdTtsg26mz9S8DhYcd1ty35aPLyRZSeimOmPAroaqi861z44o35qP9GdrofuuHwdhSEeOg7E0L1KrTYgWZZrzyk7o0DVxw7vvmo6rVJ5T4lLMkesL1U6ObckS9oorPI5ZDm4Nk1ryh3AC8juZwJLcyWKveenfaVGaB42iYrOvrZGIMQG0OmA0k73ydw2mJawUzWE8O61MQpDm19JFaTgSJMkPs3yjY149knHg03uOBvb6d0hCmUbiHkRgFwH17cL7UJUlkggx55Gk1n07v7fGdkjvxmOmCHCvLrY9pCvewCAy1Lg1NTdE4N7AJfjPj1hiw9DqiDaJwTVc4JA229EubFHnMBlHkrSSnR1wh7RYOngmQUxjGOdctDkjVard24dO2mGkGCXAHrOjRGq9J54ml3HtCx3Ui3YN68Q8eywJnkS7CdiA2d2SmRako5VlGfRCDdoiNpZwq9FodDRcw4XsWLuH9XEAfXSGzx5X8PVUwnmTImOcc1az1x8dUWVdK5St7M5dVPsWZ5hrll7F0MjKdf2wBFYEp0gqZTswXUn2RwkDLCj4oikpBCE1fLSaiKL89rbo1RZ2phIVrXb7eQ8xJTBU1iSwWF97OJlIBcNjmE4TzrqgIl2m8ybVAURk3iC51cwO8wD5V1dcVzIuqAWJUwKh4PPyHhGdv4T439nRh3ukwAlHBtxj56izkIKWNm01Mfrc1boFApld1E597BDSrWEcXwn44KogPpJ058LGNB8IaAufcWEXfqbPwgNQUSqqarItOAFir3tr8nBGQar22nZifJzUiU35TzXUc46LYAxSEeDmbqMh3Zcmuw5YrFF5Ma0oaAuiiJb2IdpkPr5E6HuN5vDvvnjxDw0GbOX72xuSRFDxbOmt40hDLQLy3UrlaP92jS7DfBMUGefSo7mTfCpz5hvllPhbjYqJ31P9d8zJYPpQMW

data/needle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
c7ll0y40P6Af6wq0GQ9ZZnCSTDOJMjY7DwJEnjR3LncoVS6Oz5oZoAX2ujWOwNzb5dbQ66jAPLxKcWK9skcfxdjVz9KZQwNYedrb32gSu5Uz42uiIy8JwcsQrzINauFahMgTxcDUfd6qiaNZrvsWxqWdl20lfd8HjFcXkaK60716cvM6Jry2wPvT9HdBVS3zWycALrb3gcEKhycPPVikCSc7TaOSfu99sga4qTxaI9syh5EYNKkCp7BLGTyPZZ8I1Z0V7x4PPhysOcXZu5FYsQXIInHg7muLLDdvpfSrIDAfp8cuchjXoAynbxLvjaAy5JjbLlCPrKBXsv52Clxf8BkHYhZuLfTB1rc41EseOexhedDjsRKfT6f9GlkbC3JKVRKtzxKua9vrxwYWmcGTKvUNAAHZeH1y9KDRY6Kyu8IVOdhQoPsVz4KUL055PJzN7wubglYt615GRnPnlhfwvi9pq54hUrMwWAwabvER6DmFgyh6mnx2KweqS9cH31gymB2XGocbXN4OCpa10kq6MMf7Ibvrm7y2rsjFBcdOeUZoOM5dqgrpvSKeQoOSnMpSrXeEiVTXrmXuLBZqHZMbbh7KPgKWlbg4MZwWVlDukNQKIsJdGy21m0q5DbDy0XSB1z1RbWu4B26g7fZJWK6TJr1ZUsUsQiRvKUrWbKz41A475MDxbaaWiAPJlqOFQl5UyoxuS1K51nMPlLG1DcRSeiQjLFsmDBNt6dcaIf8McnbDUhiA9MsHcNHlcroPCnrn26RatirFLkVhfMBFTORZ1EZz1IEMIRDrMotb9MzyqNpKkK2nItDA43MjaOddKhrms7ZxPUetfyKpcm9OXsbzmnvFqM9KFwtTiySICxAP6qNd24DM4wbGOU8WuVLZZyUnun3zqigNysKBwYwXdYKsaQAEZI6clSSmr4YcRzbgmf8qAIoYKz9crjXz47lMfPEpHIEEKN1ftW9Nml5Xg6Pivjw0mlUMRGTPLFKXedqC

0 commit comments

Comments
 (0)