Skip to content

Commit 0583614

Browse files
zakcutner authored and marmeladema committed
Use target_feature annotation to mark functions requiring AVX2
1 parent a66b596 commit 0583614

File tree

4 files changed, with 51 additions (+51) and 23 deletions (-23).

benches/i386.rs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ fn search_short_haystack(c: &mut Criterion) {
143143
group.bench_function("DynamicAvx2Searcher::search_in", |b| {
144144
let searchers = needles
145145
.iter()
146-
.map(|&needle| {
146+
.map(|&needle| unsafe {
147147
DynamicAvx2Searcher::new(needle.as_bytes().to_owned().into_boxed_slice())
148148
})
149149
.collect::<Vec<_>>();
@@ -274,7 +274,9 @@ fn search_long_haystack(c: &mut Criterion) {
274274
group.bench_function("DynamicAvx2Searcher::search_in", |b| {
275275
let searchers = needles
276276
.iter()
277-
.map(|needle| DynamicAvx2Searcher::new(needle.as_bytes().to_owned().into_boxed_slice()))
277+
.map(|needle| unsafe {
278+
DynamicAvx2Searcher::new(needle.as_bytes().to_owned().into_boxed_slice())
279+
})
278280
.collect::<Vec<_>>();
279281

280282
b.iter(|| {

benches/random.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,8 @@ fn search(c: &mut Criterion) {
105105
BenchmarkId::new("DynamicAvx2Searcher::search_in", parameter),
106106
&size,
107107
|b, _| {
108-
let searcher = DynamicAvx2Searcher::new(needle.to_owned().into_boxed_slice());
108+
let searcher =
109+
unsafe { DynamicAvx2Searcher::new(needle.to_owned().into_boxed_slice()) };
109110
b.iter(|| black_box(searcher.search_in(haystack)));
110111
},
111112
);

src/avx2/mod.rs

Lines changed: 44 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
#![allow(clippy::missing_safety_doc)]
2+
13
mod original;
24
mod rust;
35

@@ -45,53 +47,63 @@ trait Vector: Copy {
4547

4648
impl Vector for __m128i {
4749
#[inline]
50+
#[target_feature(enable = "avx2")]
4851
unsafe fn set1_epi8(a: i8) -> Self {
4952
_mm_set1_epi8(a)
5053
}
5154

5255
#[inline]
56+
#[target_feature(enable = "avx2")]
5357
unsafe fn loadu_si(a: *const Self) -> Self {
5458
_mm_loadu_si128(a)
5559
}
5660

5761
#[inline]
62+
#[target_feature(enable = "avx2")]
5863
unsafe fn cmpeq_epi8(a: Self, b: Self) -> Self {
5964
_mm_cmpeq_epi8(a, b)
6065
}
6166

6267
#[inline]
68+
#[target_feature(enable = "avx2")]
6369
unsafe fn and_si(a: Self, b: Self) -> Self {
6470
_mm_and_si128(a, b)
6571
}
6672

6773
#[inline]
74+
#[target_feature(enable = "avx2")]
6875
unsafe fn movemask_epi8(a: Self) -> i32 {
6976
_mm_movemask_epi8(a)
7077
}
7178
}
7279

7380
impl Vector for __m256i {
7481
#[inline]
82+
#[target_feature(enable = "avx2")]
7583
unsafe fn set1_epi8(a: i8) -> Self {
7684
_mm256_set1_epi8(a)
7785
}
7886

7987
#[inline]
88+
#[target_feature(enable = "avx2")]
8089
unsafe fn loadu_si(a: *const Self) -> Self {
8190
_mm256_loadu_si256(a)
8291
}
8392

8493
#[inline]
94+
#[target_feature(enable = "avx2")]
8595
unsafe fn cmpeq_epi8(a: Self, b: Self) -> Self {
8696
_mm256_cmpeq_epi8(a, b)
8797
}
8898

8999
#[inline]
100+
#[target_feature(enable = "avx2")]
90101
unsafe fn and_si(a: Self, b: Self) -> Self {
91102
_mm256_and_si256(a, b)
92103
}
93104

94105
#[inline]
106+
#[target_feature(enable = "avx2")]
95107
unsafe fn movemask_epi8(a: Self) -> i32 {
96108
_mm256_movemask_epi8(a)
97109
}
@@ -103,10 +115,11 @@ struct VectorHash<V: Vector> {
103115
}
104116

105117
impl<V: Vector> VectorHash<V> {
106-
fn new(first: u8, last: u8) -> Self {
118+
#[target_feature(enable = "avx2")]
119+
unsafe fn new(first: u8, last: u8) -> Self {
107120
Self {
108-
first: unsafe { Vector::set1_epi8(first as i8) },
109-
last: unsafe { Vector::set1_epi8(last as i8) },
121+
first: Vector::set1_epi8(first as i8),
122+
last: Vector::set1_epi8(last as i8),
110123
}
111124
}
112125
}
@@ -122,12 +135,14 @@ macro_rules! avx2_searcher {
122135
}
123136

124137
impl $name {
125-
pub fn new(needle: Box<[u8]>) -> Self {
138+
#[target_feature(enable = "avx2")]
139+
pub unsafe fn new(needle: Box<[u8]>) -> Self {
126140
let position = needle.len() - 1;
127141
Self::with_position(needle, position)
128142
}
129143

130-
pub fn with_position(needle: Box<[u8]>, position: usize) -> Self {
144+
#[target_feature(enable = "avx2")]
145+
pub unsafe fn with_position(needle: Box<[u8]>, position: usize) -> Self {
131146
assert!(!needle.is_empty());
132147
assert!(position < needle.len());
133148

@@ -176,6 +191,7 @@ macro_rules! avx2_searcher {
176191
}
177192

178193
#[inline]
194+
#[target_feature(enable = "avx2")]
179195
unsafe fn vector_search_in_chunk<V: Vector>(
180196
&self,
181197
haystack: &[u8],
@@ -206,11 +222,12 @@ macro_rules! avx2_searcher {
206222
}
207223

208224
#[inline]
209-
fn vector_search_in<V: Vector>(
225+
#[target_feature(enable = "avx2")]
226+
unsafe fn vector_search_in<V: Vector>(
210227
&self,
211228
haystack: &[u8],
212229
hash: &VectorHash<V>,
213-
next: fn(&Self, &[u8]) -> bool,
230+
next: unsafe fn(&Self, &[u8]) -> bool,
214231
) -> bool {
215232
debug_assert!(haystack.len() >= self.size());
216233

@@ -223,17 +240,17 @@ macro_rules! avx2_searcher {
223240

224241
let mut chunks = haystack[..end].chunks_exact(lanes);
225242
while let Some(chunk) = chunks.next() {
226-
if unsafe { self.vector_search_in_chunk(haystack, hash, chunk.as_ptr(), -1) } {
243+
if self.vector_search_in_chunk(haystack, hash, chunk.as_ptr(), -1) {
227244
return true;
228245
}
229246
}
230247

231248
let remainder = chunks.remainder().len();
232249
if remainder > 0 {
233-
let start = unsafe { haystack.as_ptr().add(end - lanes) };
250+
let start = haystack.as_ptr().add(end - lanes);
234251
let mask = -1 << (lanes - remainder);
235252

236-
if unsafe { self.vector_search_in_chunk(haystack, hash, start, mask) } {
253+
if self.vector_search_in_chunk(haystack, hash, start, mask) {
237254
return true;
238255
}
239256
}
@@ -242,26 +259,30 @@ macro_rules! avx2_searcher {
242259
}
243260

244261
#[inline]
245-
fn sse2_search_in(&self, haystack: &[u8]) -> bool {
262+
#[target_feature(enable = "avx2")]
263+
unsafe fn sse2_search_in(&self, haystack: &[u8]) -> bool {
246264
self.vector_search_in(haystack, &self.sse2_hash, Self::scalar_search_in)
247265
}
248266

249267
#[inline]
250-
fn avx2_search_in(&self, haystack: &[u8]) -> bool {
268+
#[target_feature(enable = "avx2")]
269+
unsafe fn avx2_search_in(&self, haystack: &[u8]) -> bool {
251270
self.vector_search_in(haystack, &self.avx2_hash, Self::sse2_search_in)
252271
}
253272

254273
#[inline]
255-
pub fn inlined_search_in(&self, haystack: &[u8]) -> bool {
274+
#[target_feature(enable = "avx2")]
275+
pub unsafe fn inlined_search_in(&self, haystack: &[u8]) -> bool {
256276
if haystack.len() < self.size() {
257277
return false;
258278
}
259279

260280
self.avx2_search_in(haystack)
261281
}
262282

283+
#[inline]
263284
pub fn search_in(&self, haystack: &[u8]) -> bool {
264-
self.inlined_search_in(haystack)
285+
unsafe { self.inlined_search_in(haystack) }
265286
}
266287
}
267288
};
@@ -300,12 +321,14 @@ pub enum DynamicAvx2Searcher {
300321
}
301322

302323
impl DynamicAvx2Searcher {
303-
pub fn new(needle: Box<[u8]>) -> Self {
324+
#[target_feature(enable = "avx2")]
325+
pub unsafe fn new(needle: Box<[u8]>) -> Self {
304326
let position = needle.len() - 1;
305327
Self::with_position(needle, position)
306328
}
307329

308-
pub fn with_position(needle: Box<[u8]>, position: usize) -> Self {
330+
#[target_feature(enable = "avx2")]
331+
pub unsafe fn with_position(needle: Box<[u8]>, position: usize) -> Self {
309332
assert!(!needle.is_empty());
310333
assert!(position < needle.len());
311334

@@ -329,7 +352,8 @@ impl DynamicAvx2Searcher {
329352
}
330353

331354
#[inline]
332-
pub fn inlined_search_in(&self, haystack: &[u8]) -> bool {
355+
#[target_feature(enable = "avx2")]
356+
pub unsafe fn inlined_search_in(&self, haystack: &[u8]) -> bool {
333357
match self {
334358
Self::N0 => true,
335359
Self::N1(searcher) => searcher.inlined_search_in(haystack),
@@ -349,8 +373,9 @@ impl DynamicAvx2Searcher {
349373
}
350374
}
351375

376+
#[inline]
352377
pub fn search_in(&self, haystack: &[u8]) -> bool {
353-
self.inlined_search_in(haystack)
378+
unsafe { self.inlined_search_in(haystack) }
354379
}
355380
}
356381

@@ -359,7 +384,7 @@ mod tests {
359384
use super::Avx2Searcher;
360385

361386
fn search(haystack: &[u8], needle: &[u8]) -> bool {
362-
let search = |position| {
387+
let search = |position| unsafe {
363388
Avx2Searcher::with_position(needle.to_owned().into_boxed_slice(), position)
364389
.search_in(haystack)
365390
};

tests/i386.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ fn search(haystack: &str, needle: &str) {
3737
let searcher = StrStrAVX2Searcher::new(needle);
3838
assert_eq!(searcher.search_in(haystack), result);
3939

40-
let searcher = DynamicAvx2Searcher::new(needle.to_owned().into_boxed_slice());
40+
let searcher = unsafe { DynamicAvx2Searcher::new(needle.to_owned().into_boxed_slice()) };
4141
assert_eq!(searcher.search_in(haystack), result);
4242
}
4343

0 commit comments

Comments
 (0)