Skip to content

Commit da5da2b

Browse files
committed
Support 32-bit x86 with SSE4.2 as well
1 parent f607f4a commit da5da2b

File tree

3 files changed

+41
-30
lines changed

3 files changed

+41
-30
lines changed

build.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,28 @@ use std::io::prelude::*;
44
use std::path::{Path, PathBuf};
55

66
/// Build-script entry point.
///
/// Emits the `jetscii_sse4_2` cfg via `cfg()`, then runs the `macros()` and
/// `simd_macros()` steps (presumably generating the sources the crate
/// `include!`s from `OUT_DIR` -- confirm against their definitions).
fn main() {
7+
cfg();
78
macros();
89
simd_macros();
910
// Tell Cargo to re-run this script only when build.rs itself changes.
println!("cargo:rerun-if-changed=build.rs");
1011
}
1112

13+
/// Emits the `jetscii_sse4_2` cfg for the crate being built.
///
/// The value is derived from the `CARGO_CFG_TARGET_ARCH` and
/// `CARGO_CFG_TARGET_FEATURE` environment variables (a missing variable is
/// treated as empty):
///
/// - `"yes"`: `sse4.2` is listed among the enabled target features.
/// - `"maybe"`: `sse4.2` is not listed, but the target is `x86` or `x86_64`.
/// - `"no"`: any other target.
fn cfg() {
    // Declare the custom cfg and every value it can take, so toolchains that
    // check cfg names/values do not report `jetscii_sse4_2` as unexpected.
    println!(r#"cargo:rustc-check-cfg=cfg(jetscii_sse4_2, values("yes", "maybe", "no"))"#);

    let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
    let target_feature = env::var("CARGO_CFG_TARGET_FEATURE").unwrap_or_default();

    let ok_arch = matches!(&*target_arch, "x86" | "x86_64");
    // The feature list is comma-separated; compare whole entries so that a
    // feature merely containing "sse4.2" as a substring cannot match.
    let sse4_2_guaranteed = target_feature.split(',').any(|f| f == "sse4.2");

    let support = if sse4_2_guaranteed {
        "yes"
    } else if ok_arch {
        "maybe"
    } else {
        "no"
    };

    println!(r#"cargo:rustc-cfg=jetscii_sse4_2="{}""#, support);
}
28+
1229
fn macros() {
1330
let mut base: PathBuf = env::var_os("OUT_DIR").unwrap().into();
1431
base.push("src");

src/lib.rs

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
//!
8686
//! ## What's so special about this library?
8787
//!
88-
//! We use a particular set of x86-64 SSE 4.2 instructions (`PCMPESTRI`
88+
//! We use a particular set of SSE 4.2 instructions (`PCMPESTRI`
8989
//! and `PCMPESTRM`) to gain great speedups. This method stays fast even
9090
//! when searching for a byte in a set of up to 16 choices.
9191
//!
@@ -155,10 +155,10 @@ use std::marker::PhantomData;
155155

156156
include!(concat!(env!("OUT_DIR"), "/src/macros.rs"));
157157

158-
#[cfg(target_arch = "x86_64")]
158+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
159159
mod simd;
160160

161-
#[cfg(not(target_feature = "sse4.2"))]
161+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
162162
mod fallback;
163163

164164
#[cfg(feature = "pattern")]
@@ -168,21 +168,21 @@ macro_rules! dispatch {
168168
(simd: $simd:expr,fallback: $fallback:expr,) => {
169169
// If we can tell at compile time that we have support,
170170
// call the optimized code directly.
171-
#[cfg(target_feature = "sse4.2")]
171+
#[cfg(jetscii_sse4_2 = "yes")]
172172
{
173173
$simd
174174
}
175175

176176
// If we can tell at compile time that we will *never* have
177177
// support, call the fallback directly.
178-
#[cfg(not(target_arch = "x86_64"))]
178+
#[cfg(jetscii_sse4_2 = "no")]
179179
{
180180
$fallback
181181
}
182182

183183
// Otherwise, we will be run on a machine with or without
184184
// support, so we perform runtime detection.
185-
#[cfg(all(target_arch = "x86_64", not(target_feature = "sse4.2")))]
185+
#[cfg(jetscii_sse4_2 = "maybe")]
186186
{
187187
if is_x86_feature_detected!("sse4.2") {
188188
$simd
@@ -198,14 +198,10 @@ pub struct Bytes<F>
198198
where
199199
F: Fn(u8) -> bool,
200200
{
201-
// Include this implementation only when compiling for x86_64 as
202-
// that's the only platform that we support.
203-
#[cfg(target_arch = "x86_64")]
201+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
204202
simd: simd::Bytes,
205203

206-
// If we are *guaranteed* to have SSE 4.2, then there's no reason
207-
// to have this implementation.
208-
#[cfg(not(target_feature = "sse4.2"))]
204+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
209205
fallback: fallback::Bytes<F>,
210206

211207
// Since we might not use the fallback implementation, we add this
@@ -226,10 +222,10 @@ where
226222
#[allow(unused_variables)]
227223
pub /* const */ fn new(bytes: [u8; 16], len: i32, fallback: F) -> Self {
228224
Bytes {
229-
#[cfg(target_arch = "x86_64")]
225+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
230226
simd: simd::Bytes::new(bytes, len),
231227

232-
#[cfg(not(target_feature = "sse4.2"))]
228+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
233229
fallback: fallback::Bytes::new(fallback),
234230

235231
_fallback: PhantomData,
@@ -288,24 +284,20 @@ pub type AsciiCharsConst = AsciiChars<fn(u8) -> bool>;
288284

289285
/// Searches a slice for the first occurrence of the subslice.
290286
pub struct ByteSubstring<'a> {
291-
// Include this implementation only when compiling for x86_64 as
292-
// that's the only platform that we support.
293-
#[cfg(target_arch = "x86_64")]
287+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
294288
simd: simd::ByteSubstring<'a>,
295289

296-
// If we are *guaranteed* to have SSE 4.2, then there's no reason
297-
// to have this implementation.
298-
#[cfg(not(target_feature = "sse4.2"))]
290+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
299291
fallback: fallback::ByteSubstring<'a>,
300292
}
301293

302294
impl<'a> ByteSubstring<'a> {
303295
pub /* const */ fn new(needle: &'a [u8]) -> Self {
304296
ByteSubstring {
305-
#[cfg(target_arch = "x86_64")]
297+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
306298
simd: simd::ByteSubstring::new(needle),
307299

308-
#[cfg(not(target_feature = "sse4.2"))]
300+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
309301
fallback: fallback::ByteSubstring::new(needle),
310302
}
311303
}

src/simd.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
//
33
// Everything in this module assumes that the SSE 4.2 feature is available.
44

5-
use std::{
6-
arch::x86_64::{
7-
__m128i, _mm_cmpestri, _mm_cmpestrm, _mm_extract_epi16, _mm_loadu_si128, _SIDD_CMP_EQUAL_ORDERED,
8-
},
9-
cmp::min,
10-
slice,
5+
use std::{cmp::min, slice};
6+
7+
#[cfg(target_arch = "x86")]
8+
use std::arch::x86 as target_arch;
9+
#[cfg(target_arch = "x86_64")]
10+
use std::arch::x86_64 as target_arch;
11+
12+
use self::target_arch::{
13+
__m128i, _mm_cmpestri, _mm_cmpestrm, _mm_extract_epi16, _mm_loadu_si128,
14+
_SIDD_CMP_EQUAL_ORDERED,
1115
};
1216

1317
include!(concat!(env!("OUT_DIR"), "/src/simd_macros.rs"));
@@ -298,8 +302,6 @@ impl<'a, 'b> PackedCompareControl for &'b ByteSubstring<'a> {
298302
}
299303
}
300304

301-
// TODO: Does x86 actually support this instruction?
302-
303305
#[cfg(test)]
304306
mod test {
305307
use proptest::prelude::*;

0 commit comments

Comments
 (0)