Skip to content

Commit cd390de

Browse files
authored
Merge pull request #52 from shepmaster/32bit-sse42
2 parents f607f4a + 3687959 commit cd390de

File tree

5 files changed

+141
-52
lines changed

5 files changed

+141
-52
lines changed

.github/actions/test/action.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
name: Run tests
2+
3+
inputs:
4+
toolchain:
5+
default: "stable"
6+
required: false
7+
type: string
8+
9+
features:
10+
default: ""
11+
required: false
12+
type: string
13+
14+
rustflags:
15+
default: ""
16+
required: false
17+
type: string
18+
19+
target:
20+
default: ""
21+
required: false
22+
type: string
23+
24+
runs:
25+
using: "composite"
26+
steps:
27+
- name: Install Rust
28+
uses: actions-rs/toolchain@v1
29+
with:
30+
toolchain: ${{ inputs.toolchain }}
31+
profile: minimal
32+
default: true
33+
34+
- name: Configure cross
35+
shell: bash
36+
run: |
37+
cat > Cross.toml <<EOF
38+
[build.env]
39+
passthrough = ["RUSTFLAGS", "RUSTDOCFLAGS"]
40+
EOF
41+
42+
- name: Run tests
43+
uses: actions-rs/cargo@v1
44+
env:
45+
RUSTFLAGS: "${{ env.RUSTFLAGS }} ${{ inputs.rustflags }}"
46+
with:
47+
command: test
48+
args: >-
49+
--all
50+
${{ inputs.features != '' && format('--features {0}', inputs.features) || '' }}
51+
${{ inputs.target != '' && format('--target {0}', inputs.target) || '' }}
52+
use-cross: ${{ inputs.target != '' }}

.github/workflows/ci.yml

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,39 +6,65 @@ env:
66
RUSTDOCFLAGS: -D warnings
77

88
jobs:
9-
primary:
9+
toolchains:
1010
strategy:
1111
matrix:
12-
os: [ubuntu-latest]
13-
toolchain: [stable, beta, nightly]
14-
features: ['']
12+
toolchain: ["stable", "beta", "nightly"]
1513

16-
include:
17-
- os: ubuntu-latest
18-
toolchain: nightly
19-
features: pattern
14+
runs-on: ubuntu-latest
2015

21-
- os: windows-latest
22-
toolchain: stable
16+
steps:
17+
- name: Check out code
18+
uses: actions/checkout@v3
19+
20+
- name: Run tests
21+
uses: ./.github/actions/test
22+
with:
23+
toolchain: "${{ matrix.toolchain }}"
24+
25+
platforms:
26+
strategy:
27+
matrix:
28+
platform: ["windows-latest", "macos-latest"]
29+
30+
runs-on: ${{ matrix.platform }}
31+
32+
steps:
33+
- name: Check out code
34+
uses: actions/checkout@v3
2335

24-
- os: macos-latest
25-
toolchain: stable
36+
- name: Run tests
37+
uses: ./.github/actions/test
38+
with:
39+
platform: "${{ matrix.platform }}"
2640

27-
runs-on: ${{ matrix.os }}
41+
crate_features:
42+
runs-on: ubuntu-latest
2843

2944
steps:
3045
- name: Check out code
31-
uses: actions/checkout@v2
46+
uses: actions/checkout@v3
3247

33-
- name: Install Rust
34-
uses: actions-rs/toolchain@v1
48+
- name: Run tests
49+
uses: ./.github/actions/test
3550
with:
36-
toolchain: ${{ matrix.toolchain }}
37-
profile: minimal
38-
default: true
51+
toolchain: "nightly"
52+
features: "pattern"
53+
54+
target_features:
55+
strategy:
56+
matrix:
57+
target_feature: ["-sse4.2", "+sse4.2"]
58+
target: ["", "i686-unknown-linux-gnu"]
59+
60+
runs-on: ubuntu-latest
61+
62+
steps:
63+
- name: Check out code
64+
uses: actions/checkout@v3
3965

4066
- name: Run tests
41-
uses: actions-rs/cargo@v1
67+
uses: ./.github/actions/test
4268
with:
43-
command: test
44-
args: --all --features=${{ matrix.features }}
69+
rustflags: "-C target-feature=${{ matrix.target_feature }}"
70+
target: "${{ matrix.target }}"

build.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,28 @@ use std::io::prelude::*;
44
use std::path::{Path, PathBuf};
55

66
fn main() {
7+
cfg();
78
macros();
89
simd_macros();
910
println!("cargo:rerun-if-changed=build.rs");
1011
}
1112

13+
fn cfg() {
14+
let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
15+
let target_feature = env::var("CARGO_CFG_TARGET_FEATURE").unwrap_or_default();
16+
17+
let ok_arch = matches!(&*target_arch, "x86" | "x86_64");
18+
let sse4_2_guaranteed = target_feature.split(',').any(|f| f == "sse4.2");
19+
20+
if sse4_2_guaranteed {
21+
println!(r#"cargo:rustc-cfg=jetscii_sse4_2="yes""#);
22+
} else if ok_arch {
23+
println!(r#"cargo:rustc-cfg=jetscii_sse4_2="maybe""#);
24+
} else {
25+
println!(r#"cargo:rustc-cfg=jetscii_sse4_2="no""#);
26+
}
27+
}
28+
1229
fn macros() {
1330
let mut base: PathBuf = env::var_os("OUT_DIR").unwrap().into();
1431
base.push("src");

src/lib.rs

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
//!
8686
//! ## What's so special about this library?
8787
//!
88-
//! We use a particular set of x86-64 SSE 4.2 instructions (`PCMPESTRI`
88+
//! We use a particular set of SSE 4.2 instructions (`PCMPESTRI`
8989
//! and `PCMPESTRM`) to gain great speedups. This method stays fast even
9090
//! when searching for a byte in a set of up to 16 choices.
9191
//!
@@ -155,10 +155,10 @@ use std::marker::PhantomData;
155155

156156
include!(concat!(env!("OUT_DIR"), "/src/macros.rs"));
157157

158-
#[cfg(target_arch = "x86_64")]
158+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
159159
mod simd;
160160

161-
#[cfg(not(target_feature = "sse4.2"))]
161+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
162162
mod fallback;
163163

164164
#[cfg(feature = "pattern")]
@@ -168,21 +168,21 @@ macro_rules! dispatch {
168168
(simd: $simd:expr,fallback: $fallback:expr,) => {
169169
// If we can tell at compile time that we have support,
170170
// call the optimized code directly.
171-
#[cfg(target_feature = "sse4.2")]
171+
#[cfg(jetscii_sse4_2 = "yes")]
172172
{
173173
$simd
174174
}
175175

176176
// If we can tell at compile time that we will *never* have
177177
// support, call the fallback directly.
178-
#[cfg(not(target_arch = "x86_64"))]
178+
#[cfg(jetscii_sse4_2 = "no")]
179179
{
180180
$fallback
181181
}
182182

183183
// Otherwise, we will be run on a machine with or without
184184
// support, so we perform runtime detection.
185-
#[cfg(all(target_arch = "x86_64", not(target_feature = "sse4.2")))]
185+
#[cfg(jetscii_sse4_2 = "maybe")]
186186
{
187187
if is_x86_feature_detected!("sse4.2") {
188188
$simd
@@ -198,14 +198,10 @@ pub struct Bytes<F>
198198
where
199199
F: Fn(u8) -> bool,
200200
{
201-
// Include this implementation only when compiling for x86_64 as
202-
// that's the only platform that we support.
203-
#[cfg(target_arch = "x86_64")]
201+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
204202
simd: simd::Bytes,
205203

206-
// If we are *guaranteed* to have SSE 4.2, then there's no reason
207-
// to have this implementation.
208-
#[cfg(not(target_feature = "sse4.2"))]
204+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
209205
fallback: fallback::Bytes<F>,
210206

211207
// Since we might not use the fallback implementation, we add this
@@ -226,10 +222,10 @@ where
226222
#[allow(unused_variables)]
227223
pub /* const */ fn new(bytes: [u8; 16], len: i32, fallback: F) -> Self {
228224
Bytes {
229-
#[cfg(target_arch = "x86_64")]
225+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
230226
simd: simd::Bytes::new(bytes, len),
231227

232-
#[cfg(not(target_feature = "sse4.2"))]
228+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
233229
fallback: fallback::Bytes::new(fallback),
234230

235231
_fallback: PhantomData,
@@ -288,24 +284,20 @@ pub type AsciiCharsConst = AsciiChars<fn(u8) -> bool>;
288284

289285
/// Searches a slice for the first occurrence of the subslice.
290286
pub struct ByteSubstring<'a> {
291-
// Include this implementation only when compiling for x86_64 as
292-
// that's the only platform that we support.
293-
#[cfg(target_arch = "x86_64")]
287+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
294288
simd: simd::ByteSubstring<'a>,
295289

296-
// If we are *guaranteed* to have SSE 4.2, then there's no reason
297-
// to have this implementation.
298-
#[cfg(not(target_feature = "sse4.2"))]
290+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
299291
fallback: fallback::ByteSubstring<'a>,
300292
}
301293

302294
impl<'a> ByteSubstring<'a> {
303295
pub /* const */ fn new(needle: &'a [u8]) -> Self {
304296
ByteSubstring {
305-
#[cfg(target_arch = "x86_64")]
297+
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
306298
simd: simd::ByteSubstring::new(needle),
307299

308-
#[cfg(not(target_feature = "sse4.2"))]
300+
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
309301
fallback: fallback::ByteSubstring::new(needle),
310302
}
311303
}

src/simd.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
//
33
// Everything in this module assumes that the SSE 4.2 feature is available.
44

5-
use std::{
6-
arch::x86_64::{
7-
__m128i, _mm_cmpestri, _mm_cmpestrm, _mm_extract_epi16, _mm_loadu_si128, _SIDD_CMP_EQUAL_ORDERED,
8-
},
9-
cmp::min,
10-
slice,
5+
use std::{cmp::min, slice};
6+
7+
#[cfg(target_arch = "x86")]
8+
use std::arch::x86 as target_arch;
9+
#[cfg(target_arch = "x86_64")]
10+
use std::arch::x86_64 as target_arch;
11+
12+
use self::target_arch::{
13+
__m128i, _mm_cmpestri, _mm_cmpestrm, _mm_extract_epi16, _mm_loadu_si128,
14+
_SIDD_CMP_EQUAL_ORDERED,
1115
};
1216

1317
include!(concat!(env!("OUT_DIR"), "/src/simd_macros.rs"));
@@ -298,8 +302,6 @@ impl<'a, 'b> PackedCompareControl for &'b ByteSubstring<'a> {
298302
}
299303
}
300304

301-
// TODO: Does x86 actually support this instruction?
302-
303305
#[cfg(test)]
304306
mod test {
305307
use proptest::prelude::*;

0 commit comments

Comments
 (0)