Skip to content

Commit 1627fc4

Browse files
committed
manual struct target feature
1 parent af07d45 commit 1627fc4

File tree

3 files changed

+66
-7
lines changed

3 files changed

+66
-7
lines changed

zlib-rs/src/cpu_features.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
#![allow(dead_code)]
22
#![allow(unreachable_code)]
33

4+
/// Namespace for the CPU-feature identifiers that are passed around as a
/// `const FEATURES: usize` generic argument (e.g.
/// `inflate_fast_help_impl::<{ CpuFeatures::AVX2 }>`).
///
/// The type carries no data; it only groups the associated constants below.
pub struct CpuFeatures;
5+
6+
impl CpuFeatures {
7+
/// No specific CPU feature is selected at compile time. The writer's
/// `*_with_features` helpers send this value (and any value they do not
/// match explicitly) to their `*_runtime_dispatch` fallback.
pub const NONE: usize = 0;
8+
/// Selects the AVX2 code path: on `x86_64` the writer's `*_with_features`
/// helpers instantiate their `*_help` routine with `__m256i` for this value.
pub const AVX2: usize = 1;
9+
}
10+
411
#[inline(always)]
512
pub fn is_enabled_sse() -> bool {
613
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]

zlib-rs/src/inflate.rs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ mod writer;
1414

1515
use crate::allocate::Allocator;
1616
use crate::c_api::internal_state;
17+
use crate::cpu_features::CpuFeatures;
1718
use crate::{
1819
adler32::adler32,
1920
c_api::{gz_header, z_checksum, z_size, z_stream, Z_DEFLATED},
@@ -1869,15 +1870,15 @@ fn inflate_fast_help(state: &mut State, start: usize) {
18691870
/// AVX2-enabled entry point for the fast inflate helper.
///
/// Forwards `state` and `start` to `inflate_fast_help_impl`; after this commit
/// the call is instantiated with `FEATURES = CpuFeatures::AVX2`. Only compiled
/// for `x86_64` and `x86` targets.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "avx2")]`, so the
/// caller must ensure the running CPU actually supports AVX2 before calling it.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
18701871
#[target_feature(enable = "avx2")]
18711872
unsafe fn inflate_fast_help_avx2(state: &mut State, start: usize) {
1872-
inflate_fast_help_impl(state, start);
1873+
inflate_fast_help_impl::<{ CpuFeatures::AVX2 }>(state, start);
18731874
}
18741875

18751876
/// Entry point for the fast inflate helper with no target feature enabled.
///
/// Forwards `state` and `start` to `inflate_fast_help_impl`; after this commit
/// the call is instantiated with `FEATURES = CpuFeatures::NONE`.
fn inflate_fast_help_vanilla(state: &mut State, start: usize) {
1876-
inflate_fast_help_impl(state, start);
1877+
inflate_fast_help_impl::<{ CpuFeatures::NONE }>(state, start);
18771878
}
18781879

18791880
#[inline(always)]
1880-
fn inflate_fast_help_impl(state: &mut State, _start: usize) {
1881+
fn inflate_fast_help_impl<const FEATURES: usize>(state: &mut State, _start: usize) {
18811882
let mut bit_reader = BitReader::new(&[]);
18821883
core::mem::swap(&mut bit_reader, &mut state.bit_reader);
18831884

@@ -2009,23 +2010,32 @@ fn inflate_fast_help_impl(state: &mut State, _start: usize) {
20092010
// window, and part of it has wrapped around to the start. Copy
20102011
// the end section here, the start section will be copied below.
20112012
len -= op as u16;
2012-
writer.extend_from_window(&state.window, from..from + op);
2013+
writer.extend_from_window_with_features::<FEATURES>(
2014+
&state.window,
2015+
from..from + op,
2016+
);
20132017
from = 0;
20142018
op = window_next;
20152019
}
20162020
}
20172021

20182022
let copy = Ord::min(op, len as usize);
2019-
writer.extend_from_window(&state.window, from..from + copy);
2023+
writer.extend_from_window_with_features::<FEATURES>(
2024+
&state.window,
2025+
from..from + copy,
2026+
);
20202027

20212028
if op < len as usize {
20222029
// here we need some bytes from the output itself
2023-
writer.copy_match(dist as usize, len as usize - op);
2030+
writer.copy_match_with_features::<FEATURES>(
2031+
dist as usize,
2032+
len as usize - op,
2033+
);
20242034
}
20252035
} else if extra_safe {
20262036
todo!()
20272037
} else {
2028-
writer.copy_match(dist as usize, len as usize)
2038+
writer.copy_match_with_features::<FEATURES>(dist as usize, len as usize)
20292039
}
20302040
} else if (op & 64) == 0 {
20312041
// 2nd level distance code

zlib-rs/src/inflate/writer.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use core::fmt;
22
use core::mem::MaybeUninit;
33
use core::ops::Range;
44

5+
use crate::cpu_features::CpuFeatures;
56
use crate::weak_slice::WeakSliceMut;
67

78
pub struct Writer<'a> {
@@ -77,6 +78,28 @@ impl<'a> Writer<'a> {
7778

7879
/// Convenience wrapper around `extend_from_window_with_features` for callers
/// that have no compile-time feature to select.
///
/// Passes `window` and `range` through unchanged with
/// `FEATURES = CpuFeatures::NONE`, which takes the runtime-dispatch arm.
#[inline(always)]
7980
pub fn extend_from_window(&mut self, window: &super::window::Window, range: Range<usize>) {
81+
self.extend_from_window_with_features::<{ CpuFeatures::NONE }>(window, range)
82+
}
83+
84+
/// Feature-selecting variant of `extend_from_window`.
///
/// `FEATURES` is one of the `CpuFeatures` constants and is resolved at compile
/// time:
///
/// * `CpuFeatures::AVX2` (only on `x86_64`): calls `extend_from_window_help`
///   instantiated with `__m256i` directly, skipping any runtime check.
/// * anything else: falls back to `extend_from_window_runtime_dispatch`.
///
/// `window` and `range` are passed through unchanged to whichever path is taken.
///
/// Marked `#[inline(always)]` like `copy_match_with_features`, so that the
/// selected path can be folded into a caller compiled with the matching
/// `#[target_feature]` instead of staying an out-of-line call.
///
/// NOTE(review): the AVX2 arm is gated on `x86_64` only, so on 32-bit `x86`
/// an AVX2 instantiation takes the runtime-dispatch arm — confirm intended.
#[inline(always)]
pub fn extend_from_window_with_features<const FEATURES: usize>(
85+
&mut self,
86+
window: &super::window::Window,
87+
range: Range<usize>,
88+
) {
89+
match FEATURES {
90+
#[cfg(target_arch = "x86_64")]
91+
CpuFeatures::AVX2 => {
92+
self.extend_from_window_help::<core::arch::x86_64::__m256i>(window, range)
93+
}
94+
_ => self.extend_from_window_runtime_dispatch(window, range),
95+
}
96+
}
97+
98+
fn extend_from_window_runtime_dispatch(
99+
&mut self,
100+
window: &super::window::Window,
101+
range: Range<usize>,
102+
) {
80103
// NOTE: the dynamic check for avx512 makes avx2 slower. Measure this carefully before re-enabling
81104
//
82105
// #[cfg(target_arch = "x86_64")]
@@ -140,6 +163,25 @@ impl<'a> Writer<'a> {
140163

141164
/// Convenience wrapper around `copy_match_with_features` for callers that have
/// no compile-time feature to select.
///
/// Passes `offset_from_end` and `length` through unchanged with
/// `FEATURES = CpuFeatures::NONE`, which takes the runtime-dispatch arm.
#[inline(always)]
142165
pub fn copy_match(&mut self, offset_from_end: usize, length: usize) {
166+
self.copy_match_with_features::<{ CpuFeatures::NONE }>(offset_from_end, length)
167+
}
168+
169+
/// Feature-selecting variant of `copy_match`.
///
/// `FEATURES` is one of the `CpuFeatures` constants and is resolved at compile
/// time:
///
/// * `CpuFeatures::AVX2` (only on `x86_64`): calls `copy_match_help`
///   instantiated with `__m256i` directly, skipping any runtime check.
/// * anything else: falls back to `copy_match_runtime_dispatch`.
///
/// `offset_from_end` and `length` are passed through unchanged to whichever
/// path is taken.
///
/// NOTE(review): the AVX2 arm is gated on `x86_64` only, while
/// `inflate_fast_help_avx2` is also built for 32-bit `x86`; there an AVX2
/// instantiation takes the runtime-dispatch arm — confirm intended.
#[inline(always)]
170+
pub fn copy_match_with_features<const FEATURES: usize>(
171+
&mut self,
172+
offset_from_end: usize,
173+
length: usize,
174+
) {
175+
match FEATURES {
176+
#[cfg(target_arch = "x86_64")]
177+
CpuFeatures::AVX2 => {
178+
self.copy_match_help::<core::arch::x86_64::__m256i>(offset_from_end, length)
179+
}
180+
_ => self.copy_match_runtime_dispatch(offset_from_end, length),
181+
}
182+
}
183+
184+
fn copy_match_runtime_dispatch(&mut self, offset_from_end: usize, length: usize) {
143185
// NOTE: the dynamic check for avx512 makes avx2 slower. Measure this carefully before re-enabling
144186
//
145187
// #[cfg(target_arch = "x86_64")]

0 commit comments

Comments
 (0)