Skip to content

Commit faf722a

Browse files
committed
Take &VecType in more codegen methods
1 parent 6290543 commit faf722a

File tree

3 files changed

+39
-54
lines changed

3 files changed

+39
-54
lines changed

fearless_simd_gen/src/arch/x86.rs

Lines changed: 19 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -71,16 +71,15 @@ impl X86 {
7171
}
7272
"neg" => match ty.scalar {
7373
ScalarType::Float => {
74-
let set1 = set1_intrinsic(ty.scalar, ty.scalar_bits, ty.n_bits());
75-
let xor =
76-
simple_intrinsic("xor", ScalarType::Float, ty.scalar_bits, ty.n_bits());
74+
let set1 = set1_intrinsic(ty);
75+
let xor = simple_intrinsic("xor", ty);
7776
quote! {
7877
#( #xor(#args, #set1(-0.0)) )*
7978
}
8079
}
8180
ScalarType::Int => {
8281
let set0 = intrinsic_ident("setzero", coarse_type(*ty), ty.n_bits());
83-
let sub = simple_intrinsic("sub", ty.scalar, ty.scalar_bits, ty.n_bits());
82+
let sub = simple_intrinsic("sub", ty);
8483
let arg = &args[0];
8584
quote! {
8685
#sub(#set0(), #arg)
@@ -89,22 +88,19 @@ impl X86 {
8988
_ => unreachable!(),
9089
},
9190
"abs" => {
92-
let set1 = set1_intrinsic(ty.scalar, ty.scalar_bits, ty.n_bits());
93-
let andnot =
94-
simple_intrinsic("andnot", ScalarType::Float, ty.scalar_bits, ty.n_bits());
91+
let set1 = set1_intrinsic(ty);
92+
let andnot = simple_intrinsic("andnot", ty);
9593
quote! {
9694
#( #andnot(#set1(-0.0), #args) )*
9795
}
9896
}
9997
"copysign" => {
10098
let a = &args[0];
10199
let b = &args[1];
102-
let set1 = set1_intrinsic(ty.scalar, ty.scalar_bits, ty.n_bits());
103-
let and =
104-
simple_intrinsic("and", ScalarType::Float, ty.scalar_bits, ty.n_bits());
105-
let andnot =
106-
simple_intrinsic("andnot", ScalarType::Float, ty.scalar_bits, ty.n_bits());
107-
let or = simple_intrinsic("or", ScalarType::Float, ty.scalar_bits, ty.n_bits());
100+
let set1 = set1_intrinsic(ty);
101+
let and = simple_intrinsic("and", ty);
102+
let andnot = simple_intrinsic("andnot", ty);
103+
let or = simple_intrinsic("or", ty);
108104
quote! {
109105
let mask = #set1(-0.0);
110106
#or(#and(mask, #b), #andnot(mask, #a))
@@ -167,31 +163,26 @@ pub(crate) fn coarse_type(vec_ty: VecType) -> &'static str {
167163
}
168164
}
169165

170-
pub(crate) fn set1_intrinsic(ty: ScalarType, bits: usize, ty_bits: usize) -> Ident {
166+
pub(crate) fn set1_intrinsic(vec_ty: &VecType) -> Ident {
171167
use ScalarType::*;
172-
let suffix = match (ty, bits) {
168+
let suffix = match (vec_ty.scalar, vec_ty.scalar_bits) {
173169
(Int | Unsigned | Mask, 64) => "epi64x",
174-
_ => op_suffix(ty, bits, false),
170+
(scalar, bits) => op_suffix(scalar, bits, false),
175171
};
176172

177-
intrinsic_ident("set1", suffix, ty_bits)
173+
intrinsic_ident("set1", suffix, vec_ty.n_bits())
178174
}
179175

180-
pub(crate) fn simple_intrinsic(name: &str, ty: ScalarType, bits: usize, ty_bits: usize) -> Ident {
181-
let suffix = op_suffix(ty, bits, true);
176+
pub(crate) fn simple_intrinsic(name: &str, vec_ty: &VecType) -> Ident {
177+
let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, true);
182178

183-
intrinsic_ident(name, suffix, ty_bits)
179+
intrinsic_ident(name, suffix, vec_ty.n_bits())
184180
}
185181

186-
pub(crate) fn simple_sign_unaware_intrinsic(
187-
name: &str,
188-
ty: ScalarType,
189-
bits: usize,
190-
ty_bits: usize,
191-
) -> Ident {
192-
let suffix = op_suffix(ty, bits, false);
182+
pub(crate) fn simple_sign_unaware_intrinsic(name: &str, vec_ty: &VecType) -> Ident {
183+
let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, false);
193184

194-
intrinsic_ident(name, suffix, ty_bits)
185+
intrinsic_ident(name, suffix, vec_ty.n_bits())
195186
}
196187

197188
pub(crate) fn extend_intrinsic(

fearless_simd_gen/src/mk_avx2.rs

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -183,17 +183,15 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType) -> TokenStream {
183183
OpSig::Shift => mk_sse4_2::handle_shift(method_sig, method, vec_ty),
184184
OpSig::Ternary => match method {
185185
"madd" => {
186-
let intrinsic =
187-
simple_intrinsic("fmadd", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
186+
let intrinsic = simple_intrinsic("fmadd", vec_ty);
188187
quote! {
189188
#method_sig {
190189
unsafe { #intrinsic(a.into(), b.into(), c.into()).simd_into(self) }
191190
}
192191
}
193192
}
194193
"msub" => {
195-
let intrinsic =
196-
simple_intrinsic("fmsub", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
194+
let intrinsic = simple_intrinsic("fmsub", vec_ty);
197195
quote! {
198196
#method_sig {
199197
unsafe { #intrinsic(a.into(), b.into(), c.into()).simd_into(self) }
@@ -282,7 +280,7 @@ pub(crate) fn handle_compare(
282280
"simd_gt" => 0x1E,
283281
_ => unreachable!(),
284282
};
285-
let intrinsic = simple_intrinsic("cmp", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
283+
let intrinsic = simple_intrinsic("cmp", vec_ty);
286284
let cast = cast_ident(
287285
ScalarType::Float,
288286
ScalarType::Mask,
@@ -374,7 +372,11 @@ pub(crate) fn handle_widen_narrow(
374372
}
375373
}
376374
(256, 512) => {
377-
let mask = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, t.n_bits());
375+
let mask = set1_intrinsic(&VecType::new(
376+
vec_ty.scalar,
377+
vec_ty.scalar_bits,
378+
vec_ty.len / 2,
379+
));
378380
let pack = pack_intrinsic(
379381
vec_ty.scalar_bits,
380382
matches!(vec_ty.scalar, ScalarType::Int),

fearless_simd_gen/src/mk_sse4_2.rs

Lines changed: 12 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType) -> TokenStream {
204204
}
205205

206206
pub(crate) fn handle_splat(method_sig: TokenStream, vec_ty: &VecType) -> TokenStream {
207-
let intrinsic = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
207+
let intrinsic = set1_intrinsic(vec_ty);
208208
let cast = match vec_ty.scalar {
209209
ScalarType::Unsigned => quote!(as _),
210210
_ => quote!(),
@@ -234,27 +234,17 @@ pub(crate) fn handle_compare(
234234
_ => unreachable!(),
235235
};
236236

237-
let eq_intrinsic = simple_sign_unaware_intrinsic(
238-
"cmpeq",
239-
vec_ty.scalar,
240-
vec_ty.scalar_bits,
241-
vec_ty.n_bits(),
242-
);
237+
let eq_intrinsic = simple_sign_unaware_intrinsic("cmpeq", vec_ty);
243238

244239
let max_min_expr = X86.expr(max_min, vec_ty, &args);
245240
quote! { #eq_intrinsic(#max_min_expr, a.into()) }
246241
}
247242
"simd_lt" | "simd_gt" => {
248-
let gt = simple_sign_unaware_intrinsic(
249-
"cmpgt",
250-
vec_ty.scalar,
251-
vec_ty.scalar_bits,
252-
vec_ty.n_bits(),
253-
);
243+
let gt = simple_sign_unaware_intrinsic("cmpgt", vec_ty);
254244

255245
if vec_ty.scalar == ScalarType::Unsigned {
256246
// SSE4.2 only has signed GT/LT, but not unsigned.
257-
let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
247+
let set = set1_intrinsic(vec_ty);
258248
let sign = match vec_ty.scalar_bits {
259249
8 => quote! { 0x80u8 },
260250
16 => quote! { 0x8000u16 },
@@ -372,7 +362,11 @@ pub(crate) fn handle_widen_narrow(
372362
}
373363
}
374364
"narrow" => {
375-
let mask = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, t.n_bits());
365+
let mask = set1_intrinsic(&VecType::new(
366+
vec_ty.scalar,
367+
vec_ty.scalar_bits,
368+
vec_ty.len / 2,
369+
));
376370
let pack = pack_intrinsic(
377371
vec_ty.scalar_bits,
378372
matches!(vec_ty.scalar, ScalarType::Int),
@@ -778,11 +772,9 @@ pub(crate) fn handle_cvt(
778772
);
779773

780774
let expr = if vec_ty.scalar == ScalarType::Float {
781-
let floor_intrinsic =
782-
simple_intrinsic("floor", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
783-
let max_intrinsic =
784-
simple_intrinsic("max", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
785-
let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits());
775+
let floor_intrinsic = simple_intrinsic("floor", vec_ty);
776+
let max_intrinsic = simple_intrinsic("max", vec_ty);
777+
let set = set1_intrinsic(vec_ty);
786778

787779
if target_scalar == ScalarType::Unsigned {
788780
quote! { #max_intrinsic(#floor_intrinsic(a.into()), #set(0.0)) }

0 commit comments

Comments
 (0)