Skip to content

Commit ff2f829

Browse files
sse3 models
1 parent 534a986 commit ff2f829

File tree

1 file changed

+80
-81
lines changed
  • testable-simd-models/src/core_arch/x86/models

1 file changed

+80
-81
lines changed

testable-simd-models/src/core_arch/x86/models/ssse3.rs

Lines changed: 80 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -11,36 +11,39 @@ use super::types::*;
1111
///
1212
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
1313
pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
14-
let a = a.as_i8x16();
15-
let zero = i8x16::from_fn(|_| 0);
16-
let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
17-
transmute(r)
14+
{
15+
let a = a.as_i8x16();
16+
let zero = i8x16::ZERO();
17+
let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
18+
transmute(r)
19+
}
1820
}
19-
2021
/// Computes the absolute value of each of the packed 16-bit signed integers in
2122
/// `a` and
2223
/// returns the 16-bit unsigned integers.
2324
///
2425
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
2526
pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
26-
let a = a.as_i16x8();
27-
let zero = i16x8::from_fn(|_| 0);
28-
let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
29-
transmute(r)
27+
{
28+
let a = a.as_i16x8();
29+
let zero = i16x8::ZERO();
30+
let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
31+
transmute(r)
32+
}
3033
}
31-
3234
/// Computes the absolute value of each of the packed 32-bit signed integers in
3335
/// `a` and
3436
/// returns the 32-bit unsigned integers.
3537
///
3638
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
3739
pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
38-
let a = a.as_i32x4();
39-
let zero = i32x4::from_fn(|_| 0);
40-
let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
41-
transmute(r)
40+
{
41+
let a = a.as_i32x4();
42+
let zero = i32x4::ZERO();
43+
let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
44+
transmute(r)
45+
}
4246
}
43-
4447
/// Shuffles bytes from `a` according to the content of `b`.
4548
///
4649
/// The last 4 bits of each byte of `b` are used as addresses
@@ -68,172 +71,168 @@ pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
6871
///
6972
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
7073
pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
71-
transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
74+
{
75+
transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
76+
}
7277
}
73-
7478
/// Concatenates 16-byte blocks in `a` and `b` into a 32-byte temporary result,
7579
/// shifts the result right by `IMM8` bytes, and returns the low 16 bytes.
7680
///
7781
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
78-
7982
pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
80-
// TODO static_assert_uimm_bits!(IMM8, 8);
81-
// If palignr is shifting the pair of vectors more than the size of two
82-
// lanes, emit zero.
83+
static_assert_uimm_bits!(IMM8, 8);
8384
if IMM8 > 32 {
8485
return _mm_setzero_si128();
8586
}
86-
// If palignr is shifting the pair of input vectors more than one lane,
87-
// but less than two lanes, convert to shifting in zeroes.
8887
let (a, b) = if IMM8 > 16 {
8988
(_mm_setzero_si128(), a)
9089
} else {
9190
(a, b)
9291
};
9392
const fn mask(shift: u32, i: u32) -> u32 {
9493
if shift > 32 {
95-
// Unused, but needs to be a valid index.
9694
i
9795
} else if shift > 16 {
9896
shift - 16 + i
9997
} else {
10098
shift + i
10199
}
102100
}
103-
104-
let r: i8x16 = simd_shuffle(
105-
b.as_i8x16(),
106-
a.as_i8x16(),
107-
[
108-
mask(IMM8 as u32, 0),
109-
mask(IMM8 as u32, 1),
110-
mask(IMM8 as u32, 2),
111-
mask(IMM8 as u32, 3),
112-
mask(IMM8 as u32, 4),
113-
mask(IMM8 as u32, 5),
114-
mask(IMM8 as u32, 6),
115-
mask(IMM8 as u32, 7),
116-
mask(IMM8 as u32, 8),
117-
mask(IMM8 as u32, 9),
118-
mask(IMM8 as u32, 10),
119-
mask(IMM8 as u32, 11),
120-
mask(IMM8 as u32, 12),
121-
mask(IMM8 as u32, 13),
122-
mask(IMM8 as u32, 14),
123-
mask(IMM8 as u32, 15),
124-
],
125-
);
126-
r.into()
101+
{
102+
let r: i8x16 = simd_shuffle(
103+
b.as_i8x16(),
104+
a.as_i8x16(),
105+
[
106+
mask(IMM8 as u32, 0),
107+
mask(IMM8 as u32, 1),
108+
mask(IMM8 as u32, 2),
109+
mask(IMM8 as u32, 3),
110+
mask(IMM8 as u32, 4),
111+
mask(IMM8 as u32, 5),
112+
mask(IMM8 as u32, 6),
113+
mask(IMM8 as u32, 7),
114+
mask(IMM8 as u32, 8),
115+
mask(IMM8 as u32, 9),
116+
mask(IMM8 as u32, 10),
117+
mask(IMM8 as u32, 11),
118+
mask(IMM8 as u32, 12),
119+
mask(IMM8 as u32, 13),
120+
mask(IMM8 as u32, 14),
121+
mask(IMM8 as u32, 15),
122+
],
123+
);
124+
transmute(r)
125+
}
127126
}
128-
129127
/// Horizontally adds the adjacent pairs of values contained in 2 packed
130128
/// 128-bit vectors of `[8 x i16]`.
131129
///
132130
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
133-
134131
pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
135-
phaddw128(a.as_i16x8(), b.as_i16x8()).into()
132+
{
133+
transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
134+
}
136135
}
137-
138136
/// Horizontally adds the adjacent pairs of values contained in 2 packed
139137
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
140138
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
141139
///
142140
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
143-
144141
pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
145-
phaddsw128(a.as_i16x8(), b.as_i16x8()).into()
142+
{
143+
transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
144+
}
146145
}
147-
148146
/// Horizontally adds the adjacent pairs of values contained in 2 packed
149147
/// 128-bit vectors of `[4 x i32]`.
150148
///
151149
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
152-
153150
pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
154-
phaddd128(a.as_i32x4(), b.as_i32x4()).into()
151+
{
152+
transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
153+
}
155154
}
156-
157155
/// Horizontally subtracts the adjacent pairs of values contained in 2
158156
/// packed 128-bit vectors of `[8 x i16]`.
159157
///
160158
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
161-
162159
pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
163-
phsubw128(a.as_i16x8(), b.as_i16x8()).into()
160+
{
161+
transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
162+
}
164163
}
165-
166164
/// Horizontally subtracts the adjacent pairs of values contained in 2
167165
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
168166
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
169167
/// saturated to 8000h.
170168
///
171169
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
172-
173170
pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
174-
phsubsw128(a.as_i16x8(), b.as_i16x8()).into()
171+
{
172+
transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
173+
}
175174
}
176-
177175
/// Horizontally subtracts the adjacent pairs of values contained in 2
178176
/// packed 128-bit vectors of `[4 x i32]`.
179177
///
180178
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
181-
182179
pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
183-
phsubd128(a.as_i32x4(), b.as_i32x4()).into()
180+
{
181+
transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
182+
}
184183
}
185-
186184
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
187185
/// values contained in the first source operand and packed 8-bit signed
188186
/// integer values contained in the second source operand, adds pairs of
189187
/// contiguous products with signed saturation, and writes the 16-bit sums to
190188
/// the corresponding bits in the destination.
191189
///
192190
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
193-
194191
pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
195-
pmaddubsw128(a.as_u8x16(), b.as_i8x16()).into()
192+
{
193+
transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
194+
}
196195
}
197-
198196
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
199197
/// product to the 18 most significant bits by right-shifting, rounds the
200198
/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
201199
///
202200
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
203-
204201
pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
205-
pmulhrsw128(a.as_i16x8(), b.as_i16x8()).into()
202+
{
203+
transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
204+
}
206205
}
207-
208206
/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
209207
/// integer in `b` is negative, and returns the result.
210208
/// Elements in result are zeroed out when the corresponding element in `b`
211209
/// is zero.
212210
///
213211
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
214-
215212
pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
216-
psignb128(a.as_i8x16(), b.as_i8x16()).into()
213+
{
214+
transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
215+
}
217216
}
218-
219217
/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
220218
/// integer in `b` is negative, and returns the results.
221219
/// Elements in result are zeroed out when the corresponding element in `b`
222220
/// is zero.
223221
///
224222
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
225-
226223
pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
227-
psignw128(a.as_i16x8(), b.as_i16x8()).into()
224+
{
225+
transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
226+
}
228227
}
229-
230228
/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
231229
/// integer in `b` is negative, and returns the results.
232230
/// Elements in result are zeroed out when the corresponding element in `b`
233231
/// is zero.
234232
///
235233
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
236-
237234
pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
238-
psignd128(a.as_i32x4(), b.as_i32x4()).into()
235+
{
236+
transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
237+
}
239238
}

0 commit comments

Comments
 (0)