@@ -11,36 +11,39 @@ use super::types::*;
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
 pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
-    let a = a.as_i8x16();
-    let zero = i8x16::from_fn(|_| 0);
-    let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
-    transmute(r)
+    {
+        let a = a.as_i8x16();
+        let zero = i8x16::ZERO();
+        let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
+        transmute(r)
+    }
 }
-
 /// Computes the absolute value of each of the packed 16-bit signed integers in
 /// `a` and
 /// returns the 16-bit unsigned integers
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
 pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
-    let a = a.as_i16x8();
-    let zero = i16x8::from_fn(|_| 0);
-    let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
-    transmute(r)
+    {
+        let a = a.as_i16x8();
+        let zero = i16x8::ZERO();
+        let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
+        transmute(r)
+    }
 }
-
 /// Computes the absolute value of each of the packed 32-bit signed integers in
 /// `a` and
 /// returns the 32-bit unsigned integers
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
 pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
-    let a = a.as_i32x4();
-    let zero = i32x4::from_fn(|_| 0);
-    let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
-    transmute(r)
+    {
+        let a = a.as_i32x4();
+        let zero = i32x4::ZERO();
+        let r = simd_select(simd_lt(a, zero), simd_neg(a), a);
+        transmute(r)
+    }
 }
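Reviewer note: all three `_mm_abs_*` functions above share the same select/negate pattern, so one scalar model covers them. The sketch below is illustrative only (`abs_epi8_ref` is a hypothetical helper, not part of this patch); it also shows the PABSB edge case where `i8::MIN` wraps to itself, giving the `0x80` bit pattern the unsigned result calls for.

```rust
// Hypothetical scalar model of the simd_lt/simd_neg/simd_select chain used
// by _mm_abs_epi8 (the 16- and 32-bit versions follow the same pattern).
fn abs_epi8_ref(a: [i8; 16]) -> [i8; 16] {
    // Negate only the negative lanes; wrapping negation leaves i8::MIN
    // as 0x80, matching PABSB.
    a.map(|x| if x < 0 { x.wrapping_neg() } else { x })
}

fn main() {
    let mut v = [0i8; 16];
    v[0] = -3;
    v[1] = 7;
    v[2] = i8::MIN;
    let r = abs_epi8_ref(v);
    assert_eq!((r[0], r[1], r[2] as u8), (3, 7, 0x80));
}
```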
-
 /// Shuffles bytes from `a` according to the content of `b`.
 ///
 /// The last 4 bits of each byte of `b` are used as addresses
@@ -68,172 +71,168 @@ pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
 pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
-    transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
+    {
+        transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
+    }
 }
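For cross-checking the PSHUFB semantics described in the doc comment, a scalar sketch (hypothetical `shuffle_epi8_ref`, not patch code): each output byte selects `a[b[i] & 0x0F]`, or zero when the high bit of `b[i]` is set.

```rust
// Hypothetical scalar model of PSHUFB as exposed by _mm_shuffle_epi8.
fn shuffle_epi8_ref(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
    let mut r = [0u8; 16];
    for i in 0..16 {
        // High bit set in the selector zeroes the lane; otherwise the low
        // 4 bits index into `a`.
        if b[i] & 0x80 == 0 {
            r[i] = a[(b[i] & 0x0F) as usize];
        }
    }
    r
}

fn main() {
    let a: [u8; 16] = core::array::from_fn(|i| i as u8 * 10);
    let idx = [15, 0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
    let r = shuffle_epi8_ref(a, idx);
    assert_eq!(r[0], 150); // a[15]
    assert_eq!(r[2], 0);   // high bit set -> zeroed
}
```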
-
 /// Concatenates 16-byte blocks in `a` and `b` into a 32-byte temporary result,
 /// shifts the result right by `n` bytes, and returns the low 16 bytes.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
-
 pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
-    // TODO static_assert_uimm_bits!(IMM8, 8);
-    // If palignr is shifting the pair of vectors more than the size of two
-    // lanes, emit zero.
+    static_assert_uimm_bits!(IMM8, 8);
     if IMM8 > 32 {
         return _mm_setzero_si128();
     }
-    // If palignr is shifting the pair of input vectors more than one lane,
-    // but less than two lanes, convert to shifting in zeroes.
     let (a, b) = if IMM8 > 16 {
         (_mm_setzero_si128(), a)
     } else {
         (a, b)
     };
     const fn mask(shift: u32, i: u32) -> u32 {
         if shift > 32 {
-            // Unused, but needs to be a valid index.
             i
         } else if shift > 16 {
             shift - 16 + i
         } else {
             shift + i
         }
     }
-
-    let r: i8x16 = simd_shuffle(
-        b.as_i8x16(),
-        a.as_i8x16(),
-        [
-            mask(IMM8 as u32, 0),
-            mask(IMM8 as u32, 1),
-            mask(IMM8 as u32, 2),
-            mask(IMM8 as u32, 3),
-            mask(IMM8 as u32, 4),
-            mask(IMM8 as u32, 5),
-            mask(IMM8 as u32, 6),
-            mask(IMM8 as u32, 7),
-            mask(IMM8 as u32, 8),
-            mask(IMM8 as u32, 9),
-            mask(IMM8 as u32, 10),
-            mask(IMM8 as u32, 11),
-            mask(IMM8 as u32, 12),
-            mask(IMM8 as u32, 13),
-            mask(IMM8 as u32, 14),
-            mask(IMM8 as u32, 15),
-        ],
-    );
-    r.into()
+    {
+        let r: i8x16 = simd_shuffle(
+            b.as_i8x16(),
+            a.as_i8x16(),
+            [
+                mask(IMM8 as u32, 0),
+                mask(IMM8 as u32, 1),
+                mask(IMM8 as u32, 2),
+                mask(IMM8 as u32, 3),
+                mask(IMM8 as u32, 4),
+                mask(IMM8 as u32, 5),
+                mask(IMM8 as u32, 6),
+                mask(IMM8 as u32, 7),
+                mask(IMM8 as u32, 8),
+                mask(IMM8 as u32, 9),
+                mask(IMM8 as u32, 10),
+                mask(IMM8 as u32, 11),
+                mask(IMM8 as u32, 12),
+                mask(IMM8 as u32, 13),
+                mask(IMM8 as u32, 14),
+                mask(IMM8 as u32, 15),
+            ],
+        );
+        transmute(r)
+    }
 }
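The `mask` index arithmetic above is easiest to sanity-check against a scalar model of PALIGNR: treat `b ++ a` as one 32-byte buffer, shift right by `IMM8` bytes, and keep the low 16 (hypothetical `alignr_ref`, illustration only; bytes shifted in past the end are zero).

```rust
// Hypothetical scalar model of PALIGNR as exposed by _mm_alignr_epi8.
fn alignr_ref(a: [u8; 16], b: [u8; 16], n: usize) -> [u8; 16] {
    // Concatenate with `b` in the low half and `a` in the high half,
    // matching the simd_shuffle(b, a, ...) index space above.
    let mut concat = [0u8; 32];
    concat[..16].copy_from_slice(&b);
    concat[16..].copy_from_slice(&a);
    core::array::from_fn(|i| if n + i < 32 { concat[n + i] } else { 0 })
}

fn main() {
    let a: [u8; 16] = core::array::from_fn(|i| (i + 16) as u8);
    let b: [u8; 16] = core::array::from_fn(|i| i as u8);
    let r = alignr_ref(a, b, 4);
    assert_eq!(r[0], 4);   // b[4]
    assert_eq!(r[12], 16); // a[0] shifted in from the high block
}
```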
-
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 /// 128-bit vectors of `[8 x i16]`.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
-
 pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
-    phaddw128(a.as_i16x8(), b.as_i16x8()).into()
+    {
+        transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
+    }
 }
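As a reference for the pairwise layout: the low four output lanes come from `a`, the high four from `b`, with wrapping arithmetic (hypothetical `hadd_epi16_ref`, a sketch rather than the shipped code; `_mm_hadd_epi32` is the same shape at 32-bit width).

```rust
// Hypothetical scalar model of PHADDW as exposed by _mm_hadd_epi16.
fn hadd_epi16_ref(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
    core::array::from_fn(|i| {
        // Lanes 0..4 draw pairs from `a`, lanes 4..8 from `b`.
        let src = if i < 4 { &a } else { &b };
        let j = (i % 4) * 2;
        src[j].wrapping_add(src[j + 1])
    })
}

fn main() {
    let a = [1, 2, 3, 4, 5, 6, 7, 8];
    let b = [10, 20, 30, 40, 50, 60, 70, 80];
    assert_eq!(hadd_epi16_ref(a, b), [3, 7, 11, 15, 30, 70, 110, 150]);
}
```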
-
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 /// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
-
 pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
-    phaddsw128(a.as_i16x8(), b.as_i16x8()).into()
+    {
+        transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
+    }
 }
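The saturating variant differs from `_mm_hadd_epi16` only in the per-pair add; `i16::saturating_add` models the 7FFFh/8000h clamping the doc comment describes (hypothetical sketch, assuming the same pairwise layout as above).

```rust
// Hypothetical scalar model of PHADDSW as exposed by _mm_hadds_epi16.
fn hadds_epi16_ref(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
    core::array::from_fn(|i| {
        let src = if i < 4 { &a } else { &b };
        let j = (i % 4) * 2;
        // Saturate instead of wrapping on overflow.
        src[j].saturating_add(src[j + 1])
    })
}

fn main() {
    let a = [i16::MAX, 1, i16::MIN, -1, 0, 0, 0, 0];
    let r = hadds_epi16_ref(a, [0; 8]);
    assert_eq!(r[0], i16::MAX); // 7FFFh + 1 saturates to 7FFFh
    assert_eq!(r[1], i16::MIN); // 8000h - 1 saturates to 8000h
}
```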
-
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 /// 128-bit vectors of `[4 x i32]`.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
-
 pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
-    phaddd128(a.as_i32x4(), b.as_i32x4()).into()
+    {
+        transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
+    }
 }
-
 /// Horizontally subtracts the adjacent pairs of values contained in 2
 /// packed 128-bit vectors of `[8 x i16]`.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
-
 pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
-    phsubw128(a.as_i16x8(), b.as_i16x8()).into()
+    {
+        transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
+    }
 }
-
 /// Horizontally subtracts the adjacent pairs of values contained in 2
 /// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
 /// saturated to 8000h.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
-
 pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
-    phsubsw128(a.as_i16x8(), b.as_i16x8()).into()
+    {
+        transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
+    }
 }
-
 /// Horizontally subtracts the adjacent pairs of values contained in 2
 /// packed 128-bit vectors of `[4 x i32]`.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
-
 pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
-    phsubd128(a.as_i32x4(), b.as_i32x4()).into()
+    {
+        transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
+    }
 }
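The three `_mm_hsub_*` functions mirror the horizontal adds with `even - odd` per pair (saturating in the `hsubs` case). A 16-bit wrapping sketch for reference (hypothetical `hsub_epi16_ref`, not part of the patch):

```rust
// Hypothetical scalar model of PHSUBW as exposed by _mm_hsub_epi16.
fn hsub_epi16_ref(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
    core::array::from_fn(|i| {
        let src = if i < 4 { &a } else { &b };
        let j = (i % 4) * 2;
        // Each pair yields even-lane minus odd-lane.
        src[j].wrapping_sub(src[j + 1])
    })
}

fn main() {
    let a = [5, 2, 10, 4, 0, 1, 8, 8];
    assert_eq!(hsub_epi16_ref(a, [0; 8])[..4], [3, 6, -1, 0]);
}
```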
-
 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
 /// values contained in the first source operand and packed 8-bit signed
 /// integer values contained in the second source operand, adds pairs of
 /// contiguous products with signed saturation, and writes the 16-bit sums to
 /// the corresponding bits in the destination.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
-
 pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
-    pmaddubsw128(a.as_u8x16(), b.as_i8x16()).into()
+    {
+        transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
+    }
 }
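A scalar sketch of the PMADDUBSW behavior described above: unsigned bytes from `a` times signed bytes from `b`, with adjacent products summed under signed saturation (hypothetical `maddubs_ref`; note each u8 x i8 product already fits an i16, so only the pair sum can saturate).

```rust
// Hypothetical scalar model of PMADDUBSW as exposed by _mm_maddubs_epi16.
fn maddubs_ref(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
    core::array::from_fn(|i| {
        // |product| <= 255 * 128, so i16 holds each product exactly.
        let p0 = a[2 * i] as i16 * b[2 * i] as i16;
        let p1 = a[2 * i + 1] as i16 * b[2 * i + 1] as i16;
        p0.saturating_add(p1)
    })
}

fn main() {
    let mut a = [0u8; 16];
    let mut b = [0i8; 16];
    a[0] = 255;
    a[1] = 255;
    b[0] = 127;
    b[1] = 127;
    // 255*127 + 255*127 = 64770 clamps to i16::MAX (7FFFh).
    assert_eq!(maddubs_ref(a, b)[0], i16::MAX);
}
```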
-
 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
 /// product to the 18 most significant bits by right-shifting, rounds the
 /// truncated value by adding 1, and writes bits `[16:1]` to the destination.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
-
 pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
-    pmulhrsw128(a.as_i16x8(), b.as_i16x8()).into()
+    {
+        transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
+    }
 }
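The doc comment's shift-and-round recipe, written out per lane (hypothetical `mulhrs_ref`, a sketch of PMULHRSW): for Q15 fixed-point inputs this is a rounded fractional multiply.

```rust
// Hypothetical scalar model of PMULHRSW as exposed by _mm_mulhrs_epi16:
// 32-bit product, >>14, +1 to round, then keep bits [16:1] via >>1.
fn mulhrs_ref(a: i16, b: i16) -> i16 {
    ((((a as i32 * b as i32) >> 14) + 1) >> 1) as i16
}

fn main() {
    // 0.5 * 0.5 = 0.25 in Q15: 0x4000 * 0x4000 -> 0x2000.
    assert_eq!(mulhrs_ref(0x4000, 0x4000), 0x2000);
}
```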
-
 /// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
 /// integer in `b` is negative, and returns the results.
 /// Elements in the result are zeroed out when the corresponding element in `b`
 /// is zero.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
-
 pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
-    psignb128(a.as_i8x16(), b.as_i8x16()).into()
+    {
+        transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
+    }
 }
-
 /// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
 /// integer in `b` is negative, and returns the results.
 /// Elements in the result are zeroed out when the corresponding element in `b`
 /// is zero.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
-
 pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
-    psignw128(a.as_i16x8(), b.as_i16x8()).into()
+    {
+        transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
+    }
 }
-
 /// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
 /// integer in `b` is negative, and returns the results.
 /// Elements in the result are zeroed out when the corresponding element in `b`
 /// is zero.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
-
 pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
-    psignd128(a.as_i32x4(), b.as_i32x4()).into()
+    {
+        transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
+    }
 }
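One scalar model covers all three `_mm_sign_*` widths: copy, negate, or zero each lane of `a` according to the sign of the matching lane in `b` (hypothetical `sign_epi8_ref`, illustration only).

```rust
// Hypothetical scalar model of PSIGNB as exposed by _mm_sign_epi8
// (PSIGNW/PSIGND behave identically at 16- and 32-bit widths).
fn sign_epi8_ref(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
    core::array::from_fn(|i| match b[i] {
        0 => 0,                          // zero selector zeroes the lane
        x if x < 0 => a[i].wrapping_neg(), // negative selector negates
        _ => a[i],                       // positive selector copies
    })
}

fn main() {
    let a = [5i8; 16];
    let mut b = [1i8; 16];
    b[0] = -1;
    b[1] = 0;
    let r = sign_epi8_ref(a, b);
    assert_eq!((r[0], r[1], r[2]), (-5, 0, 5));
}
```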