@@ -7,6 +7,128 @@ pub const NOTICE: &str = "\
7
7
pub const F16_FORMATTING_DEF : & str = r#"
8
8
use std::arch::x86_64::*;
9
9
10
+ #[inline]
11
+ unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i {
12
+ _mm_castph_si128(_mm_loadu_ph(mem_addr))
13
+ }
14
+
15
+ #[inline]
16
+ unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i {
17
+ _mm256_castph_si256(_mm256_loadu_ph(mem_addr))
18
+ }
19
+
20
+ #[inline]
21
+ unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i {
22
+ _mm512_castph_si512(_mm512_loadu_ph(mem_addr))
23
+ }
24
+
25
+
26
+ #[inline]
27
+ unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h {
28
+ _mm_castps_ph(_mm_loadu_ps(mem_addr))
29
+ }
30
+
31
+ #[inline]
32
+ unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h {
33
+ _mm256_castps_ph(_mm256_loadu_ps(mem_addr))
34
+ }
35
+
36
+ #[inline]
37
+ unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h {
38
+ _mm512_castps_ph(_mm512_loadu_ps(mem_addr))
39
+ }
40
+
41
+ #[inline]
42
+ unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d {
43
+ _mm_castsi128_pd(_mm_loadu_epi16(mem_addr))
44
+ }
45
+
46
+ #[inline]
47
+ unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d {
48
+ _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr))
49
+ }
50
+
51
+ #[inline]
52
+ unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d {
53
+ _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr))
54
+ }
55
+
56
+ #[inline]
57
+ unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d {
58
+ _mm_castsi128_pd(_mm_loadu_epi32(mem_addr))
59
+ }
60
+
61
+ #[inline]
62
+ unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d {
63
+ _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr))
64
+ }
65
+
66
+ #[inline]
67
+ unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d {
68
+ _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr))
69
+ }
70
+
71
+ #[inline]
72
+ unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d {
73
+ _mm_castsi128_pd(_mm_loadu_epi64(mem_addr))
74
+ }
75
+
76
+ #[inline]
77
+ unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d {
78
+ _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr))
79
+ }
80
+
81
+ #[inline]
82
+ unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d {
83
+ _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr))
84
+ }
85
+
86
+ // ===
87
+ #[inline]
88
+ unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 {
89
+ _mm_castsi128_ps(_mm_loadu_epi16(mem_addr))
90
+ }
91
+
92
+ #[inline]
93
+ unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 {
94
+ _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr))
95
+ }
96
+
97
+ #[inline]
98
+ unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 {
99
+ _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr))
100
+ }
101
+
102
+ #[inline]
103
+ unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 {
104
+ _mm_castsi128_ps(_mm_loadu_epi32(mem_addr))
105
+ }
106
+
107
+ #[inline]
108
+ unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 {
109
+ _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr))
110
+ }
111
+
112
+ #[inline]
113
+ unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 {
114
+ _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr))
115
+ }
116
+
117
+ #[inline]
118
+ unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 {
119
+ _mm_castsi128_ps(_mm_loadu_epi64(mem_addr))
120
+ }
121
+
122
+ #[inline]
123
+ unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 {
124
+ _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr))
125
+ }
126
+
127
+ #[inline]
128
+ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 {
129
+ _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr))
130
+ }
131
+
10
132
#[inline]
11
133
fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
12
134
formatter: &mut core::fmt::Formatter<'_>,
@@ -50,19 +172,40 @@ impl DebugHexF16 for __m128h {
50
172
}
51
173
}
52
174
175
+ impl DebugHexF16 for __m128i {
176
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
177
+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
178
+ debug_simd_finish(f, "__m128i", &array)
179
+ }
180
+ }
181
+
53
182
impl DebugHexF16 for __m256h {
54
183
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
55
184
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
56
185
debug_simd_finish(f, "__m256h", &array)
57
186
}
58
187
}
59
188
189
+ impl DebugHexF16 for __m256i {
190
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
191
+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
192
+ debug_simd_finish(f, "__m256i", &array)
193
+ }
194
+ }
195
+
60
196
impl DebugHexF16 for __m512h {
61
197
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
62
198
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
63
199
debug_simd_finish(f, "__m512h", &array)
64
200
}
65
201
}
202
+
203
+ impl DebugHexF16 for __m512i {
204
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
205
+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
206
+ debug_simd_finish(f, "__m512i", &array)
207
+ }
208
+ }
66
209
"# ;
67
210
68
211
pub const LANE_FUNCTION_HELPERS : & str = r#"
0 commit comments