Skip to content

Commit 52c0c08

Browse files
feat: defined more load functions that are not natively defined (such as
arguments with UI16 etype and __m128d type)
1 parent 51c8750 commit 52c0c08

File tree

2 files changed

+167
-0
lines changed

2 files changed

+167
-0
lines changed

crates/intrinsic-test/src/x86/config.rs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,128 @@ pub const NOTICE: &str = "\
77
pub const F16_FORMATTING_DEF: &str = r#"
88
use std::arch::x86_64::*;
99
10+
#[inline]
11+
unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i {
12+
_mm_castph_si128(_mm_loadu_ph(mem_addr))
13+
}
14+
15+
#[inline]
16+
unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i {
17+
_mm256_castph_si256(_mm256_loadu_ph(mem_addr))
18+
}
19+
20+
#[inline]
21+
unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i {
22+
_mm512_castph_si512(_mm512_loadu_ph(mem_addr))
23+
}
24+
25+
26+
#[inline]
27+
unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h {
28+
_mm_castps_ph(_mm_loadu_ps(mem_addr))
29+
}
30+
31+
#[inline]
32+
unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h {
33+
_mm256_castps_ph(_mm256_loadu_ps(mem_addr))
34+
}
35+
36+
#[inline]
37+
unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h {
38+
_mm512_castps_ph(_mm512_loadu_ps(mem_addr))
39+
}
40+
41+
#[inline]
42+
unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d {
43+
_mm_castsi128_pd(_mm_loadu_epi16(mem_addr))
44+
}
45+
46+
#[inline]
47+
unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d {
48+
_mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr))
49+
}
50+
51+
#[inline]
52+
unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d {
53+
_mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr))
54+
}
55+
56+
#[inline]
57+
unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d {
58+
_mm_castsi128_pd(_mm_loadu_epi32(mem_addr))
59+
}
60+
61+
#[inline]
62+
unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d {
63+
_mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr))
64+
}
65+
66+
#[inline]
67+
unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d {
68+
_mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr))
69+
}
70+
71+
#[inline]
72+
unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d {
73+
_mm_castsi128_pd(_mm_loadu_epi64(mem_addr))
74+
}
75+
76+
#[inline]
77+
unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d {
78+
_mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr))
79+
}
80+
81+
#[inline]
82+
unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d {
83+
_mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr))
84+
}
85+
86+
// ===
87+
#[inline]
88+
unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 {
89+
_mm_castsi128_ps(_mm_loadu_epi16(mem_addr))
90+
}
91+
92+
#[inline]
93+
unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 {
94+
_mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr))
95+
}
96+
97+
#[inline]
98+
unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 {
99+
_mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr))
100+
}
101+
102+
#[inline]
103+
unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 {
104+
_mm_castsi128_ps(_mm_loadu_epi32(mem_addr))
105+
}
106+
107+
#[inline]
108+
unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 {
109+
_mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr))
110+
}
111+
112+
#[inline]
113+
unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 {
114+
_mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr))
115+
}
116+
117+
#[inline]
118+
unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 {
119+
_mm_castsi128_ps(_mm_loadu_epi64(mem_addr))
120+
}
121+
122+
#[inline]
123+
unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 {
124+
_mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr))
125+
}
126+
127+
#[inline]
128+
unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 {
129+
_mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr))
130+
}
131+
10132
#[inline]
11133
fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
12134
formatter: &mut core::fmt::Formatter<'_>,
@@ -50,19 +172,40 @@ impl DebugHexF16 for __m128h {
50172
}
51173
}
52174
175+
impl DebugHexF16 for __m128i {
176+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
177+
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
178+
debug_simd_finish(f, "__m128i", &array)
179+
}
180+
}
181+
53182
impl DebugHexF16 for __m256h {
54183
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
55184
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
56185
debug_simd_finish(f, "__m256h", &array)
57186
}
58187
}
59188
189+
impl DebugHexF16 for __m256i {
190+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
191+
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
192+
debug_simd_finish(f, "__m256i", &array)
193+
}
194+
}
195+
60196
impl DebugHexF16 for __m512h {
61197
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
62198
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
63199
debug_simd_finish(f, "__m512h", &array)
64200
}
65201
}
202+
203+
impl DebugHexF16 for __m512i {
204+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
205+
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
206+
debug_simd_finish(f, "__m512i", &array)
207+
}
208+
}
66209
"#;
67210

68211
pub const LANE_FUNCTION_HELPERS: &str = r#"

crates/intrinsic-test/src/x86/types.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,30 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
127127
.replace("64", "");
128128
{
129129
let suffix = match (self.bit_len, self.kind) {
130+
(Some(16), TypeKind::Float)
131+
if ["__m128i", "__m256i", "__m512i"]
132+
.contains(&self.param.type_data.as_str()) =>
133+
{
134+
format!("ph_to_{}", self.param.type_data)
135+
}
136+
(Some(32), TypeKind::Float)
137+
if ["__m128h", "__m256h", "__m512h"]
138+
.contains(&self.param.type_data.as_str()) =>
139+
{
140+
format!("ps_to_{}", self.param.type_data)
141+
}
142+
(Some(bit_len @ (16 | 32 | 64)), TypeKind::Int(_) | TypeKind::Mask)
143+
if ["__m128d", "__m256d", "__m512d"]
144+
.contains(&self.param.type_data.as_str()) =>
145+
{
146+
format!("epi{bit_len}_to_{}", self.param.type_data)
147+
}
148+
(Some(bit_len @ (16 | 32 | 64)), TypeKind::Int(_) | TypeKind::Mask)
149+
if ["__m128", "__m256", "__m512"]
150+
.contains(&self.param.type_data.as_str()) =>
151+
{
152+
format!("epi{bit_len}_to_{}", self.param.type_data)
153+
}
130154
(Some(bit_len @ (8 | 16 | 32 | 64)), TypeKind::Int(_)) => {
131155
format!("epi{bit_len}")
132156
}

0 commit comments

Comments
 (0)