Skip to content

Commit f7f0d4e

Browse files
feat: updated with debug printing and ostream implementation for vector
types
1 parent b28fc7a commit f7f0d4e

File tree

1 file changed

+98
-2
lines changed

1 file changed

+98
-2
lines changed

crates/intrinsic-test/src/x86/config.rs

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,64 @@ pub const NOTICE: &str = "\
55

66
// Format f16 values (and vectors containing them) in a way that is consistent with C.
77
pub const F16_FORMATTING_DEF: &str = r#"
8+
use std::arch::x86_64::*;
9+
10+
/// Writes `array` in tuple-struct `Debug` form: `type_name(e0, e1, ...)`.
///
/// Built on the stable [`core::fmt::Formatter::debug_tuple`] builder instead of the
/// unstable, derive-internal `Formatter::debug_tuple_fields_finish`, so this helper
/// itself no longer depends on the `fmt_helpers_for_derive` feature. The produced text
/// is the same: every element is added as one positional field, in order.
///
/// * `formatter` - destination formatter; its flags (e.g. `{:#?}`) apply to each field.
/// * `type_name` - name printed in front of the parenthesised element list.
/// * `array` - elements to print.
#[inline]
fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
    formatter: &mut core::fmt::Formatter<'_>,
    type_name: &str,
    array: &[T; N],
) -> core::fmt::Result {
    let mut tuple = formatter.debug_tuple(type_name);
    for lane in array {
        tuple.field(lane);
    }
    tuple.finish()
}
22+
823
#[repr(transparent)]
924
struct Hex<T>(T);
25+
26+
impl<T: DebugHexF16> core::fmt::Debug for Hex<T> {
27+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
28+
<T as DebugHexF16>::fmt(&self.0, f)
29+
}
30+
}
31+
32+
fn debug_f16<T: DebugHexF16>(x: T) -> impl core::fmt::Debug {
33+
Hex(x)
34+
}
35+
36+
/// Formatting hook that [`Hex`] delegates to from its `Debug` implementation.
trait DebugHexF16 {
    /// Writes the textual form of `self` into `formatter`.
    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
}
39+
40+
impl DebugHexF16 for f16 {
    /// Prints the raw bit pattern of the value rather than its numeric value.
    ///
    /// `{:#06x?}` yields a `0x` prefix followed by the bits zero-padded to four
    /// lowercase hex digits (total width 6).
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let bits = self.to_bits();
        f.write_fmt(format_args!("{:#06x?}", bits))
    }
}
45+
46+
impl DebugHexF16 for __m128h {
47+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
48+
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
49+
debug_simd_finish(f, "__m128h", &array)
50+
}
51+
}
52+
53+
impl DebugHexF16 for __m256h {
54+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
55+
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
56+
debug_simd_finish(f, "__m256h", &array)
57+
}
58+
}
59+
60+
impl DebugHexF16 for __m512h {
61+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
62+
let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
63+
debug_simd_finish(f, "__m512h", &array)
64+
}
65+
}
1066
"#;
1167

1268
pub const LANE_FUNCTION_HELPERS: &str = r#"
@@ -18,6 +74,9 @@ typedef double float64_t;
1874
#define __int32 int
1975
2076
std::ostream& operator<<(std::ostream& os, _Float16 value);
77+
std::ostream& operator<<(std::ostream& os, __m128i value);
78+
std::ostream& operator<<(std::ostream& os, __m256i value);
79+
std::ostream& operator<<(std::ostream& os, __m512i value);
2180
2281
std::ostream& operator<<(std::ostream& os, _Float16 value) {
2382
uint16_t temp = 0;
@@ -28,6 +87,45 @@ std::ostream& operator<<(std::ostream& os, _Float16 value) {
2887
return os;
2988
}
3089
90+
// Streams a 128-bit integer vector as "0x" followed by its 16 bytes in memory
// order, each rendered as exactly two lowercase hex digits.
std::ostream& operator<<(std::ostream& os, __m128i value) {
    // Stack buffer: the previous malloc'd buffer was never freed, leaking on every call.
    unsigned char bytes[sizeof(__m128i)];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(bytes), value);

    // Format into a scratch stream so the hex/fill settings stay local to it.
    std::stringstream ss;
    ss << "0x" << std::hex << std::setfill('0');
    for (unsigned char byte : bytes) {
        // Widen to an integer first: inserting a char writes the raw character
        // instead of its numeric value. Going through unsigned char avoids
        // sign extension for bytes >= 0x80. setw must be re-applied per item.
        ss << std::setw(2) << static_cast<unsigned int>(byte);
    }
    os << ss.str();
    return os;
}
102+
103+
std::ostream& operator<<(std::ostream& os, __m256i value) {
104+
void* temp = malloc(sizeof(__m256i));
105+
_mm256_storeu_si256((__m256i*)temp, value);
106+
std::stringstream ss;
107+
108+
ss << "0x";
109+
for(int i = 0; i < 32; i++) {
110+
ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
111+
}
112+
os << ss.str();
113+
return os;
114+
}
115+
116+
std::ostream& operator<<(std::ostream& os, __m512i value) {
117+
void* temp = malloc(sizeof(__m512i));
118+
_mm512_storeu_si512((__m512i*)temp, value);
119+
std::stringstream ss;
120+
121+
ss << "0x";
122+
for(int i = 0; i < 64; i++) {
123+
ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
124+
}
125+
os << ss.str();
126+
return os;
127+
}
128+
31129
// Extracts one 8-bit element from the 512-bit vector `m`: `(lane) / 16` selects
// the 128-bit chunk passed to _mm512_extracti64x2_epi64, and `(lane) % 16`
// selects the byte within that chunk passed to _mm_extract_epi8.
#define _mm512_extract_intrinsic_test_epi8(m, lane) \
    _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
33131
@@ -55,8 +153,6 @@ pub const X86_CONFIGURATIONS: &str = r#"
55153
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))]
56154
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
57155
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
58-
#![cfg_attr(target_arch = "x86_64", feature(sse))]
59-
#![cfg_attr(target_arch = "x86_64", feature(sse2))]
60156
#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))]
61157
#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))]
62158
#![feature(fmt_helpers_for_derive)]

0 commit comments

Comments
 (0)