@@ -5,8 +5,64 @@ pub const NOTICE: &str = "\
5
5
6
6
// Format f16 values (and vectors containing them) in a way that is consistent with C.
7
7
pub const F16_FORMATTING_DEF : & str = r#"
8
+ use std::arch::x86_64::*;
9
+
10
/// Writes `array` to `formatter` in tuple-struct form: `type_name(e0, e1, ...)`.
///
/// Every element is rendered through its own `Debug` impl, and formatter flags
/// such as `{:#?}` are honoured by the builder.
///
/// This uses the stable `Formatter::debug_tuple` builder rather than
/// `Formatter::debug_tuple_fields_finish`. The latter is an internal helper
/// that only exists on nightly behind a feature gate; the builder produces the
/// same text without needing a temporary array of `&dyn Debug`.
///
/// Returns any error reported by the underlying writer.
#[inline]
fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
    formatter: &mut core::fmt::Formatter<'_>,
    type_name: &str,
    array: &[T; N],
) -> core::fmt::Result {
    let mut tuple = formatter.debug_tuple(type_name);
    for lane in array {
        tuple.field(lane);
    }
    tuple.finish()
}
22
+
8
23
/// Newtype whose `Debug` output is produced by the wrapped value's
/// `DebugHexF16` implementation instead of the value's regular `Debug`.
///
/// `repr(transparent)` gives `Hex<T>` the same layout as `T`.
#[repr(transparent)]
struct Hex<T>(T);

/// Formatting hook used by `Hex`: each supported type decides how it is
/// written to the formatter.
trait DebugHexF16 {
    /// Writes `self` to `f`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
}

// `Debug` on the wrapper is a pure forwarder to the inner `DebugHexF16` impl.
impl<T> core::fmt::Debug for Hex<T>
where
    T: DebugHexF16,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Hex(inner) = self;
        DebugHexF16::fmt(inner, f)
    }
}

/// Wraps `x` so that printing it with `{:?}` goes through `DebugHexF16`.
///
/// The concrete wrapper type is hidden; callers only learn that the result
/// implements `Debug`.
fn debug_f16<T: DebugHexF16>(x: T) -> impl core::fmt::Debug {
    Hex(x)
}
39
+
40
+ impl DebugHexF16 for f16 {
41
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42
+ write!(f, "{:#06x?}", self.to_bits())
43
+ }
44
+ }
45
+
46
+ impl DebugHexF16 for __m128h {
47
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
48
+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
49
+ debug_simd_finish(f, "__m128h", &array)
50
+ }
51
+ }
52
+
53
+ impl DebugHexF16 for __m256h {
54
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
55
+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
56
+ debug_simd_finish(f, "__m256h", &array)
57
+ }
58
+ }
59
+
60
+ impl DebugHexF16 for __m512h {
61
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
62
+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
63
+ debug_simd_finish(f, "__m512h", &array)
64
+ }
65
+ }
10
66
"# ;
11
67
12
68
pub const LANE_FUNCTION_HELPERS : & str = r#"
@@ -18,6 +74,9 @@ typedef double float64_t;
18
74
#define __int32 int
19
75
20
76
// Forward declarations of the stream-insertion overloads for the half-float
// scalar and the 128/256/512-bit integer vector types, so they can be used
// before their definitions appear.
std::ostream& operator<<(std::ostream& stream, _Float16 scalar);
std::ostream& operator<<(std::ostream& stream, __m128i vec);
std::ostream& operator<<(std::ostream& stream, __m256i vec);
std::ostream& operator<<(std::ostream& stream, __m512i vec);
21
80
22
81
std::ostream& operator<<(std::ostream& os, _Float16 value) {
23
82
uint16_t temp = 0;
@@ -28,6 +87,45 @@ std::ostream& operator<<(std::ostream& os, _Float16 value) {
28
87
return os;
29
88
}
30
89
90
// Writes the 16 bytes of `value` to `os` as a single hex literal: "0x"
// followed by two lowercase hex digits per byte, lowest address first.
// Returns `os` so insertions can be chained.
std::ostream& operator<<(std::ostream& os, __m128i value) {
    // Reading an object's bytes through `unsigned char*` is always allowed,
    // so no scratch buffer is needed (the previous malloc'ed one was never
    // freed and leaked on every call).
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&value);

    // Format into a private stream so the sticky fill/base manipulators do
    // not alter the caller's stream state.
    std::stringstream ss;
    ss << "0x" << std::hex << std::setfill('0');
    for (unsigned i = 0; i < sizeof(value); i++) {
        // Widen to an integer first: a `char` operand is inserted as a raw
        // character, which ignores std::hex and defeats std::setw(2).
        ss << std::setw(2) << static_cast<unsigned>(bytes[i]);
    }
    os << ss.str();
    return os;
}
102
+
103
// Writes the 32 bytes of `value` to `os` as a single hex literal: "0x"
// followed by two lowercase hex digits per byte, lowest address first.
// Returns `os` so insertions can be chained.
std::ostream& operator<<(std::ostream& os, __m256i value) {
    // Reading an object's bytes through `unsigned char*` is always allowed,
    // so no scratch buffer is needed (the previous malloc'ed one was never
    // freed and leaked on every call).
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&value);

    // Format into a private stream so the sticky fill/base manipulators do
    // not alter the caller's stream state.
    std::stringstream ss;
    ss << "0x" << std::hex << std::setfill('0');
    for (unsigned i = 0; i < sizeof(value); i++) {
        // Widen to an integer first: a `char` operand is inserted as a raw
        // character, which ignores std::hex and defeats std::setw(2).
        ss << std::setw(2) << static_cast<unsigned>(bytes[i]);
    }
    os << ss.str();
    return os;
}
115
+
116
// Writes the 64 bytes of `value` to `os` as a single hex literal: "0x"
// followed by two lowercase hex digits per byte, lowest address first.
// Returns `os` so insertions can be chained.
std::ostream& operator<<(std::ostream& os, __m512i value) {
    // Reading an object's bytes through `unsigned char*` is always allowed,
    // so no scratch buffer is needed (the previous malloc'ed one was never
    // freed and leaked on every call).
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&value);

    // Format into a private stream so the sticky fill/base manipulators do
    // not alter the caller's stream state.
    std::stringstream ss;
    ss << "0x" << std::hex << std::setfill('0');
    for (unsigned i = 0; i < sizeof(value); i++) {
        // Widen to an integer first: a `char` operand is inserted as a raw
        // character, which ignores std::hex and defeats std::setw(2).
        ss << std::setw(2) << static_cast<unsigned>(bytes[i]);
    }
    os << ss.str();
    return os;
}
128
+
31
129
// Extracts 8-bit lane `idx` from a 512-bit vector: first selects the 128-bit
// chunk that contains the lane (`idx / 16`), then the byte inside that chunk
// (`idx % 16`).
#define _mm512_extract_intrinsic_test_epi8(vec, idx) \
    _mm_extract_epi8(_mm512_extracti64x2_epi64((vec), (idx) / 16), (idx) % 16)
33
131
@@ -55,8 +153,6 @@ pub const X86_CONFIGURATIONS: &str = r#"
55
153
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))]
56
154
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
57
155
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
58
- #![cfg_attr(target_arch = "x86_64", feature(sse))]
59
- #![cfg_attr(target_arch = "x86_64", feature(sse2))]
60
156
#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))]
61
157
#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))]
62
158
#![feature(fmt_helpers_for_derive)]
0 commit comments