Skip to content

Commit 71c9e13

Browse files
authored
[SYCLomatic][NFC] Add more LIT tests for asm PTX ld/st (#2942)
Signed-off-by: [email protected]
1 parent 00142c1 commit 71c9e13

File tree

1 file changed

+204
-0
lines changed

1 file changed

+204
-0
lines changed

clang/test/dpct/asm/ld.cu

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66

77
// clang-format off
88
#include <cuda_runtime.h>
9+
#include <cuda_bf16.h>
10+
#include <stdint.h>
11+
#include <stdio.h>
12+
13+
using bf16 = __nv_bfloat16;
14+
using bf16_2 = __nv_bfloat162;
15+
using half_2 = __half2;
916

1017
/*
1118
.ss = { .const, .global, .local, .param, .shared };
@@ -93,4 +100,201 @@ __device__ __forceinline__ int ld_flag_acquire(int* flag_addr) {
93100
return flag;
94101
}
95102

103+
// CHECK: static inline void lds(bf16& dst, uint32_t src) {
104+
// CHECK-NEXT: *(uint16_t*)&dst = *((uint16_t *)(uintptr_t)src);
105+
// CHECK-NEXT: }
106+
__device__ static inline void lds(bf16& dst, uint32_t src) {
107+
asm volatile("ld.shared.b16 %0, [%1];" : "=h"(*(uint16_t*)&dst) : "r"(src));
108+
}
109+
110+
// CHECK: static inline void sts(uint32_t dst, const bf16& src) {
111+
// CHECK-NEXT: *((uint16_t *)(uintptr_t)dst) = *(uint16_t*)&src;
112+
// CHECK-NEXT: }
113+
__device__ static inline void sts(uint32_t dst, const bf16& src) {
114+
asm volatile("st.shared.b16 [%1], %0;\n" : : "h"(*(uint16_t*)&src), "r"(dst));
115+
}
116+
117+
// CHECK: static inline void ldg(bf16& dst, bf16* src) {
118+
// CHECK-NEXT: *(uint16_t*)&dst = *((uint16_t *)(uintptr_t)src);
119+
// CHECK-NEXT: }
120+
__device__ static inline void ldg(bf16& dst, bf16* src) {
121+
asm volatile("ld.global.b16 %0, [%1];\n" : "=h"(*(uint16_t*)&dst) : "l"(src));
122+
}
123+
124+
// CHECK: static inline void stg(bf16* dst, const bf16& src) {
125+
// CHECK-NEXT: *((uint16_t *)(uintptr_t)dst) = *(uint16_t*)&src;
126+
// CHECK-NEXT: }
127+
__device__ static inline void stg(bf16* dst, const bf16& src) {
128+
asm volatile("st.global.b16 [%1], %0;\n" : : "h"(*(uint16_t*)&src), "l"(dst));
129+
}
130+
131+
// CHECK: static inline void lds(sycl::half& dst, uint32_t src) {
132+
// CHECK-NEXT: *(uint16_t*)&dst = *((uint16_t *)(uintptr_t)src);
133+
// CHECK-NEXT: }
134+
__device__ static inline void lds(half& dst, uint32_t src) {
135+
asm volatile("ld.shared.b16 %0, [%1];\n" : "=h"(*(uint16_t*)&dst) : "r"(src));
136+
}
137+
138+
// CHECK: static inline void sts(uint32_t dst, const sycl::half& src) {
139+
// CHECK-NEXT: *((uint16_t *)(uintptr_t)dst) = *(uint16_t*)&src;
140+
// CHECK-NEXT: }
141+
__device__ static inline void sts(uint32_t dst, const half& src) {
142+
asm volatile("st.shared.b16 [%1], %0;\n" : : "h"(*(uint16_t*)&src), "r"(dst));
143+
}
144+
145+
// CHECK: static inline void ldg(sycl::half& dst, sycl::half* src) {
146+
// CHECK-NEXT: *(uint16_t*)&dst = *((uint16_t *)(uintptr_t)src);
147+
// CHECK-NEXT: }
148+
__device__ static inline void ldg(half& dst, half* src) {
149+
asm volatile("ld.global.b16 %0, [%1];\n" : "=h"(*(uint16_t*)&dst) : "l"(src));
150+
}
151+
152+
// CHECK: static inline void stg(sycl::half* dst, const sycl::half& src) {
153+
// CHECK-NEXT: *((uint16_t *)(uintptr_t)dst) = *(uint16_t*)&src;
154+
// CHECK-NEXT: }
155+
__device__ static inline void stg(half* dst, const half& src) {
156+
asm volatile("st.global.b16 [%1], %0;\n" : : "h"(*(uint16_t*)&src), "l"(dst));
157+
}
158+
159+
// CHECK: static inline void lds(float& dst, uint32_t src) {
160+
// CHECK-NEXT: dst = *((float *)(uintptr_t)src);
161+
// CHECK-NEXT: }
162+
__device__ static inline void lds(float& dst, uint32_t src) {
163+
asm volatile("ld.shared.f32 %0, [%1];\n" : "=f"(dst) : "r"(src));
164+
}
165+
166+
// CHECK: static inline void sts(uint32_t dst, const float& src) {
167+
// CHECK-NEXT: *((float *)(uintptr_t)dst) = src;
168+
// CHECK-NEXT: }
169+
__device__ static inline void sts(uint32_t dst, const float& src) {
170+
asm volatile("st.shared.f32 [%1], %0;\n" : : "f"(src), "r"(dst));
171+
}
172+
173+
// CHECK: static inline void ldg(float& dst, float* src) {
174+
// CHECK-NEXT: dst = *src;
175+
// CHECK-NEXT: }
176+
__device__ static inline void ldg(float& dst, float* src) {
177+
asm volatile("ld.global.f32 %0, [%1];\n" : "=f"(dst) : "l"(src));
178+
}
179+
180+
// CHECK: static inline void stg(float* dst, const float& src) {
181+
// CHECK-NEXT: *dst = src;
182+
// CHECK-NEXT: }
183+
__device__ static inline void stg(float* dst, const float& src) {
184+
asm volatile("st.global.f32 [%1], %0;\n" : : "f"(src), "l"(dst));
185+
}
186+
187+
// CHECK: static inline void lds(bf16_2& dst, uint32_t src) {
188+
// CHECK-NEXT: *(uint32_t*)&dst = *((uint32_t *)(uintptr_t)src);
189+
// CHECK-NEXT: }
190+
__device__ static inline void lds(bf16_2& dst, uint32_t src) {
191+
asm volatile("ld.shared.b32 %0, [%1];\n" : "=r"(*(uint32_t*)&dst) : "r"(src));
192+
}
193+
194+
// CHECK: static inline void sts(uint32_t dst, const bf16_2& src) {
195+
// CHECK-NEXT: *((uint32_t *)(uintptr_t)dst) = (*(uint32_t*)&src);
196+
// CHECK-NEXT: }
197+
__device__ static inline void sts(uint32_t dst, const bf16_2& src) {
198+
asm volatile("st.shared.b32 [%1], %0;\n" : : "r"(*(uint32_t*)&src), "r"(dst));
199+
}
200+
201+
// CHECK: static inline void ldg(bf16_2& dst, bf16_2* src) {
202+
// CHECK-NEXT: *(uint32_t*)&dst = *((uint32_t *)(uintptr_t)src);
203+
// CHECK-NEXT: }
204+
__device__ static inline void ldg(bf16_2& dst, bf16_2* src) {
205+
asm volatile("ld.global.b32 %0, [%1];\n" : "=r"(*(uint32_t*)&dst) : "l"(src));
206+
}
207+
208+
// CHECK: static inline void stg(bf16_2* dst, const bf16_2& src) {
209+
// CHECK-NEXT: *((uint32_t *)(uintptr_t)dst) = (*(uint32_t*)&src);
210+
// CHECK-NEXT: }
211+
__device__ static inline void stg(bf16_2* dst, const bf16_2& src) {
212+
asm volatile("st.global.b32 [%1], %0;\n" : : "r"(*(uint32_t*)&src), "l"(dst));
213+
}
214+
215+
// CHECK: static inline void lds(half_2& dst, uint32_t src) {
216+
// CHECK-NEXT: *(uint32_t*)&dst = *((uint32_t *)(uintptr_t)src);
217+
// CHECK-NEXT: }
218+
__device__ static inline void lds(half_2& dst, uint32_t src) {
219+
asm volatile("ld.shared.b32 %0, [%1];\n" : "=r"(*(uint32_t*)&dst) : "r"(src));
220+
}
221+
222+
// CHECK: static inline void sts(uint32_t dst, const half_2& src) {
223+
// CHECK-NEXT: *((uint32_t *)(uintptr_t)dst) = (*(uint32_t*)&src);
224+
// CHECK-NEXT: }
225+
__device__ static inline void sts(uint32_t dst, const half_2& src) {
226+
asm volatile("st.shared.b32 [%1], %0;\n" : : "r"(*(uint32_t*)&src), "r"(dst));
227+
}
228+
229+
// CHECK: static inline void ldg(half_2& dst, half_2* src) {
230+
// CHECK-NEXT: *(uint32_t*)&dst = *((uint32_t *)(uintptr_t)src);
231+
// CHECK-NEXT: }
232+
__device__ static inline void ldg(half_2& dst, half_2* src) {
233+
asm volatile("ld.global.b32 %0, [%1];\n" : "=r"(*(uint32_t*)&dst) : "l"(src));
234+
}
235+
236+
// CHECK: static inline void stg(half_2* dst, const half_2& src) {
237+
// CHECK-NEXT: *((uint32_t *)(uintptr_t)dst) = (*(uint32_t*)&src);
238+
// CHECK-NEXT: }
239+
__device__ static inline void stg(half_2* dst, const half_2& src) {
240+
asm volatile("st.global.b32 [%1], %0;\n" : : "r"(*(uint32_t*)&src), "l"(dst));
241+
}
242+
243+
// CHECK: static inline void lds(sycl::float2& dst, uint32_t src) {
244+
// CHECK-NEXT: {dst.x(), dst.y()} = *((float *)(uintptr_t)src);
245+
// CHECK-NEXT: }
246+
__device__ static inline void lds(float2& dst, uint32_t src) {
247+
asm volatile("ld.shared.v2.f32 {%0, %1}, [%2];\n" : "=f"(dst.x), "=f"(dst.y) : "r"(src));
248+
}
249+
250+
// CHECK: static inline void sts(uint32_t dst, const sycl::float2& src) {
251+
// CHECK-NEXT: *((float *)(uintptr_t)dst) = {src.x(), src.y()};
252+
// CHECK-NEXT: }
253+
__device__ static inline void sts(uint32_t dst, const float2& src) {
254+
asm volatile("st.shared.v2.f32 [%2], {%0, %1};\n" : : "f"(src.x), "f"(src.y), "r"(dst));
255+
}
256+
257+
// CHECK: static inline void ldg(sycl::float2& dst, sycl::float2* src) {
258+
// CHECK-NEXT: {dst.x(), dst.y()} = *((float *)(uintptr_t)src);
259+
// CHECK-NEXT: }
260+
__device__ static inline void ldg(float2& dst, float2* src) {
261+
asm volatile("ld.global.v2.f32 {%0, %1}, [%2];\n" : "=f"(dst.x), "=f"(dst.y) : "l"(src));
262+
}
263+
264+
// CHECK: static inline void stg(sycl::float2* dst, const sycl::float2& src) {
265+
// CHECK-NEXT: *((float *)(uintptr_t)dst) = {src.x(), src.y()};
266+
// CHECK-NEXT: }
267+
__device__ static inline void stg(float2* dst, const float2& src) {
268+
asm volatile("st.global.v2.f32 [%2], {%0, %1};\n" : : "f"(src.x), "f"(src.y), "l"(dst));
269+
}
270+
271+
// CHECK: static inline void lds(sycl::float4& dst, uint32_t src) {
272+
// CHECK-NEXT: {dst.x(), dst.y(), dst.z(), dst.w()} = *((float *)(uintptr_t)src);
273+
// CHECK-NEXT: }
274+
__device__ static inline void lds(float4& dst, uint32_t src) {
275+
asm volatile("ld.shared.v4.f32 {%0, %1, %2, %3}, [%4];\n" : "=f"(dst.x), "=f"(dst.y), "=f"(dst.z), "=f"(dst.w) : "r"(src));
276+
}
277+
278+
// CHECK: static inline void sts(uint32_t dst, const sycl::float4& src) {
279+
// CHECK-NEXT: *((float *)(uintptr_t)dst) = {src.x(), src.y(), src.z(), src.w()};
280+
// CHECK-NEXT: }
281+
__device__ static inline void sts(uint32_t dst, const float4& src) {
282+
asm volatile("st.shared.v4.f32 [%4], {%0, %1, %2, %3};\n" : : "f"(src.x), "f"(src.y), "f"(src.z), "f"(src.w), "r"(dst));
283+
}
284+
285+
// CHECK: static inline void ldg(sycl::float4& dst, sycl::float4* src) {
286+
// CHECK-NEXT: {dst.x(), dst.y(), dst.z(), dst.w()} = *((float *)(uintptr_t)src);
287+
// CHECK-NEXT: }
288+
__device__ static inline void ldg(float4& dst, float4* src) {
289+
asm volatile("ld.global.v4.f32 {%0, %1, %2, %3}, [%4];\n" : "=f"(dst.x), "=f"(dst.y), "=f"(dst.z), "=f"(dst.w) : "l"(src));
290+
}
291+
292+
// CHECK: static inline void stg(sycl::float4* dst, const sycl::float4& src) {
293+
// CHECK-NEXT: *((float *)(uintptr_t)dst) = {src.x(), src.y(), src.z(), src.w()};
294+
// CHECK-NEXT: }
295+
__device__ static inline void stg(float4* dst, const float4& src) {
296+
asm volatile("st.global.v4.f32 [%4], {%0, %1, %2, %3};\n" : : "f"(src.x), "f"(src.y), "f"(src.z), "f"(src.w), "l"(dst));
297+
}
298+
299+
96300
// clang-format on

0 commit comments

Comments
 (0)