Skip to content

Commit 4fcf105

Browse files
author
Yihan Wang
authored
[SYCLomatic #2054] Add test for asm ld and st instructions (#750)
Signed-off-by: Wang, Yihan <[email protected]>
1 parent 1cfa1cb commit 4fcf105

File tree

3 files changed

+78
-1
lines changed

3 files changed

+78
-1
lines changed

features/feature_case/asm/asm_mem.cu

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// ===------- asm_mem.cu ----------------------------------- *- CUDA -* ---===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//
8+
// ===---------------------------------------------------------------------===//
9+
10+
#include <cuda_runtime.h>
11+
#include <stdio.h>
12+
13+
// Writes the four 32-bit values 111, 222, 333 and 444 to byte offsets
// 0, 4, 8 and 12 from `a` using inline PTX st.global.s32 instructions.
//
// Each asm statement has no output operands; operand %0 is the base address
// `a` ("l" constraint) and operand %1 is the immediate value to store
// ("r" constraint). Every thread that runs the kernel performs the same four
// stores.
__global__ void st(int *a) {
  // Element 0: store at the base address.
  asm volatile("st.global.s32 [%0], %1;" : : "l"(a), "r"(111));
  // Elements 1..3: the address operand uses the register + byte-offset form.
  asm volatile("st.global.s32 [%0 + 4], %1;" : : "l"(a), "r"(222));
  asm volatile("st.global.s32 [%0 + 8], %1;" : : "l"(a), "r"(333));
  asm volatile("st.global.s32 [%0 + 12], %1;" : : "l"(a), "r"(444));
}
19+
20+
// Launches the `st` kernel with a single thread on a freshly allocated
// 4-int device buffer, copies the buffer back, and checks that it holds
// {111, 222, 333, 444}.
//
// Returns true only when every CUDA call succeeds and all four values match.
// Any CUDA failure (allocation, launch, synchronization, copy) makes the test
// return false instead of comparing an uninitialized host buffer.
bool test_store() {
  int *d_arr = nullptr;
  if (cudaMalloc(&d_arr, sizeof(int) * 4) != cudaSuccess)
    return false;

  // Zero-initialized so a failed copy can never leave indeterminate values.
  int h_arr[4] = {0}, exp[] = {111, 222, 333, 444};

  st<<<1, 1>>>(d_arr);
  // cudaGetLastError reports launch failures; the synchronize call reports
  // faults raised while the kernel was executing.
  bool ok = cudaGetLastError() == cudaSuccess &&
            cudaStreamSynchronize(0) == cudaSuccess &&
            cudaMemcpy(h_arr, d_arr, sizeof(h_arr),
                       cudaMemcpyDeviceToHost) == cudaSuccess;

  // Release the device buffer on both the success and the failure path.
  cudaFree(d_arr);

  for (int i = 0; ok && i < 4; ++i)
    ok = (h_arr[i] == exp[i]);
  return ok;
}
33+
34+
// Copies four consecutive 32-bit integers from `arr` to `arr2` using inline
// PTX. The values at byte offsets 0, 4, 8 and 12 from `arr` are read with
// ld.global.s32; once all four loads have been issued, each value is written
// with st.global.s32 to the same byte offset from `arr2`.
__global__ void ld(int *arr, int *arr2) {
  int v0, v1, v2, v3;

  // Loads: %0 is the destination register ("=r"), %1 the base address ("l").
  asm volatile("ld.global.s32 %0, [%1];" : "=r"(v0) : "l"(arr));
  asm volatile("ld.global.s32 %0, [%1 + 4];" : "=r"(v1) : "l"(arr));
  asm volatile("ld.global.s32 %0, [%1 + 8];" : "=r"(v2) : "l"(arr));
  asm volatile("ld.global.s32 %0, [%1 + 12];" : "=r"(v3) : "l"(arr));

  // Stores: no outputs; %0 is the base address ("l"), %1 the value ("r").
  asm volatile("st.global.s32 [%0], %1;" : : "l"(arr2), "r"(v0));
  asm volatile("st.global.s32 [%0 + 4], %1;" : : "l"(arr2), "r"(v1));
  asm volatile("st.global.s32 [%0 + 8], %1;" : : "l"(arr2), "r"(v2));
  asm volatile("st.global.s32 [%0 + 12], %1;" : : "l"(arr2), "r"(v3));
}
45+
46+
// Uploads {111, 222, 333, 444} to a device buffer, launches the `ld` kernel
// with a single thread to copy it into a second device buffer, copies that
// second buffer back, and checks that it matches the uploaded values.
//
// Returns true only when every CUDA call succeeds and all four values match.
// Both device buffers are released on every path.
bool test_load() {
  int h_arr[4] = {0}, exp[] = {111, 222, 333, 444};
  int *d_arr = nullptr, *d_arr2 = nullptr;

  bool ok = cudaMalloc(&d_arr, sizeof(int) * 4) == cudaSuccess &&
            cudaMalloc(&d_arr2, sizeof(int) * 4) == cudaSuccess &&
            cudaMemcpy(d_arr, exp, sizeof(exp),
                       cudaMemcpyHostToDevice) == cudaSuccess;

  if (ok) {
    ld<<<1, 1>>>(d_arr, d_arr2);
    // cudaGetLastError reports launch failures; the synchronize call reports
    // faults raised while the kernel was executing.
    ok = cudaGetLastError() == cudaSuccess &&
         cudaStreamSynchronize(0) == cudaSuccess &&
         cudaMemcpy(h_arr, d_arr2, sizeof(h_arr),
                    cudaMemcpyDeviceToHost) == cudaSuccess;
  }

  // cudaFree performs no operation on a null pointer, so this is safe even
  // when one of the allocations above failed.
  cudaFree(d_arr);
  cudaFree(d_arr2);

  for (int i = 0; ok && i < 4; ++i)
    ok = (h_arr[i] == exp[i]);
  return ok;
}
60+
61+
// Calls the zero-argument predicate FN and prints "Test <FN> PASS" or
// "Test <FN> FAIL". On failure it also executes `return 1;` in the enclosing
// function, so it may only be used inside a function returning an integer.
//
// The body is wrapped in do { ... } while (0) so that `TEST(fn);` expands to
// exactly one statement and remains valid inside an unbraced if/else.
#define TEST(FN)                                                               \
  do {                                                                         \
    if (FN()) {                                                                \
      printf("Test " #FN " PASS\n");                                           \
    } else {                                                                   \
      printf("Test " #FN " FAIL\n");                                           \
      return 1;                                                                \
    }                                                                          \
  } while (0)
70+
71+
// Entry point: runs the store test and then the load test through TEST.
// TEST returns 1 from main as soon as a test reports failure, so the exit
// status is 0 only when both tests pass.
int main() {
  TEST(test_store);
  TEST(test_load);
  return 0;
}

features/features.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
<tests>
77
<test testName="asm" configFile="config/TEMPLATE_asm.xml" />
88
<test testName="asm_bar" configFile="config/TEMPLATE_asm.xml" />
9+
<test testName="asm_mem" configFile="config/TEMPLATE_asm.xml" />
910
<test testName="asm_vinst" configFile="config/TEMPLATE_asm.xml" />
1011
<test testName="asm_v2inst" configFile="config/TEMPLATE_asm.xml" />
1112
<test testName="asm_v4inst" configFile="config/TEMPLATE_asm.xml" />

features/test_feature.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from test_utils import *
2020

21-
exec_tests = ['asm', 'asm_bar', 'asm_arith', 'asm_vinst', 'asm_v2inst', 'asm_v4inst', 'asm_optimize', 'thrust-vector-2', 'thrust-binary-search', 'thrust-count', 'thrust-copy',
21+
exec_tests = ['asm', 'asm_bar', 'asm_mem', 'asm_arith', 'asm_vinst', 'asm_v2inst', 'asm_v4inst', 'asm_optimize', 'thrust-vector-2', 'thrust-binary-search', 'thrust-count', 'thrust-copy',
2222
'thrust-qmc', 'thrust-transform-if', 'thrust-policy', 'thrust-list', 'module-kernel',
2323
'kernel-launch', 'thrust-gather', 'thrust-gather_if', 'cub_device_partition',
2424
'thrust-scatter', 'thrust-unique_by_key_copy', 'thrust-for-hypre', 'thrust-merge_by_key',

0 commit comments

Comments
 (0)