3838// AMDGPU-NEXT: [[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR7]]
3939// AMDGPU-NEXT: call void @__gpu_sync_threads() #[[ATTR7]]
4040// AMDGPU-NEXT: call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR7]]
41- // AMDGPU-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1) #[[ATTR7]]
41+ // AMDGPU-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR7]]
4242// AMDGPU-NEXT: [[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR7]]
4343// AMDGPU-NEXT: [[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR7]]
4444// AMDGPU-NEXT: call void @__gpu_exit() #[[ATTR8:[0-9]+]]
7070// NVPTX-NEXT: [[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR6]]
7171// NVPTX-NEXT: call void @__gpu_sync_threads() #[[ATTR6]]
7272// NVPTX-NEXT: call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR6]]
73- // NVPTX-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1) #[[ATTR6]]
73+ // NVPTX-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR6]]
7474// NVPTX-NEXT: [[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR6]]
7575// NVPTX-NEXT: [[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR6]]
7676// NVPTX-NEXT: call void @__gpu_exit() #[[ATTR7:[0-9]+]]
@@ -90,6 +90,68 @@ __gpu_kernel void foo() {
9090 __gpu_num_threads_z ();
9191 __gpu_num_threads (0 );
9292 __gpu_thread_id_x ();
93+ // AMDGPU-LABEL: define internal i32 @__gpu_thread_id(
94+ // AMDGPU-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
95+ // AMDGPU-NEXT: [[ENTRY:.*:]]
96+ // AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
97+ // AMDGPU-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
98+ // AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
99+ // AMDGPU-NEXT: [[__DIM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__DIM_ADDR]] to ptr
100+ // AMDGPU-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR_ASCAST]], align 4
101+ // AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR_ASCAST]], align 4
102+ // AMDGPU-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
103+ // AMDGPU-NEXT: i32 0, label %[[SW_BB:.*]]
104+ // AMDGPU-NEXT: i32 1, label %[[SW_BB1:.*]]
105+ // AMDGPU-NEXT: i32 2, label %[[SW_BB3:.*]]
106+ // AMDGPU-NEXT: ]
107+ // AMDGPU: [[SW_BB]]:
108+ // AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR7]]
109+ // AMDGPU-NEXT: store i32 [[CALL]], ptr [[RETVAL_ASCAST]], align 4
110+ // AMDGPU-NEXT: br label %[[RETURN:.*]]
111+ // AMDGPU: [[SW_BB1]]:
112+ // AMDGPU-NEXT: [[CALL2:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR7]]
113+ // AMDGPU-NEXT: store i32 [[CALL2]], ptr [[RETVAL_ASCAST]], align 4
114+ // AMDGPU-NEXT: br label %[[RETURN]]
115+ // AMDGPU: [[SW_BB3]]:
116+ // AMDGPU-NEXT: [[CALL4:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR7]]
117+ // AMDGPU-NEXT: store i32 [[CALL4]], ptr [[RETVAL_ASCAST]], align 4
118+ // AMDGPU-NEXT: br label %[[RETURN]]
119+ // AMDGPU: [[SW_DEFAULT]]:
120+ // AMDGPU-NEXT: unreachable
121+ // AMDGPU: [[RETURN]]:
122+ // AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL_ASCAST]], align 4
123+ // AMDGPU-NEXT: ret i32 [[TMP1]]
124+ //
125+ // NVPTX-LABEL: define internal i32 @__gpu_thread_id(
126+ // NVPTX-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
127+ // NVPTX-NEXT: [[ENTRY:.*:]]
128+ // NVPTX-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
129+ // NVPTX-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
130+ // NVPTX-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
131+ // NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
132+ // NVPTX-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
133+ // NVPTX-NEXT: i32 0, label %[[SW_BB:.*]]
134+ // NVPTX-NEXT: i32 1, label %[[SW_BB1:.*]]
135+ // NVPTX-NEXT: i32 2, label %[[SW_BB3:.*]]
136+ // NVPTX-NEXT: ]
137+ // NVPTX: [[SW_BB]]:
138+ // NVPTX-NEXT: [[CALL:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR6]]
139+ // NVPTX-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
140+ // NVPTX-NEXT: br label %[[RETURN:.*]]
141+ // NVPTX: [[SW_BB1]]:
142+ // NVPTX-NEXT: [[CALL2:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR6]]
143+ // NVPTX-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
144+ // NVPTX-NEXT: br label %[[RETURN]]
145+ // NVPTX: [[SW_BB3]]:
146+ // NVPTX-NEXT: [[CALL4:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR6]]
147+ // NVPTX-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
148+ // NVPTX-NEXT: br label %[[RETURN]]
149+ // NVPTX: [[SW_DEFAULT]]:
150+ // NVPTX-NEXT: unreachable
151+ // NVPTX: [[RETURN]]:
152+ // NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
153+ // NVPTX-NEXT: ret i32 [[TMP1]]
154+ //
93155 __gpu_thread_id_y ();
94156 __gpu_thread_id_z ();
95157 __gpu_thread_id (0 );
@@ -100,7 +162,7 @@ __gpu_kernel void foo() {
100162 __gpu_ballot (-1 , 1 );
101163 __gpu_sync_threads ();
102164 __gpu_sync_lane (-1 );
103- __gpu_shuffle_idx_u32 (-1 , -1 , -1 );
165+ __gpu_shuffle_idx_u32 (-1 , -1 , -1 , 0 );
104166 __gpu_first_lane_id (-1 );
105167 __gpu_is_first_in_lane (-1 );
106168 __gpu_exit ();
0 commit comments