@@ -390,15 +390,14 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
390390; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
391391; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
392392; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
393- ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
394393; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
395394; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
396395; AMDGPU: worker_state_machine.finished:
397396; AMDGPU-NEXT: ret void
398397; AMDGPU: worker_state_machine.is_active.check:
399398; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
400399; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
401- ; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
400+ ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
402401; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
403402; AMDGPU: worker_state_machine.parallel_region.end:
404403; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -451,15 +450,14 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
451450; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
452451; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
453452; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
454- ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
455453; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
456454; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
457455; NVPTX: worker_state_machine.finished:
458456; NVPTX-NEXT: ret void
459457; NVPTX: worker_state_machine.is_active.check:
460458; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
461459; NVPTX: worker_state_machine.parallel_region.fallback.execute:
462- ; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
460+ ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
463461; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
464462; NVPTX: worker_state_machine.parallel_region.end:
465463; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -736,15 +734,14 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
736734; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
737735; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
738736; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
739- ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
740737; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
741738; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
742739; AMDGPU: worker_state_machine.finished:
743740; AMDGPU-NEXT: ret void
744741; AMDGPU: worker_state_machine.is_active.check:
745742; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
746743; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
747- ; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
744+ ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
748745; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
749746; AMDGPU: worker_state_machine.parallel_region.end:
750747; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -796,15 +793,14 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
796793; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
797794; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
798795; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
799- ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
800796; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
801797; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
802798; NVPTX: worker_state_machine.finished:
803799; NVPTX-NEXT: ret void
804800; NVPTX: worker_state_machine.is_active.check:
805801; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
806802; NVPTX: worker_state_machine.parallel_region.fallback.execute:
807- ; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
803+ ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
808804; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
809805; NVPTX: worker_state_machine.parallel_region.end:
810806; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
0 commit comments