Skip to content

Commit 90a7741

Browse files
committed
Address comments
1 parent fff1f42 commit 90a7741

File tree

1 file changed

+29
-21
lines changed

1 file changed

+29
-21
lines changed

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-IR-lowering.ll

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,38 @@
22
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
33
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=100 -S < %s | FileCheck -check-prefix=PRELOAD %s
44

5-
define amdgpu_kernel void @incompatible_attribute_block_count_x(ptr addrspace(1) byref(i32) %out) {
5+
define amdgpu_kernel void @incompatible_attribute_block_count_x(ptr addrspace(1) %out, ptr addrspace(1) byref(i32) %arg) {
66
; NO-PRELOAD-LABEL: define {{[^@]+}}@incompatible_attribute_block_count_x
7-
; NO-PRELOAD-SAME: (ptr addrspace(1) byref(i32) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
8-
; NO-PRELOAD-NEXT: [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
9-
; NO-PRELOAD-NEXT: [[OUT_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 0
10-
; NO-PRELOAD-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[OUT_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
7+
; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) byref(i32) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
8+
; NO-PRELOAD-NEXT: [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
9+
; NO-PRELOAD-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 0
10+
; NO-PRELOAD-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0:![0-9]+]]
11+
; NO-PRELOAD-NEXT: [[ARG_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 8
12+
; NO-PRELOAD-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[ARG_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
1113
; NO-PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
12-
; NO-PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
13-
; NO-PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[TMP1]], align 4
14+
; NO-PRELOAD-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
15+
; NO-PRELOAD-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
16+
; NO-PRELOAD-NEXT: [[ADD:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
17+
; NO-PRELOAD-NEXT: store i32 [[ADD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1418
; NO-PRELOAD-NEXT: ret void
1519
;
1620
; PRELOAD-LABEL: define {{[^@]+}}@incompatible_attribute_block_count_x
17-
; PRELOAD-SAME: (ptr addrspace(1) byref(i32) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
18-
; PRELOAD-NEXT: [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
19-
; PRELOAD-NEXT: [[OUT_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 0
20-
; PRELOAD-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[OUT_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
21+
; PRELOAD-SAME: (ptr addrspace(1) inreg [[OUT:%.*]], ptr addrspace(1) byref(i32) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
22+
; PRELOAD-NEXT: [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
23+
; PRELOAD-NEXT: [[ARG_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[INCOMPATIBLE_ATTRIBUTE_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 8
24+
; PRELOAD-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[ARG_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
2125
; PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
22-
; PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
23-
; PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[TMP1]], align 4
26+
; PRELOAD-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
27+
; PRELOAD-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
28+
; PRELOAD-NEXT: [[ADD:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
29+
; PRELOAD-NEXT: store i32 [[ADD]], ptr addrspace(1) [[OUT]], align 4
2430
; PRELOAD-NEXT: ret void
2531
;
2632
%imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
27-
%load = load i32, ptr addrspace(4) %imp_arg_ptr
28-
store i32 %load, ptr addrspace(1) %out
33+
%load0 = load i32, ptr addrspace(4) %imp_arg_ptr
34+
%load1 = load i32, ptr addrspace(1) %arg
35+
%add = add i32 %load0, %load1
36+
store i32 %add, ptr addrspace(1) %out
2937
ret void
3038
}
3139

@@ -34,7 +42,7 @@ define amdgpu_kernel void @preload_aggregate_arg_block_count_x(ptr addrspace(1)
3442
; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], { i32, i32 } inreg [[TMP0:%.*]]) #[[ATTR0]] {
3543
; NO-PRELOAD-NEXT: [[PRELOAD_AGGREGATE_ARG_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
3644
; NO-PRELOAD-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[PRELOAD_AGGREGATE_ARG_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 0
37-
; NO-PRELOAD-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0:![0-9]+]]
45+
; NO-PRELOAD-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
3846
; NO-PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
3947
; NO-PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
4048
; NO-PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
@@ -104,10 +112,10 @@ define amdgpu_kernel void @preload_unused_arg_block_count_x(ptr addrspace(1) %ou
104112
ret void
105113
}
106114

107-
define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) %out, i512 inreg) {
115+
define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) %out, <16 x i32> inreg) {
108116
; NO-PRELOAD-LABEL: define {{[^@]+}}@no_free_sgprs_block_count_x
109-
; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], i512 inreg [[TMP0:%.*]]) #[[ATTR0]] {
110-
; NO-PRELOAD-NEXT: [[NO_FREE_SGPRS_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(328) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
117+
; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], <16 x i32> inreg [[TMP0:%.*]]) #[[ATTR0]] {
118+
; NO-PRELOAD-NEXT: [[NO_FREE_SGPRS_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
111119
; NO-PRELOAD-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NO_FREE_SGPRS_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 0
112120
; NO-PRELOAD-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
113121
; NO-PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -116,8 +124,8 @@ define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) %out, i5
116124
; NO-PRELOAD-NEXT: ret void
117125
;
118126
; PRELOAD-LABEL: define {{[^@]+}}@no_free_sgprs_block_count_x
119-
; PRELOAD-SAME: (ptr addrspace(1) inreg [[OUT:%.*]], i512 inreg [[TMP0:%.*]]) #[[ATTR0]] {
120-
; PRELOAD-NEXT: [[NO_FREE_SGPRS_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(328) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
127+
; PRELOAD-SAME: (ptr addrspace(1) inreg [[OUT:%.*]], <16 x i32> inreg [[TMP0:%.*]]) #[[ATTR0]] {
128+
; PRELOAD-NEXT: [[NO_FREE_SGPRS_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
121129
; PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
122130
; PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
123131
; PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT]], align 4

0 commit comments

Comments
 (0)