Skip to content

Commit 08226aa

Browse files
committed
More tests per review comments
1 parent 4ea71e1 commit 08226aa

File tree

1 file changed

+114
-11
lines changed

1 file changed

+114
-11
lines changed

llvm/test/CodeGen/AMDGPU/lower-kernargs.ll

Lines changed: 114 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
target datalayout = "A5"
66

7+
declare void @llvm.fake.use(...)
8+
79
define amdgpu_kernel void @kern_noargs() {
810
; GCN-LABEL: @kern_noargs(
911
; GCN-NEXT: ret void
@@ -260,17 +262,17 @@ define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %a
260262
; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
261263
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
262264
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
263-
; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
265+
; HSA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
264266
; HSA-NEXT: ret void
265267
;
266268
; MESA-LABEL: @kern_range_noundef_i32(
267269
; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
268270
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
269271
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
270-
; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
272+
; MESA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
271273
; MESA-NEXT: ret void
272274
;
273-
store volatile i32 %arg0, ptr addrspace(1) poison
275+
call void (...) @llvm.fake.use(i32 %arg0)
274276
ret void
275277
}
276278

@@ -1728,6 +1730,105 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref
17281730
ret void
17291731
}
17301732

1733+
; A single `float` kernel argument marked noundef. The checks expect it to be
; loaded from the kernarg segment (byte offset 0 for HSA, 36 for MESA) with
; !noundef metadata on the load. The argument's only use is the call to
; @llvm.fake.use.
define amdgpu_kernel void @noundef_f32(float noundef %arg0) {
1734+
; HSA-LABEL: @noundef_f32(
1735+
; HSA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1736+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 0
1737+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1738+
; HSA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
1739+
; HSA-NEXT: ret void
1740+
;
1741+
; MESA-LABEL: @noundef_f32(
1742+
; MESA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1743+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 36
1744+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1745+
; MESA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
1746+
; MESA-NEXT: ret void
1747+
;
1748+
call void (...) @llvm.fake.use(float %arg0)
1749+
ret void
1750+
}
1751+
1752+
; A single `half` kernel argument marked noundef. The checks expect an i32
; load from the kernarg segment carrying !noundef metadata, followed by a
; trunc to i16 and a bitcast to half. The argument's only use is the call to
; @llvm.fake.use.
define amdgpu_kernel void @noundef_f16(half noundef %arg0) {
1753+
; HSA-LABEL: @noundef_f16(
1754+
; HSA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1755+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 0
1756+
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1757+
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1758+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1759+
; HSA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
1760+
; HSA-NEXT: ret void
1761+
;
1762+
; MESA-LABEL: @noundef_f16(
1763+
; MESA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1764+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 36
1765+
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1766+
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1767+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1768+
; MESA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
1769+
; MESA-NEXT: ret void
1770+
;
1771+
call void (...) @llvm.fake.use(half %arg0)
1772+
ret void
1773+
}
1774+
1775+
; A single `<2 x i32>` kernel argument marked noundef. The checks expect one
; vector load from the kernarg segment (byte offset 0 for HSA, 36 for MESA)
; with !noundef metadata on the load. The argument's only use is the call to
; @llvm.fake.use.
define amdgpu_kernel void @noundef_v2i32(<2 x i32> noundef %arg0) {
1776+
; HSA-LABEL: @noundef_v2i32(
1777+
; HSA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1778+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 0
1779+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1780+
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
1781+
; HSA-NEXT: ret void
1782+
;
1783+
; MESA-LABEL: @noundef_v2i32(
1784+
; MESA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1785+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 36
1786+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1787+
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
1788+
; MESA-NEXT: ret void
1789+
;
1790+
call void (...) @llvm.fake.use(<2 x i32> %arg0)
1791+
ret void
1792+
}
1793+
1794+
; A single `ptr` kernel argument marked noundef. The checks expect a pointer
; load from the kernarg segment (byte offset 0 for HSA, 36 for MESA) with
; !noundef metadata on the load. The argument's only use is the call to
; @llvm.fake.use.
define amdgpu_kernel void @noundef_p0(ptr noundef %arg0) {
1795+
; HSA-LABEL: @noundef_p0(
1796+
; HSA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1797+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 0
1798+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1799+
; HSA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
1800+
; HSA-NEXT: ret void
1801+
;
1802+
; MESA-LABEL: @noundef_p0(
1803+
; MESA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1804+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 36
1805+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1806+
; MESA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
1807+
; MESA-NEXT: ret void
1808+
;
1809+
call void (...) @llvm.fake.use(ptr %arg0)
1810+
ret void
1811+
}
1812+
1813+
; A single `<2 x ptr>` kernel argument marked noundef. The checks expect one
; vector-of-pointer load from the kernarg segment (byte offset 0 for HSA, 36
; for MESA) with !noundef metadata on the load. The argument's only use is the
; call to @llvm.fake.use.
define amdgpu_kernel void @noundef_v2p0(<2 x ptr> noundef %arg0) {
1814+
; HSA-LABEL: @noundef_v2p0(
1815+
; HSA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1816+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 0
1817+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1818+
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
1819+
; HSA-NEXT: ret void
1820+
;
1821+
; MESA-LABEL: @noundef_v2p0(
1822+
; MESA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1823+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 36
1824+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1825+
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
1826+
; MESA-NEXT: ret void
1827+
;
1828+
call void (...) @llvm.fake.use(<2 x ptr> %arg0)
1829+
ret void
1830+
}
1831+
17311832
attributes #0 = { nounwind "target-cpu"="kaveri" }
17321833
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
17331834
attributes #2 = { nounwind "target-cpu"="tahiti" }
@@ -1736,15 +1837,17 @@ attributes #2 = { nounwind "target-cpu"="tahiti" }
17361837
!llvm.module.flags = !{!0}
17371838
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
17381839
;.
1739-
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1740-
; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1741-
; HSA: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1742-
; HSA: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
1840+
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
1841+
; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1842+
; HSA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1843+
; HSA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1844+
; HSA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
17431845
;.
1744-
; MESA: attributes #[[ATTR0:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1745-
; MESA: attributes #[[ATTR1:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1746-
; MESA: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1747-
; MESA: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
1846+
; MESA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
1847+
; MESA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1848+
; MESA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1849+
; MESA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1850+
; MESA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
17481851
;.
17491852
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
17501853
; HSA: [[META1]] = !{}

0 commit comments

Comments
 (0)