From 1e7625eb63cb7cfed620c2709b2c6fe29121e0a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 12 Mar 2025 14:34:37 +0700 Subject: [PATCH] AMDGPU: Remove amdhsa_code_object_version module flags from most tests These were added to the migration from v4 to v5 and should be removed now that the default has changed. --- .../GlobalISel/crash-stack-address-O0.ll | 2 - .../GlobalISel/irtranslator-assert-align.ll | 2 - .../GlobalISel/irtranslator-atomicrmw.ll | 2 - .../irtranslator-call-abi-attribute-hints.ll | 2 - .../irtranslator-call-return-values.ll | 2 - .../GlobalISel/irtranslator-call-sret.ll | 2 - .../AMDGPU/GlobalISel/irtranslator-call.ll | 2 - .../irtranslator-constant-fold-vector-op.ll | 2 - .../GlobalISel/irtranslator-indirect-call.ll | 2 - .../GlobalISel/irtranslator-sibling-call.ll | 2 - .../GlobalISel/irtranslator-tail-call.ll | 2 - .../AMDGPU/GlobalISel/non-entry-alloca.ll | 2 - .../abi-attribute-hints-undefined-behavior.ll | 2 - .../AMDGPU/addrspacecast-constantexpr.ll | 4 - .../test/CodeGen/AMDGPU/addrspacecast.gfx6.ll | 2 - .../amdgpu-simplify-libcall-pow-codegen.ll | 2 - .../annotate-kernel-features-hsa-call.ll | 6 - .../AMDGPU/annotate-kernel-features-hsa.ll | 4 - ...der-no-live-segment-at-def-implicit-def.ll | 2 - .../branch-folding-implicit-def-subreg.ll | 2 - .../AMDGPU/call-alias-register-usage-agpr.ll | 2 - .../AMDGPU/call-alias-register-usage0.ll | 2 - .../AMDGPU/call-alias-register-usage1.ll | 2 - .../AMDGPU/call-alias-register-usage2.ll | 2 - .../AMDGPU/call-alias-register-usage3.ll | 2 - llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 2 - .../CodeGen/AMDGPU/call-argument-types.ll | 2 - llvm/test/CodeGen/AMDGPU/call-waitcnt.ll | 2 - .../callee-special-input-sgprs-fixed-abi.ll | 2 - llvm/test/CodeGen/AMDGPU/cc-update.ll | 2 - .../CodeGen/AMDGPU/cf-loop-on-constant.ll | 2 - llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 2 - .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 2 - llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 2 - .../CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll | 2 - llvm/test/CodeGen/AMDGPU/ds_read2.ll | 2 - llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll | 2 - llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll | 2 - llvm/test/CodeGen/AMDGPU/fneg-fabs.ll | 2 - .../AMDGPU/gfx11-user-sgpr-init16-bug.ll | 2 - .../AMDGPU/global_atomics_scan_fadd.ll | 2 - .../AMDGPU/global_atomics_scan_fmax.ll | 2 - .../AMDGPU/global_atomics_scan_fmin.ll | 2 - .../AMDGPU/global_atomics_scan_fsub.ll | 2 - .../implicit-kernel-argument-alignment.ll | 2 - .../AMDGPU/indirect-addressing-term.ll | 2 - .../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 2 - .../kernel-vgpr-spill-mubuf-with-voffset.ll | 2 - llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll | 2 - .../CodeGen/AMDGPU/llvm.amdgcn.is.private.ll | 2 - .../CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll | 2 - llvm/test/CodeGen/AMDGPU/lower-kernargs.ll | 404 +++++++++--------- .../AMDGPU/module-lds-false-sharing.ll | 2 - .../AMDGPU/need-fp-from-vgpr-spills.ll | 2 - .../AMDGPU/partial-sgpr-to-vgpr-spills.ll | 2 - .../AMDGPU/preserve-wwm-copy-dst-reg.ll | 2 - .../AMDGPU/promote-alloca-calling-conv.ll | 2 - .../CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll | 2 - .../sgpr-spill-update-only-slot-indexes.ll | 2 - .../CodeGen/AMDGPU/simple-indirect-call.ll | 5 - llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll | 2 - llvm/test/CodeGen/AMDGPU/spill-m0.ll | 2 - .../CodeGen/AMDGPU/stacksave_stackrestore.ll | 2 - .../AMDGPU/tuple-allocation-failure.ll | 2 - .../AMDGPU/unstructured-cfg-def-use-issue.ll | 2 - .../AMDGPU/vgpr-spill-placement-issue61083.ll | 2 - llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 2 - 67 files changed, 200 insertions(+), 347 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll index 515b9f8955d59..8efe711c9267c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll @@ -26,5 +26,3 @@ entry: ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll index fc3eb19e98524..a81ce316081d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll @@ -210,5 +210,3 @@ entry: ret ptr addrspace(1) %call } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 4ee658666a1b7..a439226fbc5fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -178,5 +178,3 @@ define <2 x half> @test_atomicrmw_fmax_vector(ptr addrspace(3) %addr) { ret <2 x half> %oldval } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll index 31c08a3479bb3..a12ee14aee427 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -227,5 +227,3 @@ define void @func_call_no_other_sgprs() { ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 7c54d303e91a3..0b6fe90b90654 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -3013,5 +3013,3 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { nounwind noinline } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index c13c6e96d7e7d..2910d35e3e495 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -90,5 +90,3 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 4e7688539423b..92106d7e1d60d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -6124,5 +6124,3 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { nounwind noinline } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll index 2ba0979c72533..1e033f24994b2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -23,5 +23,3 @@ entry: ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index 951be00a124c7..ac0d5ee78666e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -74,5 +74,3 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) { ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 8813462652efd..c18c96d9c50c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -1488,5 +1488,3 @@ entry: attributes #0 = { nounwind } attributes #1 = { nounwind noinline "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll index 7be77f403a214..d3a6f70287ca5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -43,5 +43,3 @@ define void @tail_call_void_func_void() { ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index 76066e3b4bf52..8bb060fc4ed54 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -273,8 +273,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone speculatable } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; ASSUME1024: {{.*}} ; DEFAULTSIZE: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index c78f0a4eb61e9..3194581fa4213 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -418,5 +418,3 @@ declare void @llvm.debugtrap() attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 26b3c98e85cde..42c7b90da63d3 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -167,12 +167,8 @@ define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. ; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. -; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -;. diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll index 4e0fc580afdd0..35d0214a6d48d 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll @@ -207,5 +207,3 @@ define ptr addrspace(6) @addrspacecast_flat_null_to_constant32bit() { attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index 5bda853b76727..0329f23ea434f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -769,5 +769,3 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ret double %call } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index 8389a8e86cb44..dfa645c8379d5 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -689,8 +689,6 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } attributes #6 = { "enqueued-block" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } @@ -722,7 +720,3 @@ attributes #6 = { "enqueued-block" } ; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "enqueued-block" } ;. -; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -;. -; ATTRIBUTOR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 0e387716dda99..6f7193064641c 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -472,8 +472,6 @@ define void @use_alloca_func() #1 { attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. @@ -492,5 +490,3 @@ attributes #1 = { nounwind } ; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. -; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -;. diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll index a6af63b816573..ad0d6d8016ad6 100644 --- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll +++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll @@ -126,5 +126,3 @@ kernel_direct_lighting.exit: ; preds = %if.end294.i.i, %ent declare float @_Z3dotDv3_fS_(<3 x float>) -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index 266216c4d8b50..d51e47bfb8d4f 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -1294,5 +1294,3 @@ declare void @f2(i64) declare i32 @llvm.amdgcn.workitem.id.x() -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll index 9de6aea9385df..2e79d8bab46a6 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll @@ -34,5 +34,3 @@ attributes #0 = { noinline norecurse nounwind optnone } attributes #1 = { noinline norecurse nounwind readnone willreturn } attributes #2 = { nounwind readnone willreturn } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll index f719f50ef6f13..337da5d0ecbe0 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll @@ -29,5 +29,3 @@ attributes #0 = { noinline norecurse nounwind optnone } attributes #1 = { noinline norecurse nounwind readnone willreturn } attributes #2 = { nounwind readnone willreturn } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll index fe27859eb0afd..075eddd2763d3 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll @@ -35,5 +35,3 @@ attributes #0 = { noinline norecurse nounwind optnone } attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" } attributes #2 = { nounwind readnone willreturn } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll index 35b67351e85dd..4fd181d3c0f43 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll @@ -32,5 +32,3 @@ attributes #0 = { noinline norecurse nounwind optnone } attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" } attributes #2 = { nounwind readnone willreturn } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll index 3674d740b987b..00f72d5d8b1dd 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll @@ -32,5 +32,3 @@ attributes #0 = { noinline norecurse nounwind optnone } attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" } attributes #2 = { nounwind readnone willreturn } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index c10cb0ae6d336..d1cede64ce71d 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -1529,5 +1529,3 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre attributes #0 = { nounwind } attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 6be80d2f5957b..951c846379485 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -7203,5 +7203,3 @@ attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amd attributes #1 = { nounwind readnone } attributes #2 = { nounwind noinline } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll index 15152409eacf9..9abb50651146a 100644 --- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll @@ -156,5 +156,3 @@ declare void @got.func(i32) #0 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll index 10ca3c9d5f2c8..67f2eb113d880 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -584,5 +584,3 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind noinline } attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll index fc24041fe771a..ee1feccc6334c 100644 --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -610,5 +610,3 @@ attributes #0 = { nounwind } attributes #1 = { nounwind "amdgpu-num-vgpr"="8" } attributes #2 = { nounwind "frame-pointer"="all" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll index 7ce69fe2f4989..f78cb0daee5c9 100644 --- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -473,5 +473,3 @@ for.body: br i1 %cond, label %for.body, label %for.exit } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index a60a16c9be47e..b93750114e191 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -1391,5 +1391,3 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind convergent } attributes #2 = { nounwind } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index b3ef8ed129d33..93cb11b82e0ed 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -286,5 +286,3 @@ declare hidden { <4 x i32>, <4 x half> } @func_struct() #0 attributes #0 = { nounwind} -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 2ec6f7ab7602b..507af541fe145 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -3015,5 +3015,3 @@ for.body.i: ; preds = %for.body.i, %entry ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll index 012f33952f990..0bda7e4a5e49a 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll @@ -95,5 +95,3 @@ out.else: ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 7bfd9ab8cadb2..06c30dfd36033 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1535,5 +1535,3 @@ attributes #1 = { nounwind readnone speculatable } attributes #2 = { convergent nounwind } attributes #3 = { nounwind noinline } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll index c3694f3b92fb4..37cbd2d926413 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -278,5 +278,3 @@ define amdgpu_kernel void @kernel_no_calls_no_stack() { attributes #0 = { nounwind } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll index 4f5271ed23252..caf7a845759fc 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll @@ -176,5 +176,3 @@ declare float @llvm.fabs.f32(float) readnone declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll index 17e509acfb6e6..1fa9bfa3cfa3f 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll @@ -276,7 +276,5 @@ declare float @llvm.fabs.f32(float) readnone declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; FUNC: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll index 3685ac18ab4d4..19d301144c2b5 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll @@ -196,5 +196,3 @@ attributes #1 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-ld attributes #2 = { "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 9d5b324b271cb..82c58394c03bb 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -14728,6 +14728,4 @@ attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #2 = { strictfp } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} !1 = !{} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll index aad38411d12e6..f8f911b693e09 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll @@ -9465,7 +9465,5 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_system_scope attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} !1 = !{} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll index 478daf363820f..1f76a476107a3 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll @@ -9465,6 +9465,4 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_system_scope attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} !1 = !{} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index 66f8c2de30530..9db3c37045ccf 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -14362,5 +14362,3 @@ attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #2 = { strictfp } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll index 3762ed5a3bfec..06c902a3cac47 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll @@ -57,5 +57,3 @@ define amdgpu_kernel void @test_aligned_to_eight(i64 %eight) { declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll index c664c5ccab431..3964207792163 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -110,5 +110,3 @@ entry: ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index 44b1bb25bc057..1d3463ea5bfea 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -257,5 +257,3 @@ bb43: attributes #0 = { noinline optnone } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll index 77a76387d1049..0681263b7428e 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll @@ -92,5 +92,3 @@ declare void @device_func(ptr addrspace(5)) attributes #0 = { nounwind "frame-pointer"="all" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll index 2f694def5396b..04abb75c3f912 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll @@ -696,5 +696,3 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i attributes #0 = { noinline } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll index 4edd0357c6e7a..9e1815b48abfd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll @@ -240,8 +240,6 @@ bb1: ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CI: {{.*}} ; GFX10-GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll index 9d078f7906b4d..c364c391559ea 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll @@ -307,8 +307,6 @@ bb1: ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CI: {{.*}} ; GFX10-GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll index 119beb958f1bd..e970027ca5d40 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll @@ -17,7 +17,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 { ; HSA-LABEL: @kern_i8( ; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1:![0-9]+]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0:![0-9]+]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) poison, align 1 ; HSA-NEXT: ret void @@ -25,7 +25,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 { ; MESA-LABEL: @kern_i8( ; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1:![0-9]+]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0:![0-9]+]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) poison, align 1 ; MESA-NEXT: ret void @@ -38,7 +38,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 { ; HSA-LABEL: @kern_i16( ; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) poison, align 1 ; HSA-NEXT: ret void @@ -46,7 +46,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 { ; MESA-LABEL: @kern_i16( ; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) poison, align 1 ; MESA-NEXT: ret void @@ -59,7 +59,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 { ; HSA-LABEL: @kern_f16( ; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half ; HSA-NEXT: store half [[ARG_LOAD]], ptr addrspace(1) poison, align 1 @@ -68,7 +68,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 { ; MESA-LABEL: @kern_f16( ; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half ; MESA-NEXT: store half [[ARG_LOAD]], ptr addrspace(1) poison, align 1 @@ -82,7 +82,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 { ; HSA-LABEL: @kern_zeroext_i8( ; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) poison, align 1 ; HSA-NEXT: ret void @@ -90,7 +90,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 { ; MESA-LABEL: @kern_zeroext_i8( ; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) poison, align 1 ; MESA-NEXT: ret void @@ -103,7 +103,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 { ; HSA-LABEL: @kern_zeroext_i16( ; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) poison, align 1 ; HSA-NEXT: ret void @@ -111,7 +111,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 { ; MESA-LABEL: @kern_zeroext_i16( ; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) poison, align 1 ; MESA-NEXT: ret void @@ -124,7 +124,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 { ; HSA-LABEL: @kern_signext_i8( ; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) poison, align 1 ; HSA-NEXT: ret void @@ -132,7 +132,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 { ; MESA-LABEL: @kern_signext_i8( ; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) poison, align 1 ; MESA-NEXT: ret void @@ -145,7 +145,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 { ; HSA-LABEL: @kern_signext_i16( ; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) poison, align 1 ; HSA-NEXT: ret void @@ -153,7 +153,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 { ; MESA-LABEL: @kern_signext_i16( ; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) poison, align 1 ; MESA-NEXT: ret void @@ -166,10 +166,10 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) { ; HSA-LABEL: @kern_i8_i8( ; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -179,10 +179,10 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) { ; MESA-LABEL: @kern_i8_i8( ; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -198,7 +198,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) { ; HSA-LABEL: @kern_v3i8( ; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 ; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8> ; HSA-NEXT: store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) poison, align 4 @@ -207,7 +207,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) { ; MESA-LABEL: @kern_v3i8( ; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 ; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8> ; MESA-NEXT: store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) poison, align 4 @@ -221,7 +221,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) { ; HSA-LABEL: @kern_i24( ; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 ; HSA-NEXT: store i24 [[TMP2]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void @@ -229,7 +229,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) { ; MESA-LABEL: @kern_i24( ; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 ; MESA-NEXT: store i24 [[TMP2]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void @@ -242,14 +242,14 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) { ; HSA-LABEL: @kern_i32( ; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_i32( ; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void ; @@ -261,14 +261,14 @@ define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %a ; HSA-LABEL: @kern_range_noundef_i32( ; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG1:![0-9]+]], !invariant.load [[META0]], !noundef [[META0]] ; HSA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]]) ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_range_noundef_i32( ; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]], !noundef [[META0]] ; MESA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]]) ; MESA-NEXT: ret void ; @@ -280,14 +280,14 @@ define amdgpu_kernel void @kern_f32(float %arg0) { ; HSA-LABEL: @kern_f32( ; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store float [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_f32( ; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store float [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void ; @@ -299,7 +299,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) { ; HSA-LABEL: @kern_v3i32( ; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> ; HSA-NEXT: store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void @@ -307,7 +307,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) { ; MESA-LABEL: @kern_v3i32( ; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> ; MESA-NEXT: store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void @@ -320,14 +320,14 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 { ; HSA-LABEL: @kern_v8i32( ; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) poison, align 32 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_v8i32( ; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) poison, align 32 ; MESA-NEXT: ret void ; @@ -339,14 +339,14 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 { ; HSA-LABEL: @kern_v8i64( ; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) poison, align 64 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_v8i64( ; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) poison, align 64 ; MESA-NEXT: ret void ; @@ -358,14 +358,14 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 { ; HSA-LABEL: @kern_v16i64( ; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) poison, align 128 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_v16i64( ; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) poison, align 128 ; MESA-NEXT: ret void ; @@ -377,9 +377,9 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) { ; HSA-LABEL: @kern_i32_v3i32( ; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16 -; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> ; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) poison, align 4 @@ -388,9 +388,9 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) { ; MESA-LABEL: @kern_i32_v3i32( ; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52 -; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> ; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) poison, align 4 @@ -408,14 +408,14 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) { ; HSA-LABEL: @kern_struct_a( ; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_struct_a( ; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void ; @@ -427,14 +427,14 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 { ; HSA-LABEL: @kern_struct_b_packed( ; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) poison, align 16 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_struct_b_packed( ; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) poison, align 16 ; MESA-NEXT: ret void ; @@ -446,14 +446,14 @@ define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 { ; HSA-LABEL: @kern_implicit_arg_num_bytes( ; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_implicit_arg_num_bytes( ; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void ; @@ -465,14 +465,14 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %a ; HSA-LABEL: @kernel_implicitarg_no_struct_align( ; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64 -; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kernel_implicitarg_no_struct_align( ; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100 -; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void ; @@ -484,14 +484,14 @@ define amdgpu_kernel void @kern_lds_ptr(ptr addrspace(3) %lds) #0 { ; HSA-LABEL: @kern_lds_ptr( ; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_lds_ptr( ; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4 ; MESA-NEXT: ret void ; @@ -513,10 +513,10 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { ; HSA-LABEL: @kern_realign_i8_i8( ; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -526,10 +526,10 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { ; MESA-LABEL: @kern_realign_i8_i8( ; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -545,14 +545,14 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) # ; HSA-LABEL: @kern_realign_i8_i8_i8( ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -563,14 +563,14 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) # ; MESA-LABEL: @kern_realign_i8_i8_i8( ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -588,18 +588,18 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2 ; HSA-LABEL: @kern_realign_i8_i8_i8_i8( ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -611,18 +611,18 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2 ; MESA-LABEL: @kern_realign_i8_i8_i8_i8( ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -642,10 +642,10 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 { ; HSA-LABEL: @kern_realign_i8_v3i8( ; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8> ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -655,10 +655,10 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 { ; MESA-LABEL: @kern_realign_i8_v3i8( ; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8> ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -674,10 +674,10 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { ; HSA-LABEL: @kern_realign_i8_i16( ; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -687,10 +687,10 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { ; MESA-LABEL: @kern_realign_i8_i16( ; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -706,10 +706,10 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { ; HSA-LABEL: @kern_realign_i1_i1( ; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -719,10 +719,10 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { ; MESA-LABEL: @kern_realign_i1_i1( ; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -738,14 +738,14 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) # ; HSA-LABEL: @kern_realign_i1_i1_i1( ; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -756,14 +756,14 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) # ; MESA-LABEL: @kern_realign_i1_i1_i1( ; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -781,18 +781,18 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2 ; HSA-LABEL: @kern_realign_i1_i1_i1_i1( ; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -804,18 +804,18 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2 ; MESA-LABEL: @kern_realign_i1_i1_i1_i1( ; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -835,10 +835,10 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { ; HSA-LABEL: @kern_realign_i1_v3i1( ; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x i1> @@ -849,10 +849,10 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { ; MESA-LABEL: @kern_realign_i1_v3i1( ; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x i1> @@ -869,10 +869,10 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { ; HSA-LABEL: @kern_realign_i1_i16( ; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -882,10 +882,10 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { ; MESA-LABEL: @kern_realign_i1_i16( ; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -901,30 +901,30 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar ; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 ; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 -; HSA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; HSA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8 ; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8 ; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 -; HSA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; HSA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16 ; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 ; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 -; HSA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; HSA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24 ; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -939,30 +939,30 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar ; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 ; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 -; MESA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]] +; MESA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META0]] ; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8 ; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8 ; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 -; MESA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]] +; MESA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META0]] ; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16 ; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 ; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 -; MESA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]] +; MESA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META0]] ; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24 ; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) poison, align 1 @@ -988,11 +988,11 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 { ; HSA-LABEL: @kern_realign_f16_f16( ; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half @@ -1003,11 +1003,11 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 { ; MESA-LABEL: @kern_realign_f16_f16( ; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half @@ -1024,14 +1024,14 @@ define amdgpu_kernel void @kern_global_ptr(ptr addrspace(1) %ptr) #0 { ; HSA-LABEL: @kern_global_ptr( ; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_global_ptr( ; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; MESA-NEXT: ret void ; @@ -1043,14 +1043,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) deref ; HSA-LABEL: @kern_global_ptr_dereferencable( ; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]] +; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !dereferenceable [[META2:![0-9]+]] ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_global_ptr_dereferencable( ; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]] +; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !dereferenceable [[META2:![0-9]+]] ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; MESA-NEXT: ret void ; @@ -1062,14 +1062,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace( ; HSA-LABEL: @kern_global_ptr_dereferencable_or_null( ; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]] +; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !dereferenceable_or_null [[META3:![0-9]+]] ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_global_ptr_dereferencable_or_null( ; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]] +; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !dereferenceable_or_null [[META3:![0-9]+]] ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; MESA-NEXT: ret void ; @@ -1081,14 +1081,14 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(ptr addrspace(1) nonnull %ptr ; HSA-LABEL: @kern_nonnull_global_ptr( ; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !nonnull [[META1]] +; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !nonnull [[META0]] ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_nonnull_global_ptr( ; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !nonnull [[META1]] +; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !nonnull [[META0]] ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; MESA-NEXT: ret void ; @@ -1100,14 +1100,14 @@ define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 % ; HSA-LABEL: @kern_align32_global_ptr( ; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]] +; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !align [[META4:![0-9]+]] ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_align32_global_ptr( ; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]] +; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !align [[META4:![0-9]+]] ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) poison, align 8 ; MESA-NEXT: ret void ; @@ -1141,14 +1141,14 @@ define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr ; HSA-LABEL: @kern_noundef_global_ptr( ; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]] +; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !noundef [[META0]] ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_noundef_global_ptr( ; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]] +; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !noundef [[META0]] ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8 ; MESA-NEXT: ret void ; @@ -1161,7 +1161,7 @@ define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 { ; HSA-NEXT: entry: ; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1 ; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1172,7 +1172,7 @@ define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 { ; MESA-NEXT: entry: ; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1 ; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1192,7 +1192,7 @@ define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 { ; HSA-NEXT: entry: ; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1 ; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1203,7 +1203,7 @@ define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 { ; MESA-NEXT: entry: ; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1 ; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1223,7 +1223,7 @@ define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 { ; HSA-NEXT: entry: ; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1 ; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1234,7 +1234,7 @@ define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 { ; MESA-NEXT: entry: ; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1 ; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1254,7 +1254,7 @@ define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 { ; HSA-NEXT: entry: ; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1 ; HSA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1265,7 +1265,7 @@ define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 { ; MESA-NEXT: entry: ; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1 ; MESA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4 @@ -1292,14 +1292,14 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 { ; HSA-LABEL: @empty_struct_with_other( ; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) poison, align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @empty_struct_with_other( ; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) poison, align 4 ; MESA-NEXT: ret void ; @@ -1313,7 +1313,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) { ; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) ; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4 ; HSA-NEXT: ret void ; @@ -1321,7 +1321,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) { ; MESA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) ; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4 ; MESA-NEXT: ret void ; @@ -1337,7 +1337,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) { ; HSA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5) ; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5) ; HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4 ; HSA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4 @@ -1347,7 +1347,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) { ; MESA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5) ; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5) ; MESA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4 ; MESA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4 @@ -1365,7 +1365,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out ; HSA-LABEL: @byref_constant_i8_arg( ; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1 ; HSA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32 @@ -1375,7 +1375,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out ; MESA-LABEL: @byref_constant_i8_arg( ; MESA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1 ; MESA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32 @@ -1392,7 +1392,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou ; HSA-LABEL: @byref_constant_i16_arg( ; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2 ; HSA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32 @@ -1402,7 +1402,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou ; MESA-LABEL: @byref_constant_i16_arg( ; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2 ; MESA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32 @@ -1419,10 +1419,10 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou ; HSA-LABEL: @byref_constant_i32_arg( ; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12 -; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1431,10 +1431,10 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou ; MESA-LABEL: @byref_constant_i32_arg( ; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48 -; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1450,10 +1450,10 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture % ; HSA-LABEL: @byref_constant_v4i32_arg( ; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(296) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 16 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 32 -; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN:%.*]] = load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16 ; HSA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1462,10 +1462,10 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture % ; MESA-LABEL: @byref_constant_v4i32_arg( ; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(292) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 52 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 68 -; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN:%.*]] = load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16 ; MESA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1481,10 +1481,10 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu ; HSA-LABEL: @byref_align_constant_i32_arg( ; HSA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 256 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 260 -; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1493,10 +1493,10 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu ; MESA-LABEL: @byref_align_constant_i32_arg( ; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 292 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 296 -; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 8, !invariant.load [[META1]] +; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 8, !invariant.load [[META0]] ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1512,10 +1512,10 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace ; HSA-LABEL: @byref_natural_align_constant_v16i32_arg( ; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(392) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 64 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 128 -; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64 ; HSA-NEXT: store volatile <16 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1524,10 +1524,10 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace ; MESA-LABEL: @byref_natural_align_constant_v16i32_arg( ; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(388) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 100 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 164 -; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64 ; MESA-NEXT: store volatile <16 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1544,7 +1544,7 @@ define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ; HSA-LABEL: @byref_global_i32_arg( ; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1) ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4 @@ -1554,7 +1554,7 @@ define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ; MESA-LABEL: @byref_global_i32_arg( ; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1) ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4 @@ -1570,7 +1570,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, p ; HSA-LABEL: @byref_flat_i32_arg( ; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr ; HSA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4 @@ -1580,7 +1580,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, p ; MESA-LABEL: @byref_flat_i32_arg( ; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr ; MESA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4 @@ -1596,7 +1596,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu ; HSA-LABEL: @byref_constant_32bit_i32_arg( ; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6) ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 4 @@ -1606,7 +1606,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu ; MESA-LABEL: @byref_constant_32bit_i32_arg( ; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6) ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 4 @@ -1622,7 +1622,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture % ; HSA-LABEL: @byref_unknown_as_i32_arg( ; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999) ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4 @@ -1632,7 +1632,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture % ; MESA-LABEL: @byref_unknown_as_i32_arg( ; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999) ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4 @@ -1649,7 +1649,7 @@ define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ; HSA-LABEL: @byref_local_i32_arg( ; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3) ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4 @@ -1659,7 +1659,7 @@ define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ; MESA-LABEL: @byref_local_i32_arg( ; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3) ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4 @@ -1675,11 +1675,11 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu ; HSA-LABEL: @multi_byref_constant_i32_arg( ; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(280) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8 ; HSA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 16 -; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] +; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] ; HSA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; HSA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; HSA-NEXT: store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1690,11 +1690,11 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu ; MESA-LABEL: @multi_byref_constant_i32_arg( ; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(276) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44 ; MESA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 52 -; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] +; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META0]] ; MESA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; MESA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4 ; MESA-NEXT: store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4 @@ -1734,14 +1734,14 @@ define amdgpu_kernel void @noundef_f32(float noundef %arg0) { ; HSA-LABEL: @noundef_f32( ; HSA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !noundef [[META0]] ; HSA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]]) ; HSA-NEXT: ret void ; ; MESA-LABEL: @noundef_f32( ; MESA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !noundef [[META0]] ; MESA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]]) ; MESA-NEXT: ret void ; @@ -1753,7 +1753,7 @@ define amdgpu_kernel void @noundef_f16(half noundef %arg0) { ; HSA-LABEL: @noundef_f16( ; HSA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]], !noundef [[META1]] +; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META0]], !noundef [[META0]] ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half ; HSA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]]) @@ -1762,7 +1762,7 @@ define amdgpu_kernel void @noundef_f16(half noundef %arg0) { ; MESA-LABEL: @noundef_f16( ; MESA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]], !noundef [[META1]] +; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META0]], !noundef [[META0]] ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half ; MESA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]]) @@ -1776,14 +1776,14 @@ define amdgpu_kernel void @noundef_v2i32(<2 x i32> noundef %arg0) { ; HSA-LABEL: @noundef_v2i32( ; HSA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !noundef [[META0]] ; HSA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]]) ; HSA-NEXT: ret void ; ; MESA-LABEL: @noundef_v2i32( ; MESA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !noundef [[META0]] ; MESA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]]) ; MESA-NEXT: ret void ; @@ -1795,14 +1795,14 @@ define amdgpu_kernel void @noundef_p0(ptr noundef %arg0) { ; HSA-LABEL: @noundef_p0( ; HSA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !noundef [[META0]] ; HSA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]]) ; HSA-NEXT: ret void ; ; MESA-LABEL: @noundef_p0( ; MESA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !noundef [[META0]] ; MESA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]]) ; MESA-NEXT: ret void ; @@ -1814,14 +1814,14 @@ define amdgpu_kernel void @noundef_v2p0(<2 x ptr> noundef %arg0) { ; HSA-LABEL: @noundef_v2p0( ; HSA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]] +; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META0]], !noundef [[META0]] ; HSA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]]) ; HSA-NEXT: ret void ; ; MESA-LABEL: @noundef_v2p0( ; MESA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 36 -; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]] +; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META0]], !noundef [[META0]] ; MESA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]]) ; MESA-NEXT: ret void ; @@ -1834,8 +1834,6 @@ attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"= attributes #2 = { nounwind "target-cpu"="tahiti" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. ; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" } @@ -1849,17 +1847,15 @@ attributes #2 = { nounwind "target-cpu"="tahiti" } ; MESA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" } ; MESA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. -; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -; HSA: [[META1]] = !{} -; HSA: [[RNG2]] = !{i32 0, i32 8} -; HSA: [[META3]] = !{i64 42} -; HSA: [[META4]] = !{i64 128} -; HSA: [[META5]] = !{i64 1024} +; HSA: [[META0]] = !{} +; HSA: [[RNG1]] = !{i32 0, i32 8} +; HSA: [[META2]] = !{i64 42} +; HSA: [[META3]] = !{i64 128} +; HSA: [[META4]] = !{i64 1024} ;. -; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -; MESA: [[META1]] = !{} -; MESA: [[RNG2]] = !{i32 0, i32 8} -; MESA: [[META3]] = !{i64 42} -; MESA: [[META4]] = !{i64 128} -; MESA: [[META5]] = !{i64 1024} +; MESA: [[META0]] = !{} +; MESA: [[RNG1]] = !{i32 0, i32 8} +; MESA: [[META2]] = !{i64 42} +; MESA: [[META3]] = !{i64 128} +; MESA: [[META4]] = !{i64 1024} ;. diff --git a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll index 11d73732bbbc7..15f93f173eae8 100644 --- a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll +++ b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll @@ -230,5 +230,3 @@ define amdgpu_kernel void @nocall_false_sharing() { } ; CHECK: ; LDSByteSize: 4 bytes -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll index 2d853212166e9..f30a04a5a76cf 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -274,5 +274,3 @@ entry: attributes #0 = { "frame-pointer"="none" noinline } attributes #1 = { "frame-pointer"="all" noinline } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index 5f2bd53dc91a3..5025c1d8a433f 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -1193,5 +1193,3 @@ ret: attributes #0 = { nounwind } attributes #1 = { nounwind "amdgpu-waves-per-eu"="7,7" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll index 20e5af1b87f1f..2d95ec6f699dc 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll @@ -825,5 +825,3 @@ declare void @foo() attributes #0 = { "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40"} -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll index 6e76ee10292ea..a88acb69bc4da 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll @@ -95,5 +95,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" } attributes #1 = { nounwind readnone } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll index cdfba3cf0db7f..6d69b4ca6fa3e 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll @@ -231,5 +231,3 @@ ret: attributes #0 = { nounwind } attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll index 47810346c50b7..c4610200d18a0 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll @@ -45,5 +45,3 @@ define amdgpu_kernel void @kernel() { ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index 3c47e2504747d..2e3693b7250f3 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -63,13 +63,8 @@ define amdgpu_kernel void @test_simple_indirect_call() { } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. -;. -; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -;. diff --git a/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll index e59e3f6de697f..1aec329ac483c 100644 --- a/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll +++ b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll @@ -23,5 +23,3 @@ define amdgpu_kernel void @test_sopk_size(i32 %var.mode) { declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll index 3abd1562dbbd8..72254023bb9de 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -191,5 +191,3 @@ declare float @llvm.amdgcn.wqm.f32(float) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll index 8f16fcf6d0890..d2394bab82c77 100644 --- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll +++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll @@ -1672,8 +1672,6 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; WAVE32: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index 474482b2d89ff..d23e314b9465f 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -771,5 +771,3 @@ bb73.i: ; preds = %bb70.i br label %bb5.backedge } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index f08e5be0fd742..e0d16987b31e7 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -522,5 +522,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() declare hidden float @spam() -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll index ab7e85fdff516..9ed533297e832 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll @@ -97,5 +97,3 @@ declare void @llvm.assume(i1 noundef) declare void @llvm.amdgcn.s.barrier() declare void @llvm.trap() -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index ddc50b7d49504..68010fc45b5a5 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -1723,5 +1723,3 @@ declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace( declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32) declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32) -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}