-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[NFC] Reduce fragility of swdev503538-move-to-valu… test #170702
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…test. This MIR test was generated from swdev503538-move-to-valu-stack-srd-physreg.ll using: llc -stop-before=si-fix-sgpr-copies -O0 The original test was created in PR 120815, but it depends on -O0 AND DAGCombiner, that is switched on by default for -O0. The patch reduces frigility of the test and removes dependency on DAGCombiner.
|
@llvm/pr-subscribers-backend-amdgpu Author: Daniil Fukalov (dfukalov) ChangesThis MIR test was generated from swdev503538-move-to-valu-stack-srd-physreg.ll using: The original test was created in PR #120815, but it depends on -O0 AND DAGCombiner, that is switched on by default for -O0. The patch reduces fragility of the test and removes dependency on DAGCombiner. Full diff: https://github.com/llvm/llvm-project/pull/170702.diff 2 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
deleted file mode 100644
index f0b3d334af67d..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs=0 -O0 2> %t.err < %s | FileCheck %s
-; RUN: FileCheck -check-prefix=ERR %s < %t.err
-
-; FIXME: This error will be fixed by supporting arbitrary divergent
-; dynamic allocas by performing a wave umax of the size.
-
-; ERR: error: <unknown>:0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy
-
-; CHECK: ; illegal copy v0 to s32
-
-define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) {
-entry:
- %idx = load i32, ptr addrspace(1) %ptr, align 4
- %zero = extractelement <4 x i32> zeroinitializer, i32 %idx
- %alloca = alloca [2048 x i8], i32 %zero, align 8, addrspace(5)
- %ld = load i32, ptr addrspace(5) %alloca, align 8
- call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 2048, i1 false)
- ret i32 %ld
-}
-
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #0
-
-attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) }
diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir
new file mode 100644
index 0000000000000..b21623d02142b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir
@@ -0,0 +1,221 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -start-before=si-fix-sgpr-copies -O0 -verify-machineinstrs=0 -combiner-disabled=true %s -o - 2> %t.err | FileCheck %s
+# RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+# FIXME: This error will be fixed by supporting arbitrary divergent
+# dynamic allocas by performing a wave umax of the size.
+
+# ERR: error: <unknown>:0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy
+
+# CHECK: ; illegal copy v{{[0-9]+}} to s32
+
+--- |
+ ; ModuleID = 'llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll'
+ source_filename = "llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll"
+ target datalayout = "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdhsa"
+
+ define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) #0 {
+ entry:
+ %idx = load i32, ptr addrspace(1) %ptr, align 4
+ %zero = extractelement <4 x i32> zeroinitializer, i32 %idx
+ %alloca = alloca [2048 x i8], i32 %zero, align 8, addrspace(5)
+ %ld = load i32, ptr addrspace(5) %alloca, align 8
+ br label %loadstoreloop, !amdgpu.uniform !0
+
+ loadstoreloop: ; preds = %entry, %loadstoreloop
+ %0 = phi i32 [ %2, %loadstoreloop ], [ 0, %entry ]
+ %1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, i32 %0
+ store i8 0, ptr addrspace(5) %1, align 1
+ %2 = add i32 %0, 1
+ %3 = icmp uge i32 %2, 2048
+ br i1 %3, label %Flow, label %loadstoreloop, !amdgpu.uniform !0
+
+ Flow: ; preds = %loadstoreloop
+ br label %split, !amdgpu.uniform !0
+
+ split: ; preds = %Flow
+ ret i32 %ld
+ }
+
+ ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+ declare void @llvm.memset.p5.i32(ptr addrspace(5) writeonly captures(none), i8, i32, i1 immarg) #1
+
+ attributes #0 = { "target-cpu"="gfx90a" }
+ attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) "target-cpu"="gfx90a" }
+
+ !0 = !{}
+...
+---
+name: move_to_valu_assert_srd_is_physreg_swdev503538
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: av_32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 4, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 5, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 6, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 7, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 8, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 9, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 10, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 11, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 12, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 13, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 14, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 15, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 16, class: sreg_64, preferred-register: '', flags: [ ] }
+ - { id: 17, class: av_32, preferred-register: '', flags: [ ] }
+ - { id: 18, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 19, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 20, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 21, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 22, class: sreg_32, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$vgpr0', virtual-reg: '%12' }
+ - { reg: '$vgpr1', virtual-reg: '%13' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack:
+ - { id: 0, name: alloca, type: variable-sized, offset: 0, alignment: 1,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 1
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: false
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 16
+ numWaveDispatchVGPRs: 2
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ bytesInStackArgArea: 0
+ returnsVoid: false
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ numKernargPreloadSGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $vgpr0, $vgpr1
+
+ %13:vgpr_32 = COPY $vgpr1
+ %12:vgpr_32 = COPY $vgpr0
+ %16:sreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1
+ %14:av_64_align2 = COPY %16
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc
+ %17:av_32 = COPY $sgpr32
+ $sgpr32 = COPY %17
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc
+ %0:vgpr_32 = COPY %17
+ %18:sreg_32 = COPY %17
+ %1:av_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, %18, 0, 0, 0, implicit $exec :: (load (s32) from %ir.alloca, align 8, addrspace 5)
+ %15:sreg_32 = S_MOV_B32 0
+ S_BRANCH %bb.1
+
+ bb.1.loadstoreloop:
+ successors: %bb.2(0x40000000), %bb.1(0x40000000)
+
+ %2:sreg_32 = PHI %15, %bb.0, %3, %bb.1
+ %19:vgpr_32 = V_ADD_U32_e64 %0, %2, 0, implicit $exec
+ %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ BUFFER_STORE_BYTE_OFFEN killed %20, killed %19, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.1, addrspace 5)
+ %21:sreg_32 = S_MOV_B32 1
+ %3:sreg_32 = S_ADD_I32 %2, killed %21, implicit-def dead $scc
+ %22:sreg_32 = S_MOV_B32 2048
+ S_CMP_LT_U32 %3, killed %22, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.2.Flow:
+ successors: %bb.3(0x80000000)
+
+ S_BRANCH %bb.3
+
+ bb.3.split:
+ $vgpr0 = COPY %1
+ SI_RETURN implicit $vgpr0
+...
|
Can you add |
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd rather keep the IR test. The move to valu detail is an implementation defect which should be fixed in the future (actually I thought we handled this correctly now)? MIR tests are much less maintainable and generally should be avoided.
Yes, I understand that generally it may be better. But I'm working on removing DAGCombiner from O0 pipeline for AMDGPU (it removes some instructions which are expected to be available for breakpoint). And the test in the IR variant started to fail, since implicitly needs DAGCombiner. I tried to create a reduced tescase(s) with testing only one pass/set of passes but wasn't able to achieve it if we need to keep ERR: check in the test. |
Thanks, will update the patch |
|
You'd be better off changing the IR content. In particular you probably want to stop using the extractelement on zero initializer and turn that into some variable |
This MIR test was generated from swdev503538-move-to-valu-stack-srd-physreg.ll using:
llc -stop-before=si-fix-sgpr-copies -O0The original test was created in PR #120815, but it depends on -O0 AND DAGCombiner, that is switched on by default for -O0. The patch reduces fragility of the test and removes dependency on DAGCombiner.