From 7e7f34d8f19d4b6692ed2bab472b5e5aa17e2beb Mon Sep 17 00:00:00 2001 From: "Wang, Phoebe" Date: Fri, 27 Sep 2024 17:26:09 +0800 Subject: [PATCH 1/2] [X86][AMX] Fix missing stride register for tileloadd Fixes: #110190 --- llvm/lib/Target/X86/X86LowerTileCopy.cpp | 8 +++++--- llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll | 6 +++--- llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp index 613722b398f44..1184460acc4af 100644 --- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp +++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp @@ -140,14 +140,16 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) { MachineInstr *NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS) .addReg(SrcReg, getKillRegState(SrcMO.isKill())); - MachineOperand &MO = NewMI->getOperand(2); - MO.setReg(GR64Cand ? GR64Cand : X86::RAX); - MO.setIsKill(true); + MachineOperand *MO = &NewMI->getOperand(2); + MO->setReg(GR64Cand ? GR64Cand : X86::RAX); // tileloadd (%sp, %idx), %tmm Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD); #undef GET_EGPR_IF_ENABLED NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg), TileSS); + MO = &NewMI->getOperand(3); + MO->setReg(GR64Cand ? GR64Cand : X86::RAX); + MO->setIsKill(true); if (!GR64Cand) { // restore %rax // mov (%sp) %rax diff --git a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll index a0085afbaf025..06acad07db9cb 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll @@ -45,7 +45,7 @@ define dso_local void @test1(ptr%buf) nounwind { ; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm0 ; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm1 ; CHECK-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill -; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload +; CHECK-NEXT: tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload ; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 ; CHECK-NEXT: tilestored %tmm2, (%rbx,%r15) ; CHECK-NEXT: incl %r14d @@ -109,8 +109,8 @@ define dso_local void @test1(ptr%buf) nounwind { ; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b] ; EGPR-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill ; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00] -; EGPR-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload -; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00] +; EGPR-NEXT: tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload +; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x04,0x00,0x04,0x00,0x00] ; EGPR-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0] ; EGPR-NEXT: tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b] ; EGPR-NEXT: incl %r14d # encoding: [0x41,0xff,0xc6] diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll index 15e7136f4a503..fbebb955f8d97 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll @@ -148,7 +148,7 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind { ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: movabsq $64, %rax ; CHECK-NEXT: tilestored %tmm0, 3072(%rsp,%rax) # 1024-byte Folded Spill -; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload +; CHECK-NEXT: tileloadd 3072(%rsp,%rax), %tmm1 # 1024-byte Folded Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-NEXT: jmp .LBB1_4 %4 = shl i32 %2, 4 @@ -212,7 +212,7 @@ define void @multi_use() nounwind { ; CHECK-NEXT: tilezero %tmm0 ; CHECK-NEXT: movabsq $64, %rbp ; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill -; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload +; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm1 # 1024-byte Folded Reload ; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm1 ; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm0 ; CHECK-NEXT: addq $2928, %rsp # imm = 0xB70 From 047db689efaecd05b40059a9856f8c1d342a2e12 Mon Sep 17 00:00:00 2001 From: "Wang, Phoebe" Date: Fri, 27 Sep 2024 21:50:39 +0800 Subject: [PATCH 2/2] Address review comment --- llvm/lib/Target/X86/X86LowerTileCopy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp index 1184460acc4af..9cf700d6b65b0 100644 --- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp +++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp @@ -140,14 +140,14 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) { MachineInstr *NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS) .addReg(SrcReg, getKillRegState(SrcMO.isKill())); - MachineOperand *MO = &NewMI->getOperand(2); + MachineOperand *MO = &NewMI->getOperand(X86::AddrIndexReg); MO->setReg(GR64Cand ? GR64Cand : X86::RAX); // tileloadd (%sp, %idx), %tmm Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD); #undef GET_EGPR_IF_ENABLED NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg), TileSS); - MO = &NewMI->getOperand(3); + MO = &NewMI->getOperand(1 + X86::AddrIndexReg); MO->setReg(GR64Cand ? GR64Cand : X86::RAX); MO->setIsKill(true); if (!GR64Cand) {