Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/lib/Target/X86/X86LowerTileCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,16 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *NewMI =
addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
.addReg(SrcReg, getKillRegState(SrcMO.isKill()));
MachineOperand &MO = NewMI->getOperand(2);
MO.setReg(GR64Cand ? GR64Cand : X86::RAX);
MO.setIsKill(true);
MachineOperand *MO = &NewMI->getOperand(2);
MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
// tileloadd (%sp, %idx), %tmm
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
#undef GET_EGPR_IF_ENABLED
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
TileSS);
MO = &NewMI->getOperand(3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it better to use "1 + X86::AddrIndexReg" instead of "3" as in line X86InstrInfo.cpp#L4768?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, done.

MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
MO->setIsKill(true);
if (!GR64Cand) {
// restore %rax
// mov (%sp) %rax
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ define dso_local void @test1(ptr%buf) nounwind {
; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm0
; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm1
; CHECK-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
; CHECK-NEXT: tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload
; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm2
; CHECK-NEXT: tilestored %tmm2, (%rbx,%r15)
; CHECK-NEXT: incl %r14d
Expand Down Expand Up @@ -109,8 +109,8 @@ define dso_local void @test1(ptr%buf) nounwind {
; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
; EGPR-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00]
; EGPR-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00]
; EGPR-NEXT: tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x04,0x00,0x04,0x00,0x00]
; EGPR-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
; EGPR-NEXT: tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
; EGPR-NEXT: incl %r14d # encoding: [0x41,0xff,0xc6]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind {
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm0, 3072(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: tileloadd 3072(%rsp,%rax), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: jmp .LBB1_4
%4 = shl i32 %2, 4
Expand Down Expand Up @@ -212,7 +212,7 @@ define void @multi_use() nounwind {
; CHECK-NEXT: tilezero %tmm0
; CHECK-NEXT: movabsq $64, %rbp
; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill
; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm1
; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm0
; CHECK-NEXT: addq $2928, %rsp # imm = 0xB70
Expand Down