Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions llvm/lib/Target/X86/X86FastPreTileConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,8 +564,17 @@ bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator I;
if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
I = ++LastShapeMI->getIterator();
else
I = ++MI.getIterator();
else {
// Call can overwrite registers like rax, ensure the tile config
// instruction is sinked closer to first instruction that uses tile.
auto UseIt = MI.getIterator();
while (UseIt != MBB.end()) {
if (HasTileOperand(MRI, *UseIt))
break;
++UseIt;
}
I = UseIt;
}
Config(*I);
HasUnconfigTile = false;
continue;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/AMX/amx-across-func.ll
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,12 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; O0-NEXT: callq foo
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
; O0-NEXT: movl $32, %esi
; O0-NEXT: movl $buf+2048, %edx
; O0-NEXT: # implicit-def: $al
; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; O0-NEXT: movl $32, %esi
; O0-NEXT: movl $buf+2048, %edx
; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0
; O0-NEXT: movl $64, %esi
; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
Expand Down
151 changes: 151 additions & 0 deletions llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass="fastpretileconfig,regallocfast,fasttileconfig" -o - %s | FileCheck %s

# Test to verify that ldtilecfg instructions are sinked closer to tile defining
# instructions after a call. This ensures call does not overwrite values in
# registers being used for configuring the AMX tile.

...
---
name: test_api
alignment: 16
tracksRegLiveness: true
registers:
- { id: 0, class: gr64, preferred-register: '', flags: [ ] }
- { id: 1, class: gr64, preferred-register: '', flags: [ ] }
- { id: 2, class: gr64, preferred-register: '', flags: [ ] }
- { id: 3, class: gr64, preferred-register: '', flags: [ ] }
- { id: 4, class: tile, preferred-register: '', flags: [ ] }
- { id: 5, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 6, class: gr64, preferred-register: '', flags: [ ] }
- { id: 9, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 10, class: gr64, preferred-register: '', flags: [ ] }
- { id: 13, class: tile, preferred-register: '', flags: [ ] }
- { id: 14, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 15, class: gr64, preferred-register: '', flags: [ ] }
- { id: 18, class: gr64, preferred-register: '', flags: [ ] }
- { id: 19, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 22, class: gr64, preferred-register: '', flags: [ ] }
- { id: 23, class: gr64, preferred-register: '', flags: [ ] }
- { id: 24, class: gr64, preferred-register: '', flags: [ ] }
- { id: 25, class: tile, preferred-register: '', flags: [ ] }
- { id: 26, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 29, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 30, class: gr64, preferred-register: '', flags: [ ] }
- { id: 33, class: tile, preferred-register: '', flags: [ ] }
- { id: 34, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 35, class: gr64, preferred-register: '', flags: [ ] }
- { id: 38, class: gr64_nosp, preferred-register: '', flags: [ ] }
- { id: 39, class: gr64, preferred-register: '', flags: [ ] }
- { id: 40, class: gr16, preferred-register: '', flags: [ ] }
- { id: 41, class: gr16, preferred-register: '', flags: [ ] }
liveins:
- { reg: '$rdi', virtual-reg: '%0' }
- { reg: '$rsi', virtual-reg: '%2' }
frameInfo:
maxAlignment: 1024
stack:
- { id: 0, size: 1024, alignment: 1024 }
- { id: 1, size: 1024, alignment: 1024 }
- { id: 2, size: 32, alignment: 32 }
- { id: 3, size: 32, alignment: 32 }
- { id: 4, size: 8, alignment: 8 }
machineFunctionInfo:
amxProgModel: ManagedRA
body: |
bb.0.entry:
liveins: $rdi, $rsi
; CHECK-LABEL: name: test_api
; CHECK: liveins: $rdi, $rsi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $zmm0 = AVX512_512_SET0
; CHECK-NEXT: VMOVUPSZmr %stack.5, 1, $noreg, 0, $noreg, killed renamable $zmm0 :: (store (s512) into %stack.5, align 4)
; CHECK-NEXT: MOV8mi %stack.5, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.5, align 4)
; CHECK-NEXT: MOV64mr %stack.8, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.8)
; CHECK-NEXT: renamable $rsi = MOV32ri64 16
; CHECK-NEXT: renamable $rdx = LEA64r %stack.2, 1, $noreg, 0, $noreg
; CHECK-NEXT: renamable $cx = MOV16ri 16
; CHECK-NEXT: MOV16mr %stack.7, 1, $noreg, 0, $noreg, $cx :: (store (s16) into %stack.7)
; CHECK-NEXT: renamable $ax = MOV16ri 2
; CHECK-NEXT: MOV16mr %stack.6, 1, $noreg, 0, $noreg, $ax :: (store (s16) into %stack.6)
; CHECK-NEXT: $al = IMPLICIT_DEF
; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
; CHECK-NEXT: $al = IMPLICIT_DEF
; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
; CHECK-NEXT: PLDTILECFGV %stack.5, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.5, align 4)
; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
; CHECK-NEXT: renamable $rsi = MOV32ri64 64
; CHECK-NEXT: renamable $rdx = LEA64r %stack.1, 1, $noreg, 0, $noreg
; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
; CHECK-NEXT: renamable $rsi = MOV32ri64 64
; CHECK-NEXT: renamable $rdx = LEA64r %stack.1, 1, $noreg, 0, $noreg
; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
; CHECK-NEXT: renamable $rdx = MOV32ri64 16
; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdi, 1, killed renamable $rdx, 0, $noreg, killed renamable $tmm0
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def dead $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: CALL64pcrel32 &foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax
; CHECK-NEXT: $rsi = MOV64rm %stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %stack.8)
; CHECK-NEXT: $cx = MOV16rm %stack.7, 1, $noreg, 0, $noreg :: (load (s16) from %stack.7)
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def dead $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: renamable $rdx = COPY $rax
; CHECK-NEXT: $ax = MOV16rm %stack.6, 1, $noreg, 0, $noreg :: (load (s16) from %stack.6)
; CHECK-NEXT: MOV64mr killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $rdx
; CHECK-NEXT: renamable $rdx = MOV64rm %stack.4, 1, $noreg, 0, $noreg
; CHECK-NEXT: renamable $rsi = MOV32ri64 16
; CHECK-NEXT: $al = IMPLICIT_DEF
; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
; CHECK-NEXT: $al = IMPLICIT_DEF
; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
; CHECK-NEXT: PLDTILECFGV %stack.5, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.5, align 4)
; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
; CHECK-NEXT: renamable $rsi = MOV32ri64 64
; CHECK-NEXT: renamable $rdx = LEA64r %stack.0, 1, $noreg, 0, $noreg
; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
; CHECK-NEXT: renamable $rsi = MOV32ri64 64
; CHECK-NEXT: renamable $rdx = LEA64r %stack.0, 1, $noreg, 0, $noreg
; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
; CHECK-NEXT: renamable $rsi = MOV32ri64 16
; CHECK-NEXT: renamable $rdx = LEA64r %stack.4, 1, $noreg, 0, $noreg
; CHECK-NEXT: PTILESTOREDV killed renamable $ax, killed renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
; CHECK-NEXT: RET64
%2:gr64 = COPY $rsi
%0:gr64 = COPY $rdi
%1:gr64 = COPY killed %0
%3:gr64 = COPY killed %2
%38:gr64_nosp = MOV32ri64 16
%39:gr64 = LEA64r %stack.2, 1, $noreg, 0, $noreg
%40:gr16 = MOV16ri 16
%41:gr16 = MOV16ri 2
%33:tile = PTILELOADDV %41:gr16, %40:gr16, killed %39, 1, killed %38, 0, $noreg
%34:gr64_nosp = MOV32ri64 64
%35:gr64 = LEA64r %stack.1, 1, $noreg, 0, $noreg
PTILESTOREDV %41:gr16, %40:gr16, killed %35, 1, killed %34, 0, $noreg, %33
%29:gr64_nosp = MOV32ri64 64
%30:gr64 = LEA64r %stack.1, 1, $noreg, 0, $noreg
%25:tile = PTILELOADDV %41:gr16, %40:gr16, killed %30, 1, killed %29, 0, $noreg
%26:gr64_nosp = MOV32ri64 16
PTILESTOREDV %41:gr16, %40:gr16, %1, 1, killed %26, 0, $noreg, %25
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
CALL64pcrel32 &foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax
ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
%24:gr64 = COPY $rax
MOV64mr %3, 1, $noreg, 0, $noreg, %24
%22:gr64 = MOV64rm %stack.4, 1, $noreg, 0, $noreg
%19:gr64_nosp = MOV32ri64 16
%13:tile = PTILELOADDV %41:gr16, %40:gr16, %22, 1, killed %19, 0, $noreg
%14:gr64_nosp = MOV32ri64 64
%15:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg
PTILESTOREDV %41:gr16, %40:gr16, killed %15, 1, killed %14, 0, $noreg, %13
%9:gr64_nosp = MOV32ri64 64
%10:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg
%4:tile = PTILELOADDV %41:gr16, %40:gr16, killed %10, 1, killed %9, 0, $noreg
%5:gr64_nosp = MOV32ri64 16
%6:gr64 = LEA64r %stack.4, 1, $noreg, 0, $noreg
PTILESTOREDV %41:gr16, %40:gr16, killed %6, 1, killed %5, 0, $noreg, %4
RET64
...