Skip to content

Commit 6bb14f8

Browse files
committed
[X86][APX] Support APX + AMX-MOVRS/AMX-TRANSPOSE
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/784266
1 parent a761e26 commit 6bb14f8

13 files changed

+770
-27
lines changed

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
578578
unsigned Opc;
579579
switch (Opcode) {
580580
case X86::PTILELOADDRSV:
581-
Opc = X86::TILELOADDRS;
581+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
582582
break;
583583
case X86::PTILELOADDRST1V:
584-
Opc = X86::TILELOADDRST1;
584+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
585585
break;
586586
case X86::PTILELOADDV:
587587
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
@@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
737737
unsigned Opc;
738738
switch (Opcode) {
739739
case X86::PT2RPNTLVWZ0V:
740-
Opc = X86::T2RPNTLVWZ0;
740+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
741741
break;
742742
case X86::PT2RPNTLVWZ0T1V:
743-
Opc = X86::T2RPNTLVWZ0T1;
743+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
744744
break;
745745
case X86::PT2RPNTLVWZ1V:
746-
Opc = X86::T2RPNTLVWZ1;
746+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
747747
break;
748748
case X86::PT2RPNTLVWZ1T1V:
749-
Opc = X86::T2RPNTLVWZ1T1;
749+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
750750
break;
751751
case X86::PT2RPNTLVWZ0RSV:
752-
Opc = X86::T2RPNTLVWZ0RS;
752+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
753753
break;
754754
case X86::PT2RPNTLVWZ0RST1V:
755-
Opc = X86::T2RPNTLVWZ0RST1;
755+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
756756
break;
757757
case X86::PT2RPNTLVWZ1RSV:
758-
Opc = X86::T2RPNTLVWZ1RS;
758+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
759759
break;
760760
case X86::PT2RPNTLVWZ1RST1V:
761-
Opc = X86::T2RPNTLVWZ1RST1;
761+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
762762
break;
763763
default:
764764
llvm_unreachable("Impossible Opcode!");

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3780037800
case X86::PTILESTORED:
3780137801
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
3780237802
break;
37803-
#undef GET_EGPR_IF_ENABLED
3780437803
case X86::PTILELOADDRS:
37805-
Opc = X86::TILELOADDRS;
37804+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
3780637805
break;
3780737806
case X86::PTILELOADDRST1:
37808-
Opc = X86::TILELOADDRST1;
37807+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
3780937808
break;
3781037809
}
37810+
#undef GET_EGPR_IF_ENABLED
3781137811

3781237812
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
3781337813
unsigned CurOp = 0;
@@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3783837838
case X86::PT2RPNTLVWZ1RST1: {
3783937839
const DebugLoc &DL = MI.getDebugLoc();
3784037840
unsigned Opc;
37841+
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
3784137842
switch (MI.getOpcode()) {
3784237843
default:
3784337844
llvm_unreachable("Unexpected instruction!");
3784437845
case X86::PT2RPNTLVWZ0:
37845-
Opc = X86::T2RPNTLVWZ0;
37846+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
3784637847
break;
3784737848
case X86::PT2RPNTLVWZ0T1:
37848-
Opc = X86::T2RPNTLVWZ0T1;
37849+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
3784937850
break;
3785037851
case X86::PT2RPNTLVWZ1:
37851-
Opc = X86::T2RPNTLVWZ1;
37852+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
3785237853
break;
3785337854
case X86::PT2RPNTLVWZ1T1:
37854-
Opc = X86::T2RPNTLVWZ1T1;
37855+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
3785537856
break;
3785637857
case X86::PT2RPNTLVWZ0RS:
37857-
Opc = X86::T2RPNTLVWZ0RS;
37858+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
3785837859
break;
3785937860
case X86::PT2RPNTLVWZ0RST1:
37860-
Opc = X86::T2RPNTLVWZ0RST1;
37861+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
3786137862
break;
3786237863
case X86::PT2RPNTLVWZ1RS:
37863-
Opc = X86::T2RPNTLVWZ1RS;
37864+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
3786437865
break;
3786537866
case X86::PT2RPNTLVWZ1RST1:
37866-
Opc = X86::T2RPNTLVWZ1RST1;
37867+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
3786737868
break;
3786837869
}
37870+
#undef GET_EGPR_IF_ENABLED
3786937871
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
3787037872
MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);
3787137873

llvm/lib/Target/X86/X86InstrAMX.td

Lines changed: 47 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -349,22 +349,22 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
349349
let SchedRW = [WriteSystem] in {
350350
def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
351351
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
352-
[]>, VEX, WIG, T8,PS;
352+
[]>, VEX, T8, PS;
353353

354354
def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
355355
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
356-
[]>, VEX, T8,PS;
356+
[]>, VEX, T8, PS;
357357

358358
def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
359359
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
360-
[]>, VEX, T8,PD;
360+
[]>, VEX, T8, PD;
361361

362362
def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
363363
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
364-
[]>, VEX, T8,PD;
364+
[]>, VEX, T8, PD;
365365

366366
def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
367-
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS;
367+
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
368368
let isPseudo = true in {
369369
def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
370370
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
@@ -554,6 +554,48 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
554554
}
555555
} // HasAMXMOVRS, In64BitMode
556556

557+
let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
558+
def T2RPNTLVWZ0_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
559+
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
560+
[]>, EVEX, NoCD8, T8, PS;
561+
562+
def T2RPNTLVWZ0T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
563+
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
564+
[]>, EVEX, NoCD8, T8, PS;
565+
566+
def T2RPNTLVWZ1_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
567+
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
568+
[]>, EVEX, NoCD8, T8, PD;
569+
570+
def T2RPNTLVWZ1T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
571+
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
572+
[]>, EVEX, NoCD8, T8, PD;
573+
} // HasAMXTRANSPOSE, HasEGPR, In64BitMode
574+
575+
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
576+
def T2RPNTLVWZ0RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
577+
(ins sibmem:$src1), "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
578+
[]>, EVEX, NoCD8, T_MAP5;
579+
def T2RPNTLVWZ0RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
580+
(ins sibmem:$src1), "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
581+
[]>, EVEX, NoCD8, T_MAP5;
582+
def T2RPNTLVWZ1RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
583+
(ins sibmem:$src1), "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
584+
[]>, EVEX, NoCD8, T_MAP5, PD;
585+
def T2RPNTLVWZ1RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
586+
(ins sibmem:$src1), "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
587+
[]>, EVEX, NoCD8, T_MAP5, PD;
588+
} // HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode
589+
590+
let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
591+
def TILELOADDRS_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
592+
(ins sibmem:$src1), "tileloaddrs\t{$src1, $dst|$dst, $src1}",
593+
[]>, EVEX, NoCD8, T8, XD;
594+
def TILELOADDRST1_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
595+
(ins sibmem:$src1), "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
596+
[]>, EVEX, NoCD8, T8, PD;
597+
} // HasAMXMOVRS, HasEGPR, In64BitMode
598+
557599
multiclass m_tcvtrowd2ps {
558600
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
559601
let SchedRW = [WriteSystem] in {

llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll

Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
3+
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
34

45
define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
56
; CHECK-LABEL: test_amx_internal:
@@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
3536
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
3637
; CHECK-NEXT: tilerelease
3738
; CHECK-NEXT: retq
39+
;
40+
; APXF-LABEL: test_amx_internal:
41+
; APXF: # %bb.0: # %entry
42+
; APXF-NEXT: pushq %rbp # encoding: [0x55]
43+
; APXF-NEXT: .cfi_def_cfa_offset 16
44+
; APXF-NEXT: .cfi_offset %rbp, -16
45+
; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
46+
; APXF-NEXT: .cfi_def_cfa_register %rbp
47+
; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
48+
; APXF-NEXT: # imm = 0xFC00
49+
; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
50+
; APXF-NEXT: # imm = 0xC00
51+
; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
52+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
53+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
54+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
55+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
56+
; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
57+
; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
58+
; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
59+
; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
60+
; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
61+
; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
62+
; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
63+
; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
64+
; APXF-NEXT: # implicit-def: $al
65+
; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
66+
; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
67+
; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
68+
; APXF-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
69+
; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
70+
; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
71+
; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
72+
; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
73+
; APXF-NEXT: popq %rbp # encoding: [0x5d]
74+
; APXF-NEXT: .cfi_def_cfa %rsp, 8
75+
; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
76+
; APXF-NEXT: retq # encoding: [0xc3]
3877
entry:
3978
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
4079
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
4887
; CHECK-NEXT: movl $32, %eax
4988
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
5089
; CHECK-NEXT: retq
90+
;
91+
; APXF-LABEL: test_amx_old:
92+
; APXF: # %bb.0: # %entry
93+
; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
94+
; APXF-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
95+
; APXF-NEXT: retq # encoding: [0xc3]
5196
entry:
5297
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
5398
ret void
@@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
88133
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
89134
; CHECK-NEXT: tilerelease
90135
; CHECK-NEXT: retq
136+
;
137+
; APXF-LABEL: test_amx_t1_internal:
138+
; APXF: # %bb.0: # %entry
139+
; APXF-NEXT: pushq %rbp # encoding: [0x55]
140+
; APXF-NEXT: .cfi_def_cfa_offset 16
141+
; APXF-NEXT: .cfi_offset %rbp, -16
142+
; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
143+
; APXF-NEXT: .cfi_def_cfa_register %rbp
144+
; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
145+
; APXF-NEXT: # imm = 0xFC00
146+
; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
147+
; APXF-NEXT: # imm = 0xC00
148+
; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
149+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
150+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
151+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
152+
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
153+
; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
154+
; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
155+
; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
156+
; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
157+
; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
158+
; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
159+
; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
160+
; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
161+
; APXF-NEXT: # implicit-def: $al
162+
; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
163+
; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
164+
; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
165+
; APXF-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
166+
; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
167+
; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
168+
; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
169+
; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
170+
; APXF-NEXT: popq %rbp # encoding: [0x5d]
171+
; APXF-NEXT: .cfi_def_cfa %rsp, 8
172+
; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
173+
; APXF-NEXT: retq # encoding: [0xc3]
91174
entry:
92175
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
93176
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
101184
; CHECK-NEXT: movl $32, %eax
102185
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
103186
; CHECK-NEXT: retq
187+
;
188+
; APXF-LABEL: test_amx_t1_old:
189+
; APXF: # %bb.0: # %entry
190+
; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
191+
; APXF-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
192+
; APXF-NEXT: retq # encoding: [0xc3]
104193
entry:
105194
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
106195
ret void

llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0
33
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2
4+
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
45

56
define void @test_amx(i64 %stride, i8* %addr1) #0 {
67
; CHECK-LABEL: test_amx:
@@ -10,6 +11,14 @@ define void @test_amx(i64 %stride, i8* %addr1) #0 {
1011
; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0
1112
; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2
1213
; CHECK-NEXT: retq
14+
;
15+
; APXF-LABEL: test_amx:
16+
; APXF: # %bb.0:
17+
; APXF-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
18+
; APXF-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
19+
; APXF-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
20+
; APXF-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
21+
; APXF-NEXT: retq # encoding: [0xc3]
1322
call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride)
1423
call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride)
1524
call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride)
@@ -80,6 +89,27 @@ define void @test_amx2(i8* %base, i64 %stride) #0 {
8089
; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4
8190
; O2-NEXT: tilerelease
8291
; O2-NEXT: retq
92+
;
93+
; APXF-LABEL: test_amx2:
94+
; APXF: # %bb.0:
95+
; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
96+
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
97+
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
98+
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
99+
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
100+
; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
101+
; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
102+
; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
103+
; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
104+
; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
105+
; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
106+
; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
107+
; APXF-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
108+
; APXF-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
109+
; APXF-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
110+
; APXF-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
111+
; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
112+
; APXF-NEXT: retq # encoding: [0xc3]
83113
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
84114
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
85115
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)

0 commit comments

Comments
 (0)