Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PTILELOADDRSV:
Opc = X86::TILELOADDRS;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1V:
Opc = X86::TILELOADDRST1;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
case X86::PTILELOADDV:
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
Expand Down Expand Up @@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PT2RPNTLVWZ0V:
Opc = X86::T2RPNTLVWZ0;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1V:
Opc = X86::T2RPNTLVWZ0T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1V:
Opc = X86::T2RPNTLVWZ1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1V:
Opc = X86::T2RPNTLVWZ1T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RSV:
Opc = X86::T2RPNTLVWZ0RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1V:
Opc = X86::T2RPNTLVWZ0RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RSV:
Opc = X86::T2RPNTLVWZ1RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1V:
Opc = X86::T2RPNTLVWZ1RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
default:
llvm_unreachable("Impossible Opcode!");
Expand Down
24 changes: 13 additions & 11 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED:
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
break;
#undef GET_EGPR_IF_ENABLED
case X86::PTILELOADDRS:
Opc = X86::TILELOADDRS;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1:
Opc = X86::TILELOADDRST1;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
}
#undef GET_EGPR_IF_ENABLED

MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
Expand Down Expand Up @@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PT2RPNTLVWZ1RST1: {
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc;
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instruction!");
case X86::PT2RPNTLVWZ0:
Opc = X86::T2RPNTLVWZ0;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1:
Opc = X86::T2RPNTLVWZ0T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1:
Opc = X86::T2RPNTLVWZ1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1:
Opc = X86::T2RPNTLVWZ1T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RS:
Opc = X86::T2RPNTLVWZ0RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1:
Opc = X86::T2RPNTLVWZ0RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RS:
Opc = X86::T2RPNTLVWZ1RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1:
Opc = X86::T2RPNTLVWZ1RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
}
#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);

Expand Down
52 changes: 47 additions & 5 deletions llvm/lib/Target/X86/X86InstrAMX.td
Original file line number Diff line number Diff line change
Expand Up @@ -349,22 +349,22 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
[]>, VEX, WIG, T8,PS;
[]>, VEX, T8, PS;

def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
[]>, VEX, T8,PS;
[]>, VEX, T8, PS;

def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
[]>, VEX, T8,PD;
[]>, VEX, T8, PD;

def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
[]>, VEX, T8,PD;
[]>, VEX, T8, PD;

def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS;
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
let isPseudo = true in {
def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
Expand Down Expand Up @@ -554,6 +554,48 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
}
} // HasAMXMOVRS, In64BitMode

let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could these EVEX definitions share code with the existing VEX ones instead of duplicating them?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
[]>, EVEX, NoCD8, T8, PS;

// EVEX-encoded counterparts of T2RPNTLVWZ0T1 / T2RPNTLVWZ1 / T2RPNTLVWZ1T1.
// Each takes a sibmem source and defines a TILEPair destination, has an empty
// selection pattern list, and uses EVEX, NoCD8 in the T8 map.
// Opcode 0x6e is used by the non-T1 form and 0x6f by the T1 forms; the Z0
// form carries the PS prefix and the Z1 forms carry PD.

// Z0, T1 form: opcode 0x6f, PS.
def T2RPNTLVWZ0T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
[]>, EVEX, NoCD8, T8, PS;

// Z1 form: opcode 0x6e, PD.
def T2RPNTLVWZ1_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
[]>, EVEX, NoCD8, T8, PD;

// Z1, T1 form: opcode 0x6f, PD.
def T2RPNTLVWZ1T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
[]>, EVEX, NoCD8, T8, PD;
} // HasAMXTRANSPOSE, HasEGPR, In64BitMode

// EVEX-encoded forms of the T2RPNTLVWZ{0,1}RS[T1] instructions.
// They are only available when AMX-MOVRS, AMX-TRANSPOSE and EGPR are all
// enabled in 64-bit mode, and are scheduled as WriteSystem.
// Each def takes a sibmem source and defines a TILEPair destination, with an
// empty selection pattern list. All four live in T_MAP5:
//   - opcode 0xf8 for the plain RS forms, 0xf9 for the RST1 forms;
//   - the Z0 forms carry no explicit prefix modifier, the Z1 forms add PD.
// NOTE(review): the _EVEX suffix matches the OPC##_EVEX name pasted by the
// GET_EGPR_IF_ENABLED macro in the C++ lowering; keep the names in sync.
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1), "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
[]>, EVEX, NoCD8, T_MAP5;
def T2RPNTLVWZ0RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1), "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
[]>, EVEX, NoCD8, T_MAP5;
def T2RPNTLVWZ1RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1), "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
[]>, EVEX, NoCD8, T_MAP5, PD;
def T2RPNTLVWZ1RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1), "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
[]>, EVEX, NoCD8, T_MAP5, PD;
} // HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode

// EVEX-encoded forms of TILELOADDRS and TILELOADDRST1.
// They are only available when AMX-MOVRS and EGPR are enabled in 64-bit mode,
// and are scheduled as WriteSystem.
// Both defs take a sibmem source and define a single TILE destination, with
// an empty selection pattern list. They share opcode 0x4a in the T8 map and
// differ only in the prefix modifier: XD for TILELOADDRS, PD for
// TILELOADDRST1.
// NOTE(review): the _EVEX suffix matches the OPC##_EVEX name pasted by the
// GET_EGPR_IF_ENABLED macro in the C++ lowering; keep the names in sync.
let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
def TILELOADDRS_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src1), "tileloaddrs\t{$src1, $dst|$dst, $src1}",
[]>, EVEX, NoCD8, T8, XD;
def TILELOADDRST1_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src1), "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
[]>, EVEX, NoCD8, T8, PD;
} // HasAMXMOVRS, HasEGPR, In64BitMode

multiclass m_tcvtrowd2ps {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
Expand Down
89 changes: 89 additions & 0 deletions llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be --check-prefix=EGPR rather than APXF, since the RUN line only enables +egpr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-LABEL: test_amx_internal:
Expand Down Expand Up @@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
;
; APXF-LABEL: test_amx_internal:
; APXF: # %bb.0: # %entry
; APXF-NEXT: pushq %rbp # encoding: [0x55]
; APXF-NEXT: .cfi_def_cfa_offset 16
; APXF-NEXT: .cfi_offset %rbp, -16
; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
; APXF-NEXT: .cfi_def_cfa_register %rbp
; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
; APXF-NEXT: # imm = 0xFC00
; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
; APXF-NEXT: # imm = 0xC00
; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
; APXF-NEXT: # implicit-def: $al
; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
; APXF-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
; APXF-NEXT: popq %rbp # encoding: [0x5d]
; APXF-NEXT: .cfi_def_cfa %rsp, 8
; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
Expand All @@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
;
; APXF-LABEL: test_amx_old:
; APXF: # %bb.0: # %entry
; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
; APXF-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
ret void
Expand Down Expand Up @@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
;
; APXF-LABEL: test_amx_t1_internal:
; APXF: # %bb.0: # %entry
; APXF-NEXT: pushq %rbp # encoding: [0x55]
; APXF-NEXT: .cfi_def_cfa_offset 16
; APXF-NEXT: .cfi_offset %rbp, -16
; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
; APXF-NEXT: .cfi_def_cfa_register %rbp
; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
; APXF-NEXT: # imm = 0xFC00
; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
; APXF-NEXT: # imm = 0xC00
; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
; APXF-NEXT: # implicit-def: $al
; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
; APXF-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
; APXF-NEXT: popq %rbp # encoding: [0x5d]
; APXF-NEXT: .cfi_def_cfa %rsp, 8
; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
Expand All @@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
;
; APXF-LABEL: test_amx_t1_old:
; APXF: # %bb.0: # %entry
; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
; APXF-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
ret void
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF

define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-LABEL: test_amx:
Expand All @@ -10,6 +11,14 @@ define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0
; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2
; CHECK-NEXT: retq
;
; APXF-LABEL: test_amx:
; APXF: # %bb.0:
; APXF-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
; APXF-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
; APXF-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
; APXF-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
; APXF-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride)
Expand Down Expand Up @@ -80,6 +89,27 @@ define void @test_amx2(i8* %base, i64 %stride) #0 {
; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4
; O2-NEXT: tilerelease
; O2-NEXT: retq
;
; APXF-LABEL: test_amx2:
; APXF: # %bb.0:
; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
; APXF-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
; APXF-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
; APXF-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
; APXF-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
; APXF-NEXT: retq # encoding: [0xc3]
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
Expand Down
Loading
Loading