diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index fc8a0eaed140d..7fbba7f05e0a5 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, unsigned Opc; switch (Opcode) { case X86::PTILELOADDRSV: - Opc = X86::TILELOADDRS; + Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS); break; case X86::PTILELOADDRST1V: - Opc = X86::TILELOADDRST1; + Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1); break; case X86::PTILELOADDV: Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD); @@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, unsigned Opc; switch (Opcode) { case X86::PT2RPNTLVWZ0V: - Opc = X86::T2RPNTLVWZ0; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0); break; case X86::PT2RPNTLVWZ0T1V: - Opc = X86::T2RPNTLVWZ0T1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1); break; case X86::PT2RPNTLVWZ1V: - Opc = X86::T2RPNTLVWZ1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1); break; case X86::PT2RPNTLVWZ1T1V: - Opc = X86::T2RPNTLVWZ1T1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1); break; case X86::PT2RPNTLVWZ0RSV: - Opc = X86::T2RPNTLVWZ0RS; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS); break; case X86::PT2RPNTLVWZ0RST1V: - Opc = X86::T2RPNTLVWZ0RST1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1); break; case X86::PT2RPNTLVWZ1RSV: - Opc = X86::T2RPNTLVWZ1RS; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS); break; case X86::PT2RPNTLVWZ1RST1V: - Opc = X86::T2RPNTLVWZ1RST1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1); break; default: llvm_unreachable("Impossible Opcode!"); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 90e3e15b1fb46..6d69665c17565 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::PTILESTORED: Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED); break; -#undef GET_EGPR_IF_ENABLED case X86::PTILELOADDRS: - Opc = X86::TILELOADDRS; + Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS); break; case X86::PTILELOADDRST1: - Opc = X86::TILELOADDRST1; + Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1); break; } +#undef GET_EGPR_IF_ENABLED MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc)); unsigned CurOp = 0; @@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::PT2RPNTLVWZ1RST1: { const DebugLoc &DL = MI.getDebugLoc(); unsigned Opc; +#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC) switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instruction!"); case X86::PT2RPNTLVWZ0: - Opc = X86::T2RPNTLVWZ0; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0); break; case X86::PT2RPNTLVWZ0T1: - Opc = X86::T2RPNTLVWZ0T1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1); break; case X86::PT2RPNTLVWZ1: - Opc = X86::T2RPNTLVWZ1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1); break; case X86::PT2RPNTLVWZ1T1: - Opc = X86::T2RPNTLVWZ1T1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1); break; case X86::PT2RPNTLVWZ0RS: - Opc = X86::T2RPNTLVWZ0RS; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS); break; case X86::PT2RPNTLVWZ0RST1: - Opc = X86::T2RPNTLVWZ0RST1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1); break; case X86::PT2RPNTLVWZ1RS: - Opc = X86::T2RPNTLVWZ1RS; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS); break; case X86::PT2RPNTLVWZ1RST1: - Opc = X86::T2RPNTLVWZ1RST1; + Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1); break; } +#undef GET_EGPR_IF_ENABLED MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc)); MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define); diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index a055ba91d3e17..85046228bc8c5 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -345,26 +345,33 @@ let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSys def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>; } -let Predicates = [HasAMXTRANSPOSE, In64BitMode] in { - let SchedRW = [WriteSystem] in { - def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}", - []>, VEX, WIG, T8,PS; +multiclass T2RPNTLVW_Base op1, bits<8> op2, string rs, string suffix> { + def Z0#rs#suffix : I, PS; + def Z0#rs#T1#suffix : I, PS; + def Z1#rs#suffix : I, PD; + def Z1#rs#T1#suffix : I, PD; +} - def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}", - []>, VEX, T8,PS; +let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in + defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX; - def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}", - []>, VEX, T8,PD; +let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in + defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8; - def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}", - []>, VEX, T8,PD; +let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in + defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX; +let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in + defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8; + +let Predicates = [HasAMXTRANSPOSE, In64BitMode] in { + let SchedRW = [WriteSystem] in { def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src), - "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS; + "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS; let isPseudo = true in { def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst), (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4), @@ -491,22 +498,6 @@ let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [Write } let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in { - def T2RPNTLVWZ0RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src1), - "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}", - []>, VEX, T_MAP5; - def T2RPNTLVWZ0RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src1), - "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}", - []>, VEX, T_MAP5; - def T2RPNTLVWZ1RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src1), - "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}", - []>, VEX, T_MAP5, PD; - def T2RPNTLVWZ1RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst), - (ins sibmem:$src1), - "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}", - []>, VEX, T_MAP5, PD; let isPseudo = true in { def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst), (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4), @@ -529,16 +520,20 @@ let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSy } } // HasAMXMOVRS, HasAMXTRANSPOSE -let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in { - def TILELOADDRS : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), - (ins sibmem:$src1), - "tileloaddrs\t{$src1, $dst|$dst, $src1}", - []>, VEX, T8, XD; - def TILELOADDRST1 : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), - (ins sibmem:$src1), - "tileloaddrst1\t{$src1, $dst|$dst, $src1}", - []>, VEX, T8, PD; +multiclass TILELOADDRS_Base { + def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1), + "tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD; + def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1), + "tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD; +} + +let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in + defm TILELOADDRS : TILELOADDRS_Base<"">, VEX; +let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in + defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8; + +let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in { let isPseudo = true, mayLoad = 1 in { def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, diff --git a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll index da212a1850964..1b93ae029f27b 100755 --- a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) { ; CHECK-LABEL: test_amx_internal: @@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) { ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: tilerelease ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx_internal: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: pushq %rbp # encoding: [0x55] +; EGPR-NEXT: .cfi_def_cfa_offset 16 +; EGPR-NEXT: .cfi_offset %rbp, -16 +; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5] +; EGPR-NEXT: .cfi_def_cfa_register %rbp +; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff] +; EGPR-NEXT: # imm = 0xFC00 +; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00] +; EGPR-NEXT: # imm = 0xC00 +; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00] +; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01] +; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00] +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00] +; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1] +; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8] +; EGPR-NEXT: # implicit-def: $al +; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00] +; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00] +; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00] +; EGPR-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32] +; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00] +; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00] +; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32] +; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec] +; EGPR-NEXT: popq %rbp # encoding: [0x5d] +; EGPR-NEXT: .cfi_def_cfa %rsp, 8 +; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s) %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1) @@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) { ; CHECK-NEXT: movl $32, %eax ; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2 ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx_old: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00] +; EGPR-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32) ret void @@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) { ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: tilerelease ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx_t1_internal: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: pushq %rbp # encoding: [0x55] +; EGPR-NEXT: .cfi_def_cfa_offset 16 +; EGPR-NEXT: .cfi_offset %rbp, -16 +; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5] +; EGPR-NEXT: .cfi_def_cfa_register %rbp +; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff] +; EGPR-NEXT: # imm = 0xFC00 +; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00] +; EGPR-NEXT: # imm = 0xC00 +; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00] +; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00] +; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01] +; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00] +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00] +; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1] +; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8] +; EGPR-NEXT: # implicit-def: $al +; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00] +; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00] +; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00] +; EGPR-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32] +; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00] +; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00] +; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32] +; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec] +; EGPR-NEXT: popq %rbp # encoding: [0x5d] +; EGPR-NEXT: .cfi_def_cfa %rsp, 8 +; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s) %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1) @@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) { ; CHECK-NEXT: movl $32, %eax ; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx_t1_old: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00] +; EGPR-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32) ret void diff --git a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll index 146b69773eb18..1f5758c804b2b 100755 --- a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0 ; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2 +; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define void @test_amx(i64 %stride, i8* %addr1) #0 { ; CHECK-LABEL: test_amx: @@ -10,6 +11,14 @@ define void @test_amx(i64 %stride, i8* %addr1) #0 { ; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 ; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx: +; EGPR: # %bb.0: +; EGPR-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e] +; EGPR-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e] +; EGPR-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e] +; EGPR-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e] +; EGPR-NEXT: retq # encoding: [0xc3] call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride) call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride) call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride) @@ -80,6 +89,27 @@ define void @test_amx2(i8* %base, i64 %stride) #0 { ; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 ; O2-NEXT: tilerelease ; O2-NEXT: retq +; +; EGPR-LABEL: test_amx2: +; EGPR: # %bb.0: +; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] +; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0] +; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0] +; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0] +; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0] +; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00] +; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0] +; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00] +; EGPR-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37] +; EGPR-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37] +; EGPR-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37] +; EGPR-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37] +; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) diff --git a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll index cc4360317db7d..4cfd97afe721b 100644 --- a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x float> %xmm) #0 { ; CHECK-LABEL: test_amx: @@ -16,6 +17,21 @@ define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x floa ; CHECK-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 ; CHECK-NEXT: tconjtfp16 %tmm2, %tmm1 ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx: +; EGPR: # %bb.0: +; EGPR-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x04,0x31] +; EGPR-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x31] +; EGPR-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x04,0x31] +; EGPR-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x31] +; EGPR-NEXT: ttransposed %tmm3, %tmm1 # encoding: [0xc4,0xe2,0x7a,0x5f,0xcb] +; EGPR-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6c,0xca] +; EGPR-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4 # encoding: [0xc4,0xe2,0x4b,0x6c,0xe5] +; EGPR-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x63,0x6b,0xca] +; EGPR-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6b,0xca] +; EGPR-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x60,0x6b,0xca] +; EGPR-NEXT: tconjtfp16 %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x79,0x6b,0xca] +; EGPR-NEXT: retq # encoding: [0xc3] call void @llvm.x86.t2rpntlvwz0(i8 1, i8* %addr1, i64 %stride) call void @llvm.x86.t2rpntlvwz0t1(i8 2, i8* %addr1, i64 %stride) call void @llvm.x86.t2rpntlvwz1(i8 1, i8* %addr1, i64 %stride) @@ -78,6 +94,46 @@ define void @test_amx2(i8* %pointer, i8* %base, i64 %stride) #0 { ; CHECK-NEXT: tilerelease ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx2: +; EGPR: # %bb.0: +; EGPR-NEXT: pushq %rbp # encoding: [0x55] +; EGPR-NEXT: subq $2928, %rsp # encoding: [0x48,0x81,0xec,0x70,0x0b,0x00,0x00] +; EGPR-NEXT: # imm = 0xB70 +; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0d] +; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x40,0x03,0x00,0x00,0x01] +; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x70,0x03,0x00,0x00,0x08] +; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x50,0x03,0x00,0x00,0x08,0x00] +; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x71,0x03,0x00,0x00,0x08] +; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x52,0x03,0x00,0x00,0x08,0x00] +; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x72,0x03,0x00,0x00,0x08] +; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x54,0x03,0x00,0x00,0x08,0x00] +; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x73,0x03,0x00,0x00,0x08] +; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x56,0x03,0x00,0x00,0x08,0x00] +; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0x40,0x03,0x00,0x00] +; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00] +; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16] +; EGPR-NEXT: tilezero %tmm1 # encoding: [0xc4,0xe2,0x7b,0x49,0xc8] +; EGPR-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0] +; EGPR-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6c,0xd0] +; EGPR-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6c,0xd0] +; EGPR-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6b,0xd0] +; EGPR-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6b,0xd0] +; EGPR-NEXT: movabsq $64, %rbp # encoding: [0x48,0xbd,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +; EGPR-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill +; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x94,0x2c,0x80,0x03,0x00,0x00] +; EGPR-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload +; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x2c,0x80,0x03,0x00,0x00] +; EGPR-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3 # encoding: [0xc4,0xe2,0x70,0x6b,0xd8] +; EGPR-NEXT: tconjtfp16 %tmm3, %tmm0 # encoding: [0xc4,0xe2,0x79,0x6b,0xc3] +; EGPR-NEXT: tilestored %tmm2, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x14,0x17] +; EGPR-NEXT: addq $2928, %rsp # encoding: [0x48,0x81,0xc4,0x70,0x0b,0x00,0x00] +; EGPR-NEXT: # imm = 0xB70 +; EGPR-NEXT: popq %rbp # encoding: [0x5d] +; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] +; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; EGPR-NEXT: retq # encoding: [0xc3] %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride) %b = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8) @@ -117,6 +173,30 @@ define void @test_amx3(i8* %pointer, i8* %base, i64 %stride) #0 { ; CHECK-NEXT: tilerelease ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx3: +; EGPR: # %bb.0: +; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xff] +; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf0,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd0,0x08,0x00] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00] +; EGPR-NEXT: movb $0, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x00] +; EGPR-NEXT: movw $0, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x00,0x00] +; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0] +; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; EGPR-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00] +; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16] +; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x24,0x16] +; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x24,0x16] +; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x24,0x16] +; EGPR-NEXT: ttransposed %tmm4, %tmm0 # encoding: [0xc4,0xe2,0x7a,0x5f,0xc4] +; EGPR-NEXT: tilestored %tmm0, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x17] +; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] +; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; EGPR-NEXT: retq # encoding: [0xc3] %1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride) %2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride) %3 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride) @@ -179,6 +259,72 @@ define void @test_amx_spill(i8* %pointer, i8* %base, i64 %stride) #0 { ; CHECK-NEXT: tilerelease ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq +; +; EGPR-LABEL: test_amx_spill: +; EGPR: # %bb.0: +; EGPR-NEXT: subq $6088, %rsp # encoding: [0x48,0x81,0xec,0xc8,0x17,0x00,0x00] +; EGPR-NEXT: # imm = 0x17C8 +; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xfe] +; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x80,0x01] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb0,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x90,0x08,0x00] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb4,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x98,0x08,0x00] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb5,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9a,0x08,0x00] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb6,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9c,0x08,0x00] +; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb7,0x08] +; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9e,0x08,0x00] +; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x80] +; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00] +; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16] +; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16] +; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x34,0x16] +; EGPR-NEXT: movabsq $64, %rcx # encoding: [0x48,0xb9,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +; EGPR-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x0f,0x00,0x00] +; EGPR-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x13,0x00,0x00] +; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x34,0x16] +; EGPR-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x07,0x00,0x00] +; EGPR-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x0b,0x00,0x00] +; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x34,0x16] +; EGPR-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0x74,0x0c,0xc0] +; EGPR-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x03,0x00,0x00] +; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x34,0x16] +; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] +; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] +; EGPR-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x0f,0x00,0x00] +; EGPR-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x13,0x00,0x00] +; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] +; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] +; EGPR-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x07,0x00,0x00] +; EGPR-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x0b,0x00,0x00] +; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] +; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] +; EGPR-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x0c,0xc0] +; EGPR-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload +; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x03,0x00,0x00] +; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] +; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] +; EGPR-NEXT: tilestored %tmm6, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x34,0x16] +; EGPR-NEXT: tilestored %tmm7, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x3c,0x16] +; EGPR-NEXT: addq $6088, %rsp # encoding: [0x48,0x81,0xc4,0xc8,0x17,0x00,0x00] +; EGPR-NEXT: # imm = 0x17C8 +; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] +; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; EGPR-NEXT: retq # encoding: [0xc3] %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride) %b1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) %b2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt index 6df44c87d2332..57e3153da401b 100755 --- a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt @@ -96,3 +96,99 @@ # ATT: tileloaddrst1 -32(,%rbp,2), %tmm3 # INTEL: tileloaddrst1 tmm3, [2*rbp - 32] 0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# ATT: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6 +# INTEL: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456] +0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291] +0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz0rs 64(%r18), %tmm6 +# INTEL: t2rpntlvwz0rs tmm6, [r18 + 64] +0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40 + +# ATT: t2rpntlvwz0rs -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz0rs tmm2, [2*rbp - 32] +0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff + +# ATT: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6 +# INTEL: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456] +0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291] +0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz0rst1 64(%r18), %tmm6 +# INTEL: t2rpntlvwz0rst1 tmm6, [r18 + 64] +0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40 + +# ATT: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz0rst1 tmm2, [2*rbp - 32] +0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff + +# ATT: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6 +# INTEL: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456] +0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291] +0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz1rs 64(%r18), %tmm6 +# INTEL: t2rpntlvwz1rs tmm6, [r18 + 64] +0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40 + +# ATT: t2rpntlvwz1rs -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz1rs tmm2, [2*rbp - 32] +0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff + +# ATT: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6 +# INTEL: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456] +0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291] +0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz1rst1 64(%r18), %tmm6 +# INTEL: t2rpntlvwz1rst1 tmm6, [r18 + 64] +0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40 + +# ATT: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz1rst1 tmm2, [2*rbp - 32] +0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff + +# ATT: tileloaddrs 268435456(%r16,%r14,8), %tmm6 +# INTEL: tileloaddrs tmm6, [r16 + 8*r14 + 268435456] +0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: tileloaddrs 291(%r8,%r17,4), %tmm3 +# INTEL: tileloaddrs tmm3, [r8 + 4*r17 + 291] +0x62,0xd2,0x7b,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00 + +# ATT: tileloaddrs 64(%r18), %tmm6 +# INTEL: tileloaddrs tmm6, [r18 + 64] +0x62,0xfa,0x7f,0x08,0x4a,0x74,0x22,0x40 + +# ATT: tileloaddrs -32(,%rbp,2), %tmm3 +# INTEL: tileloaddrs tmm3, [2*rbp - 32] +0x62,0xf2,0x7f,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# ATT: tileloaddrst1 268435456(%r16,%r14,8), %tmm6 +# INTEL: tileloaddrst1 tmm6, [r16 + 8*r14 + 268435456] +0x62,0xba,0x7d,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: tileloaddrst1 291(%r8,%r17,4), %tmm3 +# INTEL: tileloaddrst1 tmm3, [r8 + 4*r17 + 291] +0x62,0xd2,0x79,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00 + +# ATT: tileloaddrst1 64(%r18), %tmm6 +# INTEL: tileloaddrst1 tmm6, [r18 + 64] +0x62,0xfa,0x7d,0x08,0x4a,0x74,0x22,0x40 + +# ATT: tileloaddrst1 -32(,%rbp,2), %tmm3 +# INTEL: tileloaddrst1 tmm3, [2*rbp - 32] +0x62,0xf2,0x7d,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff diff --git a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt b/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt index 8c6f1be80ba2d..d768630ac1475 100644 --- a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt +++ b/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt @@ -49,6 +49,54 @@ # INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32] 0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff +# ATT: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4 +# INTEL: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456] +0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz0 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291] +0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz0 -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz0 tmm2, [2*rbp - 32] +0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff + +# ATT: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4 +# INTEL: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456] +0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291] +0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz0t1 tmm2, [2*rbp - 32] +0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff + +# ATT: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4 +# INTEL: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456] +0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz1 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291] +0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz1 -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz1 tmm2, [2*rbp - 32] +0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff + +# ATT: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4 +# INTEL: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456] +0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10 + +# ATT: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2 +# INTEL: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291] +0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00 + +# ATT: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 +# INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32] +0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff + # ATT: ttransposed %tmm1, %tmm2 # INTEL: ttransposed tmm2, tmm1 0xc4,0xe2,0x7a,0x5f,0xd1 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s index d780ad4f0e369..92db672e1c82d 100755 --- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s +++ b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s @@ -86,4 +86,92 @@ // CHECK: tileloaddrst1 -32(,%rbp,2), %tmm3 // CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] - tileloaddrst1 -32(,%rbp,2), %tmm3 \ No newline at end of file + tileloaddrst1 -32(,%rbp,2), %tmm3 + +// CHECK: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6 +// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6 + +// CHECK: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2 + +// CHECK: t2rpntlvwz0rs 64(%r18), %tmm6 +// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40] + t2rpntlvwz0rs 64(%r18), %tmm6 + +// CHECK: {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2 + +// CHECK: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6 +// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6 + +// CHECK: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2 + +// CHECK: t2rpntlvwz0rst1 64(%r18), %tmm6 +// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40] + t2rpntlvwz0rst1 64(%r18), %tmm6 + +// CHECK: {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 + +// CHECK: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6 +// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6 + +// CHECK: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2 + +// CHECK: t2rpntlvwz1rs 64(%r18), %tmm6 +// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40] + t2rpntlvwz1rs 64(%r18), %tmm6 + +// CHECK: {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2 + +// CHECK: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6 +// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6 + +// CHECK: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2 + +// CHECK: t2rpntlvwz1rst1 64(%r18), %tmm6 +// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40] + t2rpntlvwz1rst1 64(%r18), %tmm6 + +// CHECK: {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 + +// CHECK: tileloaddrs 291(%r16,%rax,4), %tmm3 +// CHECK: encoding: [0x62,0xfa,0x7f,0x08,0x4a,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloaddrs 291(%r16,%rax,4), %tmm3 + +// CHECK: tileloaddrs 291(%r8,%r17,4), %tmm3 +// CHECK: encoding: [0x62,0xd2,0x7b,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00] + tileloaddrs 291(%r8,%r17,4), %tmm3 + +// CHECK: {evex} tileloaddrs -32(,%rbp,2), %tmm3 +// CHECK: encoding: [0x62,0xf2,0x7f,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] + {evex} tileloaddrs -32(,%rbp,2), %tmm3 + +// CHECK: tileloaddrst1 291(%r16,%rax,4), %tmm3 +// CHECK: encoding: [0x62,0xfa,0x7d,0x08,0x4a,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloaddrst1 291(%r16,%rax,4), %tmm3 + +// CHECK: tileloaddrst1 291(%r8,%r17,4), %tmm3 +// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00] + tileloaddrst1 291(%r8,%r17,4), %tmm3 + +// CHECK: {evex} tileloaddrst1 -32(,%rbp,2), %tmm3 +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] + {evex} tileloaddrst1 -32(,%rbp,2), %tmm3 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s index ccc7ac51a98a4..140d1aa6b198e 100755 --- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s +++ b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s @@ -95,3 +95,99 @@ // CHECK: tileloaddrst1 tmm3, [2*rbp - 32] // CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] tileloaddrst1 tmm3, [2*rbp - 32] + +// CHECK: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291] + +// CHECK: t2rpntlvwz0rs tmm6, [r18 + 64] +// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40] + t2rpntlvwz0rs tmm6, [r18 + 64] + +// CHECK: {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32] + +// CHECK: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291] + +// CHECK: t2rpntlvwz0rst1 tmm6, [r18 + 64] +// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40] + t2rpntlvwz0rst1 tmm6, [r18 + 64] + +// CHECK: {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32] + +// CHECK: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291] + +// CHECK: t2rpntlvwz1rs tmm6, [r18 + 64] +// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40] + t2rpntlvwz1rs tmm6, [r18 + 64] + +// CHECK: {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32] + +// CHECK: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291] + +// CHECK: t2rpntlvwz1rst1 tmm6, [r18 + 64] +// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40] + t2rpntlvwz1rst1 tmm6, [r18 + 64] + +// CHECK: {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32] + +// CHECK: tileloaddrs tmm6, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10] + tileloaddrs tmm6, [r16 + 8*r14 + 268435456] + +// CHECK: tileloaddrs tmm3, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd2,0x7b,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00] + tileloaddrs tmm3, [r8 + 4*r17 + 291] + +// CHECK: tileloaddrs tmm6, [r18 + 64] +// CHECK: encoding: [0x62,0xfa,0x7f,0x08,0x4a,0x74,0x22,0x40] + tileloaddrs tmm6, [r18 + 64] + +// CHECK: {evex} tileloaddrs tmm3, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf2,0x7f,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] + {evex} tileloaddrs tmm3, [2*rbp - 32] + +// CHECK: tileloaddrst1 tmm6, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10] + tileloaddrst1 tmm6, [r16 + 8*r14 + 268435456] + +// CHECK: tileloaddrst1 tmm3, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00] + tileloaddrst1 tmm3, [r8 + 4*r17 + 291] + +// CHECK: tileloaddrst1 tmm6, [r18 + 64] +// CHECK: encoding: [0x62,0xfa,0x7d,0x08,0x4a,0x74,0x22,0x40] + tileloaddrst1 tmm6, [r18 + 64] + +// CHECK: {evex} tileloaddrst1 tmm3, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] + {evex} tileloaddrst1 tmm3, [2*rbp - 32] diff --git a/llvm/test/MC/X86/amx-transpose-att.s b/llvm/test/MC/X86/amx-transpose-att.s index 21bbf258ac6ef..5158470f8c905 100644 --- a/llvm/test/MC/X86/amx-transpose-att.s +++ b/llvm/test/MC/X86/amx-transpose-att.s @@ -48,6 +48,54 @@ // CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 +// CHECK: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4 +// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4 + +// CHECK: t2rpntlvwz0 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0 291(%r8,%r17,4), %tmm2 + +// CHECK: {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2 + +// CHECK: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4 +// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4 + +// CHECK: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2 + +// CHECK: {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 + +// CHECK: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4 +// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4 + +// CHECK: t2rpntlvwz1 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1 291(%r8,%r17,4), %tmm2 + +// CHECK: {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2 + +// CHECK: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4 +// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4 + +// CHECK: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2 +// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2 + +// CHECK: {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 + // CHECK: ttransposed %tmm1, %tmm5 // CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9] ttransposed %tmm1, %tmm5 diff --git a/llvm/test/MC/X86/amx-transpose-intel.s b/llvm/test/MC/X86/amx-transpose-intel.s index a772232ddbbf2..0d2c22f67a173 100644 --- a/llvm/test/MC/X86/amx-transpose-intel.s +++ b/llvm/test/MC/X86/amx-transpose-intel.s @@ -48,6 +48,54 @@ // CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] t2rpntlvwz1t1 tmm2, [2*rbp - 32] +// CHECK: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291] + +// CHECK: {evex} t2rpntlvwz0 tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0 tmm2, [2*rbp - 32] + +// CHECK: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291] + +// CHECK: {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32] + +// CHECK: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291] + +// CHECK: {evex} t2rpntlvwz1 tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1 tmm2, [2*rbp - 32] + +// CHECK: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] + t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456] + +// CHECK: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291] +// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] + t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291] + +// CHECK: {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32] +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] + {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32] + // CHECK: ttransposed tmm5, tmm1 // CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9] ttransposed tmm5, tmm1 diff --git a/llvm/test/TableGen/x86-instr-mapping.inc b/llvm/test/TableGen/x86-instr-mapping.inc index 55d392f5e271f..4f64d4b8d93d0 100644 --- a/llvm/test/TableGen/x86-instr-mapping.inc +++ b/llvm/test/TableGen/x86-instr-mapping.inc @@ -167,6 +167,16 @@ static const X86TableEntry X86CompressEVEXTable[] = { { X86::SHRX64rm_EVEX, X86::SHRX64rm }, { X86::SHRX64rr_EVEX, X86::SHRX64rr }, { X86::STTILECFG_EVEX, X86::STTILECFG }, + { X86::T2RPNTLVWZ0RST1_EVEX, X86::T2RPNTLVWZ0RST1 }, + { X86::T2RPNTLVWZ0RS_EVEX, X86::T2RPNTLVWZ0RS }, + { X86::T2RPNTLVWZ0T1_EVEX, X86::T2RPNTLVWZ0T1 }, + { X86::T2RPNTLVWZ0_EVEX, X86::T2RPNTLVWZ0 }, + { X86::T2RPNTLVWZ1RST1_EVEX, X86::T2RPNTLVWZ1RST1 }, + { X86::T2RPNTLVWZ1RS_EVEX, X86::T2RPNTLVWZ1RS }, + { X86::T2RPNTLVWZ1T1_EVEX, X86::T2RPNTLVWZ1T1 }, + { X86::T2RPNTLVWZ1_EVEX, X86::T2RPNTLVWZ1 }, + { X86::TILELOADDRST1_EVEX, X86::TILELOADDRST1 }, + { X86::TILELOADDRS_EVEX, X86::TILELOADDRS }, { X86::TILELOADDT1_EVEX, X86::TILELOADDT1 }, { X86::TILELOADD_EVEX, X86::TILELOADD }, { X86::TILESTORED_EVEX, X86::TILESTORED },