1- // ===- X86EvexToVex.cpp ---------------------------------------------------===//
2- // Compress EVEX instructions to VEX encoding when possible to reduce code size
1+ // ===- X86CompressEVEX.cpp ------------------------------------------------===//
32//
43// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
54// See https://llvm.org/LICENSE.txt for license information.
65// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
76//
87// ===----------------------------------------------------------------------===//
98//
10- /// \file
11- /// This file defines the pass that goes over all AVX-512 instructions which
12- /// are encoded using the EVEX prefix and if possible replaces them by their
13- /// corresponding VEX encoding which is usually shorter by 2 bytes.
14- /// EVEX instructions may be encoded via the VEX prefix when the AVX-512
15- /// instruction has a corresponding AVX/AVX2 opcode, when vector length
16- /// accessed by instruction is less than 512 bits and when it does not use
17- /// the xmm or the mask registers or xmm/ymm registers with indexes higher
18- /// than 15.
19- /// The pass applies code reduction on the generated code for AVX-512 instrs.
9+ // This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
10+ // when possible in order to reduce code size or facilitate HW decoding.
2011//
12+ // Possible compression:
13+ // a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
14+ // b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy)
15+ // c. NDD (EVEX) -> non-NDD (legacy)
16+ // d. NF_ND (EVEX) -> NF (EVEX)
17+ //
18+ // Compression a, b and c always reduce code size (with some exceptions); the
19+ // fourth type of compression can help hardware decode although the instruction
20+ // length remains unchanged.
21+ //
22+ // Compression a, b and c can always reduce code size, with some exceptions
23+ // such as promoted 16-bit CRC32 which is as long as the legacy version.
24+ //
25+ // legacy:
26+ // crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
27+ // promoted:
28+ // crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
29+ //
30+ // From a performance perspective, these should be the same (same uops and
31+ // same EXE ports). From an FMV perspective, an older legacy encoding is
32+ // preferred b/c it can execute in more places (broader HW install base). So
33+ // we will still do the compression.
34+ //
35+ // Compression d can help hardware decode (HW may skip reading the NDD
36+ // register) although the instruction length remains unchanged.
2137// ===----------------------------------------------------------------------===//
2238
2339#include " MCTargetDesc/X86BaseInfo.h"
3854
3955using namespace llvm ;
4056
41- // Including the generated EVEX2VEX tables.
42- struct X86EvexToVexCompressTableEntry {
43- uint16_t EvexOpc ;
44- uint16_t VexOpc ;
57+ // Including the generated EVEX compression tables.
58+ struct X86CompressEVEXTableEntry {
59+ uint16_t OldOpc ;
60+ uint16_t NewOpc ;
4561
46- bool operator <(const X86EvexToVexCompressTableEntry &RHS) const {
47- return EvexOpc < RHS.EvexOpc ;
62+ bool operator <(const X86CompressEVEXTableEntry &RHS) const {
63+ return OldOpc < RHS.OldOpc ;
4864 }
4965
50- friend bool operator <(const X86EvexToVexCompressTableEntry &TE,
51- unsigned Opc) {
52- return TE.EvexOpc < Opc;
66+ friend bool operator <(const X86CompressEVEXTableEntry &TE, unsigned Opc) {
67+ return TE.OldOpc < Opc;
5368 }
5469};
55- #include " X86GenEVEX2VEXTables .inc"
70+ #include " X86GenCompressEVEXTables .inc"
5671
57- #define EVEX2VEX_DESC " Compressing EVEX instrs to VEX encoding when possible"
58- #define EVEX2VEX_NAME " x86-evex-to-vex- compress"
72+ #define COMP_EVEX_DESC " Compressing EVEX instrs when possible"
73+ #define COMP_EVEX_NAME " x86-compress-evex "
5974
60- #define DEBUG_TYPE EVEX2VEX_NAME
75+ #define DEBUG_TYPE COMP_EVEX_NAME
6176
6277namespace {
6378
64- class EvexToVexInstPass : public MachineFunctionPass {
79+ class CompressEVEXPass : public MachineFunctionPass {
6580public:
6681 static char ID;
67- EvexToVexInstPass () : MachineFunctionPass(ID) {}
68- StringRef getPassName () const override { return EVEX2VEX_DESC ; }
82+ CompressEVEXPass () : MachineFunctionPass(ID) {}
83+ StringRef getPassName () const override { return COMP_EVEX_DESC ; }
6984
70- /// Loop over all of the basic blocks, replacing EVEX instructions
71- /// by equivalent VEX instructions when possible for reducing code size.
7285 bool runOnMachineFunction (MachineFunction &MF) override ;
7386
7487 // This pass runs after regalloc and doesn't support VReg operands.
@@ -80,7 +93,7 @@ class EvexToVexInstPass : public MachineFunctionPass {
8093
8194} // end anonymous namespace
8295
83- char EvexToVexInstPass ::ID = 0 ;
96+ char CompressEVEXPass ::ID = 0 ;
8497
8598static bool usesExtendedRegister (const MachineInstr &MI) {
8699 auto isHiRegIdx = [](unsigned Reg) {
@@ -112,8 +125,8 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
112125 return false ;
113126}
114127
115- static bool checkVEXInstPredicate (unsigned EvexOpc , const X86Subtarget &ST) {
116- switch (EvexOpc ) {
128+ static bool checkVEXInstPredicate (unsigned OldOpc , const X86Subtarget &ST) {
129+ switch (OldOpc ) {
117130 default :
118131 return true ;
119132 case X86::VCVTNEPS2BF16Z128rm:
@@ -151,15 +164,15 @@ static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
151164}
152165
153166// Do any custom cleanup needed to finalize the conversion.
154- static bool performCustomAdjustments (MachineInstr &MI, unsigned VexOpc ) {
155- (void )VexOpc ;
167+ static bool performCustomAdjustments (MachineInstr &MI, unsigned NewOpc ) {
168+ (void )NewOpc ;
156169 unsigned Opc = MI.getOpcode ();
157170 switch (Opc) {
158171 case X86::VALIGNDZ128rri:
159172 case X86::VALIGNDZ128rmi:
160173 case X86::VALIGNQZ128rri:
161174 case X86::VALIGNQZ128rmi: {
162- assert ((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
175+ assert ((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
163176 " Unexpected new opcode!" );
164177 unsigned Scale =
165178 (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4 ;
@@ -175,8 +188,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
175188 case X86::VSHUFI32X4Z256rri:
176189 case X86::VSHUFI64X2Z256rmi:
177190 case X86::VSHUFI64X2Z256rri: {
178- assert ((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
179- VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
191+ assert ((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
192+ NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
180193 " Unexpected new opcode!" );
181194 MachineOperand &Imm = MI.getOperand (MI.getNumExplicitOperands () - 1 );
182195 int64_t ImmVal = Imm.getImm ();
@@ -200,7 +213,7 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
200213 case X86::VRNDSCALESDZm_Int:
201214 case X86::VRNDSCALESSZr_Int:
202215 case X86::VRNDSCALESSZm_Int:
203- const MachineOperand &Imm = MI.getOperand (MI.getNumExplicitOperands ()- 1 );
216+ const MachineOperand &Imm = MI.getOperand (MI.getNumExplicitOperands () - 1 );
204217 int64_t ImmVal = Imm.getImm ();
205218 // Ensure that only bits 3:0 of the immediate are used.
206219 if ((ImmVal & 0xf ) != ImmVal)
@@ -239,28 +252,28 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
239252 return false ;
240253
241254 // Use the VEX.L bit to select the 128 or 256-bit table.
242- ArrayRef<X86EvexToVexCompressTableEntry > Table =
255+ ArrayRef<X86CompressEVEXTableEntry > Table =
243256 (Desc.TSFlags & X86II::VEX_L) ? ArrayRef (X86EvexToVex256CompressTable)
244257 : ArrayRef (X86EvexToVex128CompressTable);
245258
246- unsigned EvexOpc = MI.getOpcode ();
247- const auto *I = llvm::lower_bound (Table, EvexOpc );
248- if (I == Table.end () || I->EvexOpc != EvexOpc )
259+ unsigned Opc = MI.getOpcode ();
260+ const auto *I = llvm::lower_bound (Table, Opc );
261+ if (I == Table.end () || I->OldOpc != Opc )
249262 return false ;
250263
251264 if (usesExtendedRegister (MI))
252265 return false ;
253- if (!checkVEXInstPredicate (EvexOpc , ST))
266+ if (!checkVEXInstPredicate (Opc , ST))
254267 return false ;
255- if (!performCustomAdjustments (MI, I->VexOpc ))
268+ if (!performCustomAdjustments (MI, I->NewOpc ))
256269 return false ;
257270
258- MI.setDesc (ST.getInstrInfo ()->get (I->VexOpc ));
271+ MI.setDesc (ST.getInstrInfo ()->get (I->NewOpc ));
259272 MI.setAsmPrinterFlag (X86::AC_EVEX_2_VEX);
260273 return true ;
261274}
262275
263- bool EvexToVexInstPass ::runOnMachineFunction (MachineFunction &MF) {
276+ bool CompressEVEXPass ::runOnMachineFunction (MachineFunction &MF) {
264277#ifndef NDEBUG
265278 // Make sure the tables are sorted.
266279 static std::atomic<bool > TableChecked (false );
@@ -289,8 +302,8 @@ bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
289302 return Changed;
290303}
291304
292- INITIALIZE_PASS (EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC , false , false )
305+ INITIALIZE_PASS (CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC , false , false )
293306
294- FunctionPass *llvm::createX86EvexToVexInsts () {
295- return new EvexToVexInstPass ();
307+ FunctionPass *llvm::createX86CompressEVEXPass () {
308+ return new CompressEVEXPass ();
296309}
0 commit comments