Skip to content

Commit d148407

Browse files
[X86][AVX512] Add pseudos for AVX512_*_SETALLONES (#169009)
Introduce the `AVX512_128_SETALLONES` and `AVX512_256_SETALLONES` pseudos to generate all-ones vectors.

Post-RA expansion:
- Use VEX `vpcmpeqd` for XMM/YMM0–15 when available (this matches current codegen, because `AVX512_128/256_SETALLONES` will be preferred over `AVX1/2_SETALLONES` on AVX512VL targets).
- Use EVEX `vpternlogd` with `imm=0xFF` for the high registers (XMM/YMM16–31).

Includes MIR tests for both the VEX and EVEX paths.
1 parent a27842c commit d148407

File tree

5 files changed

+95
-2
lines changed

5 files changed

+95
-2
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,12 @@ def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
300300
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
301301
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
302302
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
303+
// All-ones pseudo instructions for the VR128X and VR256X register classes.
// They are only available when the HasVLX predicate holds. Each pseudo takes
// no inputs and matches an all-ones vector constant (v4i32 / v8i32
// immAllOnesV). AddedComplexity = 1 raises their pattern priority; per the
// commit description this is meant to make them preferred over the
// AVX1/AVX2 SETALLONES pseudos on AVX512VL targets. The pseudos are expanded
// to real instructions in X86InstrInfo::expandPostRAPseudo.
let AddedComplexity = 1, Predicates = [HasVLX] in {
304+
def AVX512_128_SETALLONES : I<0, Pseudo, (outs VR128X:$dst), (ins),
305+
"", [(set VR128X:$dst, (v4i32 immAllOnesV))]>;
306+
def AVX512_256_SETALLONES : I<0, Pseudo, (outs VR256X:$dst), (ins),
307+
"", [(set VR256X:$dst, (v8i32 immAllOnesV))]>;
308+
}
303309
}
304310

305311
let Predicates = [HasAVX512] in {

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,8 @@ bool X86InstrInfo::isReMaterializableImpl(
778778
case X86::AVX512_128_SET0:
779779
case X86::AVX512_256_SET0:
780780
case X86::AVX512_512_SET0:
781+
case X86::AVX512_128_SETALLONES:
782+
case X86::AVX512_256_SETALLONES:
781783
case X86::AVX512_512_SETALLONES:
782784
case X86::AVX512_FsFLD0SD:
783785
case X86::AVX512_FsFLD0SH:
@@ -6246,9 +6248,31 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
62466248
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
62476249
return true;
62486250
}
6251+
case X86::AVX512_128_SETALLONES:
6252+
case X86::AVX512_256_SETALLONES:
62496253
case X86::AVX512_512_SETALLONES: {
62506254
Register Reg = MIB.getReg(0);
6251-
MIB->setDesc(get(X86::VPTERNLOGDZrri));
6255+
unsigned Opc;
6256+
switch (MI.getOpcode()) {
6257+
case X86::AVX512_128_SETALLONES: {
6258+
if (X86::VR128RegClass.contains(Reg))
6259+
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDrr));
6260+
6261+
Opc = X86::VPTERNLOGDZ128rri;
6262+
break;
6263+
}
6264+
case X86::AVX512_256_SETALLONES: {
6265+
if (X86::VR256RegClass.contains(Reg))
6266+
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
6267+
6268+
Opc = X86::VPTERNLOGDZ256rri;
6269+
break;
6270+
}
6271+
case X86::AVX512_512_SETALLONES:
6272+
Opc = X86::VPTERNLOGDZrri;
6273+
break;
6274+
}
6275+
MIB->setDesc(get(Opc));
62526276
// VPTERNLOGD needs 3 register inputs and an immediate.
62536277
// 0xff will return 1s for any input.
62546278
MIB.addReg(Reg, RegState::Undef)
@@ -8190,13 +8214,15 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
81908214
case X86::AVX1_SETALLONES:
81918215
case X86::AVX_SET0:
81928216
case X86::AVX512_256_SET0:
8217+
case X86::AVX512_256_SETALLONES:
81938218
Alignment = Align(32);
81948219
break;
81958220
case X86::V_SET0:
81968221
case X86::V_SETALLONES:
81978222
case X86::AVX512_128_SET0:
81988223
case X86::FsFLD0F128:
81998224
case X86::AVX512_FsFLD0F128:
8225+
case X86::AVX512_128_SETALLONES:
82008226
Alignment = Align(16);
82018227
break;
82028228
case X86::MMX_SET0:
@@ -8255,6 +8281,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
82558281
case X86::AVX512_128_SET0:
82568282
case X86::AVX512_256_SET0:
82578283
case X86::AVX512_512_SET0:
8284+
case X86::AVX512_128_SETALLONES:
8285+
case X86::AVX512_256_SETALLONES:
82588286
case X86::AVX512_512_SETALLONES:
82598287
case X86::FsFLD0SH:
82608288
case X86::AVX512_FsFLD0SH:
@@ -8315,6 +8343,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
83158343
break;
83168344
case X86::AVX1_SETALLONES:
83178345
case X86::AVX2_SETALLONES:
8346+
case X86::AVX512_256_SETALLONES:
83188347
IsAllOnes = true;
83198348
[[fallthrough]];
83208349
case X86::AVX512_256_SET0:
@@ -8328,6 +8357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
83288357
2);
83298358
break;
83308359
case X86::V_SETALLONES:
8360+
case X86::AVX512_128_SETALLONES:
83318361
IsAllOnes = true;
83328362
[[fallthrough]];
83338363
case X86::V_SET0:
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc %s -mtriple=i386-- -start-before=postrapseudos -o - | FileCheck %s
# Purpose: on a 32-bit target with +avx512f,+avx512vl, verify that the
# AVX512_128_SETALLONES / AVX512_256_SETALLONES pseudos assigned to low
# registers (xmm0, ymm1) are expanded post-RA to VEX-encoded vpcmpeqd.
3+
4+
--- |
5+
target triple = "i386-unknown-unknown"
6+
7+
define void @setallones() #0 {
8+
; CHECK-LABEL: setallones:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11+
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
12+
entry:
13+
unreachable
14+
}
15+
16+
attributes #0 = { "target-features"="+avx512f,+avx512vl" }
17+
---
18+
name: setallones
19+
tracksRegLiveness: true
20+
liveins: []
21+
body: |
22+
bb.0:
23+
$xmm0 = AVX512_128_SETALLONES
24+
$ymm1 = AVX512_256_SETALLONES
25+
26+
...
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc %s -mtriple=x86_64-- -start-before=postrapseudos -o - | FileCheck %s
# Purpose: on x86_64 with +avx512f,+avx512vl, verify both post-RA expansions
# of the AVX512_128_SETALLONES / AVX512_256_SETALLONES pseudos:
#   - xmm14 / ymm15 are expected to become vpcmpeqd;
#   - xmm16 / ymm17 are expected to become vpternlogd producing -1.
3+
4+
--- |
5+
target triple = "x86_64-unknown-unknown"
6+
7+
define void @setallones() #0 {
8+
; CHECK-LABEL: setallones:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vpcmpeqd %xmm14, %xmm14, %xmm14
11+
; CHECK-NEXT: vpternlogd {{.*#+}} xmm16 = -1
12+
; CHECK-NEXT: vpcmpeqd %ymm15, %ymm15, %ymm15
13+
; CHECK-NEXT: vpternlogd {{.*#+}} ymm17 = -1
14+
entry:
15+
unreachable
16+
}
17+
18+
attributes #0 = { "target-features"="+avx512f,+avx512vl" }
19+
---
20+
name: setallones
21+
tracksRegLiveness: true
22+
liveins: []
23+
body: |
24+
bb.0:
25+
$xmm14 = AVX512_128_SETALLONES
26+
$xmm16 = AVX512_128_SETALLONES
27+
$ymm15 = AVX512_256_SETALLONES
28+
$ymm17 = AVX512_256_SETALLONES
29+
30+
...

llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) {
106106
; AVX512: # %bb.0:
107107
; AVX512-NEXT: subq $24, %rsp
108108
; AVX512-NEXT: .cfi_def_cfa_offset 32
109-
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
109+
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
110+
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
110111
; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
111112
; AVX512-NEXT: callq use.v4.i32@PLT
112113
; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload

0 commit comments

Comments
 (0)