22 changes: 5 additions & 17 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10809,39 +10809,27 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
     if (!ST.hasSSE1())
       return;
 
-    // PXOR is safe to use because it doesn't affect flags.
-    BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
-        .addReg(Reg, RegState::Undef)
-        .addReg(Reg, RegState::Undef);
+    BuildMI(MBB, Iter, DL, get(X86::V_SET0), Reg);
   } else if (X86::VR256RegClass.contains(Reg)) {
     // YMM#
     if (!ST.hasAVX())
       return;
 
-    // VPXOR is safe to use because it doesn't affect flags.
-    BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg)
-        .addReg(Reg, RegState::Undef)
-        .addReg(Reg, RegState::Undef);
+    BuildMI(MBB, Iter, DL, get(X86::AVX_SET0), Reg);
   } else if (X86::VR512RegClass.contains(Reg)) {
     // ZMM#
     if (!ST.hasAVX512())
       return;
 
-    // VPXORY is safe to use because it doesn't affect flags.
-    BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
-        .addReg(Reg, RegState::Undef)
-        .addReg(Reg, RegState::Undef);
+    BuildMI(MBB, Iter, DL, get(X86::AVX512_512_SET0), Reg);
   } else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
              X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) ||
              X86::VK16RegClass.contains(Reg)) {
     if (!ST.hasVLX())
       return;
 
-    // KXOR is safe to use because it doesn't affect flags.
-    unsigned Op = ST.hasBWI() ? X86::KXORQkk : X86::KXORWkk;
-    BuildMI(MBB, Iter, DL, get(Op), Reg)
-        .addReg(Reg, RegState::Undef)
-        .addReg(Reg, RegState::Undef);
+    unsigned Op = ST.hasBWI() ? X86::KSET0Q : X86::KSET0W;
+    BuildMI(MBB, Iter, DL, get(Op), Reg);
   }
 }

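The comments removed above noted that the XOR idioms are safe in this position because they do not clobber EFLAGS. The SET0 pseudos keep that property: they are expanded after register allocation into the same flag-neutral XOR patterns, while also letting later machine passes pick the best concrete opcode for the surrounding execution domain. A minimal sketch of that expansion path, paraphrased from X86InstrInfo::expandPostRAPseudo and its Expand2AddrUndef helper (simplified from memory, not the verbatim upstream code):

// Sketch: how a SET0 pseudo becomes a real, flag-neutral XOR after
// register allocation. Paraphrased from llvm/lib/Target/X86/X86InstrInfo.cpp;
// asserts, other pseudo cases, and subtarget details are trimmed.
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
                             const MCInstrDesc &Desc) {
  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
  Register Reg = MIB.getReg(0);
  MIB->setDesc(Desc);
  // Rewrite "V_SET0 %xmm0" into "VXORPSrr %xmm0, undef %xmm0, undef %xmm0";
  // reading the sources as undef avoids a false dependency on the old value.
  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
  return true;
}

bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  bool HasAVX = Subtarget.hasAVX();
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
  switch (MI.getOpcode()) {
  case X86::V_SET0:
    return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
  // ... other pseudos ...
  }
  return false;
}

Building the pseudo with a single BuildMI call is why the three-line .addReg sequences above could be dropped: the undef source operands are materialized during expansion instead of being spelled out at every call site.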
216 changes: 216 additions & 0 deletions llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll
@@ -0,0 +1,216 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 -verify-machineinstrs | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl,+avx512bw -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512BW

define void @zero_xmm(<4 x i32> %arg) #0 {
; SSE-LABEL: zero_xmm:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, 0
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: zero_xmm:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %xmm0, 0
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: zero_xmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps %xmm0, 0
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: retq
store <4 x i32> %arg, ptr null, align 32
ret void
}

define void @zero_ymm(<8 x i32> %arg) #0 {
; SSE-LABEL: zero_ymm:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, 16
; SSE-NEXT: movaps %xmm0, 0
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: zero_ymm:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %ymm0, 0
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: zero_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps %ymm0, 0
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
store <8 x i32> %arg, ptr null, align 32
ret void
}

define void @zero_zmm(<16 x i32> %arg) #0 {
; SSE-LABEL: zero_zmm:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm3, 48
; SSE-NEXT: movaps %xmm2, 32
; SSE-NEXT: movaps %xmm1, 16
; SSE-NEXT: movaps %xmm0, 0
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: xorps %xmm3, %xmm3
; SSE-NEXT: retq
;
; AVX-LABEL: zero_zmm:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %ymm1, 32
; AVX-NEXT: vmovaps %ymm0, 0
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: zero_zmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovups %zmm0, 0
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
store <16 x i32> %arg, ptr null, align 32
ret void
}

define void @zero_k(<8 x i32> %arg, <8 x i1> %mask) #0 {
; SSE-LABEL: zero_k:
; SSE: # %bb.0:
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: packsswb %xmm2, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: testb $1, %al
; SSE-NEXT: jne .LBB3_1
; SSE-NEXT: # %bb.2: # %else
; SSE-NEXT: testb $2, %al
; SSE-NEXT: jne .LBB3_3
; SSE-NEXT: .LBB3_4: # %else2
; SSE-NEXT: testb $4, %al
; SSE-NEXT: jne .LBB3_5
; SSE-NEXT: .LBB3_6: # %else4
; SSE-NEXT: testb $8, %al
; SSE-NEXT: jne .LBB3_7
; SSE-NEXT: .LBB3_8: # %else6
; SSE-NEXT: testb $16, %al
; SSE-NEXT: jne .LBB3_9
; SSE-NEXT: .LBB3_10: # %else8
; SSE-NEXT: testb $32, %al
; SSE-NEXT: jne .LBB3_11
; SSE-NEXT: .LBB3_12: # %else10
; SSE-NEXT: testb $64, %al
; SSE-NEXT: jne .LBB3_13
; SSE-NEXT: .LBB3_14: # %else12
; SSE-NEXT: testb $-128, %al
; SSE-NEXT: je .LBB3_16
; SSE-NEXT: .LBB3_15: # %cond.store13
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
; SSE-NEXT: movd %xmm0, 28
; SSE-NEXT: .LBB3_16: # %else14
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: retq
; SSE-NEXT: .LBB3_1: # %cond.store
; SSE-NEXT: movd %xmm0, 0
; SSE-NEXT: testb $2, %al
; SSE-NEXT: je .LBB3_4
; SSE-NEXT: .LBB3_3: # %cond.store1
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; SSE-NEXT: movd %xmm2, 4
; SSE-NEXT: testb $4, %al
; SSE-NEXT: je .LBB3_6
; SSE-NEXT: .LBB3_5: # %cond.store3
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; SSE-NEXT: movd %xmm2, 8
; SSE-NEXT: testb $8, %al
; SSE-NEXT: je .LBB3_8
; SSE-NEXT: .LBB3_7: # %cond.store5
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-NEXT: movd %xmm0, 12
; SSE-NEXT: testb $16, %al
; SSE-NEXT: je .LBB3_10
; SSE-NEXT: .LBB3_9: # %cond.store7
; SSE-NEXT: movd %xmm1, 16
; SSE-NEXT: testb $32, %al
; SSE-NEXT: je .LBB3_12
; SSE-NEXT: .LBB3_11: # %cond.store9
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT: movd %xmm0, 20
; SSE-NEXT: testb $64, %al
; SSE-NEXT: je .LBB3_14
; SSE-NEXT: .LBB3_13: # %cond.store11
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: movd %xmm0, 24
; SSE-NEXT: testb $-128, %al
; SSE-NEXT: jne .LBB3_15
; SSE-NEXT: jmp .LBB3_16
;
; AVX1-LABEL: zero_k:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vmaskmovps %ymm0, %ymm1, 0
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: zero_k:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
; AVX2-NEXT: vpmaskmovd %ymm0, %ymm1, 0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: zero_k:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm0, 0 {%k1}
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: kxorw %k0, %k0, %k1
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: zero_k:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovw2m %xmm1, %k1
; AVX512BW-NEXT: vmovdqa32 %ymm0, 0 {%k1}
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: kxorq %k0, %k0, %k1
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %arg, ptr null, i32 32, <8 x i1> %mask)
ret void
}

attributes #0 = { "zero-call-used-regs"="used" }
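The #0 attribute is what drives the register clearing checked throughout this file. For context, it corresponds to Clang's zero_call_used_regs function attribute and the -fzero-call-used-regs= flag; a small, hypothetical C-level illustration (function and variable names are invented for the example):

// Hypothetical example: asks the backend to zero the registers this
// function used before returning, which is the behavior the tests above
// check. Build with e.g.: clang -O2 -mavx2 example.c
// (or pass -fzero-call-used-regs=used to apply the policy TU-wide).
__attribute__((zero_call_used_regs("used")))
void handle_secret(int *out, int key) {
  // Vector code here could leave key material behind in XMM/YMM registers;
  // the attribute makes the epilogue clear every register the body touched.
  *out = key * 41;
}

With "used", only registers the function body actually touched are cleared, which is why the SSE run for zero_zmm above zeroes four XMM registers while the AVX512 run zeroes just one.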