Skip to content

Commit bd10f49

Browse files
committed
[CodeGenPrepare][X86] hasMultipleConditionRegisters - don't sink larger than legal integer comparisons
x86 will expand integer comparisons that are larger than the legal integer width, so we're better off computing the comparison once and storing the result than sinking it and recomputing a comparison that is split across multiple registers. I've tweaked hasMultipleConditionRegisters to take the CmpInst operand type (optionally) as well as the result type. I don't like the way that X86 and other targets override hasMultipleConditionRegisters just to avoid comparison sinking, but I'm not sure there's a better approach to take - especially as shouldNormalizeToSelectSequence uses it as well. Comments welcome! Fixes #166534
1 parent 438a18c commit bd10f49

File tree

10 files changed

+81
-113
lines changed

10 files changed

+81
-113
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,10 @@ class LLVM_ABI TargetLoweringBase {
527527
/// and conditional branches. With multiple condition registers, the code
528528
/// generator will not aggressively sink comparisons into the blocks of their
529529
/// users.
530-
virtual bool hasMultipleConditionRegisters(EVT VT) const { return false; }
530+
virtual bool hasMultipleConditionRegisters(EVT ResVT,
531+
std::optional<EVT> CmpVT) const {
532+
return false;
533+
}
531534

532535
/// Return true if the target has BitExtract instructions.
533536
bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
@@ -2493,7 +2496,7 @@ class LLVM_ABI TargetLoweringBase {
24932496
EVT VT) const {
24942497
// If a target has multiple condition registers, then it likely has logical
24952498
// operations on those registers.
2496-
if (hasMultipleConditionRegisters(VT))
2499+
if (hasMultipleConditionRegisters(VT, std::nullopt))
24972500
return false;
24982501
// Only do the transform if the value won't be split into multiple
24992502
// registers.

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1840,7 +1840,11 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
18401840
///
18411841
/// Return true if any changes are made.
18421842
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1843-
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1843+
std::optional<EVT> CmpVT;
1844+
if (Cmp->getOperand(0)->getType()->isIntegerTy())
1845+
CmpVT = EVT::getEVT(Cmp->getOperand(0)->getType());
1846+
1847+
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType()), CmpVT))
18441848
return false;
18451849

18461850
// Avoid sinking soft-FP comparisons, since this can move them into a loop.

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -915,8 +915,9 @@ class AArch64TargetLowering : public TargetLowering {
915915
return VecOp.getOpcode() == ISD::SETCC;
916916
}
917917

918-
bool hasMultipleConditionRegisters(EVT VT) const override {
919-
return VT.isScalableVector();
918+
bool hasMultipleConditionRegisters(EVT ResVT,
919+
std::optional<EVT> CmpVT) const override {
920+
return ResVT.isScalableVector();
920921
}
921922
};
922923

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,8 @@ class AMDGPUTargetLowering : public TargetLowering {
395395
return MVT::i32;
396396
}
397397

398-
bool hasMultipleConditionRegisters(EVT VT) const override {
398+
bool hasMultipleConditionRegisters(EVT ResVT,
399+
std::optional<EVT> CmpVT) const override {
399400
// FIXME: This is only partially true. If we have to do vector compares, any
400401
// SGPR pair can be a condition register. If we have a uniform condition, we
401402
// are better off doing SALU operations, where there is only one SCC. For

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20116,6 +20116,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2011620116
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
2011720117
}
2011820118

20119-
bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
20119+
bool PPCTargetLowering::hasMultipleConditionRegisters(
20120+
EVT ResVT, std::optional<EVT> CmpVT) const {
2012020121
return Subtarget.useCRBits();
2012120122
}

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1194,7 +1194,8 @@ namespace llvm {
11941194
bool IsVarArg) const;
11951195
bool supportsTailCallFor(const CallBase *CB) const;
11961196

1197-
bool hasMultipleConditionRegisters(EVT VT) const override;
1197+
bool hasMultipleConditionRegisters(EVT ResVT,
1198+
std::optional<EVT> CmpVT) const override;
11981199

11991200
private:
12001201
struct ReuseLoadInfo {

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3403,6 +3403,14 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
34033403
return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
34043404
}
34053405

3406+
bool X86TargetLowering::hasMultipleConditionRegisters(
3407+
EVT ResVT, std::optional<EVT> CmpVT) const {
3408+
if (CmpVT.has_value())
3409+
return CmpVT->isScalarInteger() &&
3410+
CmpVT->getSizeInBits() > (Subtarget.is64Bit() ? 64 : 32);
3411+
return TargetLowering::hasMultipleConditionRegisters(ResVT, CmpVT);
3412+
}
3413+
34063414
bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
34073415
bool) const {
34083416
// TODO: Allow vectors?

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1542,6 +1542,11 @@ namespace llvm {
15421542
/// supported.
15431543
bool shouldScalarizeBinop(SDValue) const override;
15441544

1545+
/// If this returns true, the code generator will not aggressively sink
1546+
/// comparisons into the blocks of their users.
1547+
bool hasMultipleConditionRegisters(EVT ResVT,
1548+
std::optional<EVT> CmpVT) const override;
1549+
15451550
/// Extract of a scalar FP value from index 0 of a vector is free.
15461551
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
15471552
EVT EltVT = VT.getScalarType();

llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,16 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
3434
; CHECK-NEXT: .cfi_offset %edi, -16
3535
; CHECK-NEXT: .cfi_offset %ebx, -12
3636
; CHECK-NEXT: xorl %eax, %eax
37-
; CHECK-NEXT: xorl %edi, %edi
37+
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
3838
; CHECK-NEXT: testb %al, %al
39-
; CHECK-NEXT: Ltmp0:
40-
; CHECK-NEXT: ## implicit-def: $ebx
39+
; CHECK-NEXT: Ltmp0: ## EH_LABEL
40+
; CHECK-NEXT: ## implicit-def: $edi
4141
; CHECK-NEXT: calll __Znam
42-
; CHECK-NEXT: Ltmp1:
42+
; CHECK-NEXT: Ltmp1: ## EH_LABEL
4343
; CHECK-NEXT: ## %bb.1: ## %bb11
4444
; CHECK-NEXT: movl %eax, %esi
45-
; CHECK-NEXT: movb $1, %al
46-
; CHECK-NEXT: testb %al, %al
45+
; CHECK-NEXT: movb $1, %bl
46+
; CHECK-NEXT: testb %bl, %bl
4747
; CHECK-NEXT: jne LBB0_2
4848
; CHECK-NEXT: ## %bb.7: ## %bb31
4949
; CHECK-NEXT: ## implicit-def: $eax
@@ -53,23 +53,20 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
5353
; CHECK-NEXT: ## Child Loop BB0_13 Depth 2
5454
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
5555
; CHECK-NEXT: ## Child Loop BB0_21 Depth 2
56-
; CHECK-NEXT: movb $1, %al
57-
; CHECK-NEXT: testb %al, %al
56+
; CHECK-NEXT: testb %bl, %bl
5857
; CHECK-NEXT: jne LBB0_9
5958
; CHECK-NEXT: ## %bb.10: ## %bb41
6059
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
61-
; CHECK-NEXT: Ltmp2:
60+
; CHECK-NEXT: Ltmp2: ## EH_LABEL
6261
; CHECK-NEXT: xorl %eax, %eax
6362
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
6463
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
6564
; CHECK-NEXT: movl %esi, (%esp)
6665
; CHECK-NEXT: calll _Pjii
67-
; CHECK-NEXT: Ltmp3:
66+
; CHECK-NEXT: Ltmp3: ## EH_LABEL
6867
; CHECK-NEXT: ## %bb.11: ## %bb42
6968
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
70-
; CHECK-NEXT: xorl %eax, %eax
71-
; CHECK-NEXT: decl %eax
72-
; CHECK-NEXT: testl %eax, %eax
69+
; CHECK-NEXT: testb %bl, %bl
7370
; CHECK-NEXT: jne LBB0_18
7471
; CHECK-NEXT: ## %bb.12: ## %bb45.preheader
7572
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
@@ -78,8 +75,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
7875
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
7976
; CHECK-NEXT: ## => This Loop Header: Depth=2
8077
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
81-
; CHECK-NEXT: movb $1, %cl
82-
; CHECK-NEXT: testb %cl, %cl
78+
; CHECK-NEXT: testb %bl, %bl
8379
; CHECK-NEXT: jne LBB0_19
8480
; CHECK-NEXT: ## %bb.14: ## %bb48
8581
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
@@ -88,14 +84,14 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
8884
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
8985
; CHECK-NEXT: xorl %ecx, %ecx
9086
; CHECK-NEXT: movl %esi, %edx
91-
; CHECK-NEXT: movl %edi, %ebx
87+
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
9288
; CHECK-NEXT: LBB0_16: ## %bb49
9389
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
9490
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=2
9591
; CHECK-NEXT: ## => This Inner Loop Header: Depth=3
9692
; CHECK-NEXT: incl %ecx
9793
; CHECK-NEXT: addl $4, %edx
98-
; CHECK-NEXT: decl %ebx
94+
; CHECK-NEXT: decl %edi
9995
; CHECK-NEXT: jne LBB0_16
10096
; CHECK-NEXT: LBB0_17: ## %bb57
10197
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
@@ -107,13 +103,12 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
107103
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
108104
; CHECK-NEXT: movl $0, (%esp)
109105
; CHECK-NEXT: calll ___bzero
110-
; CHECK-NEXT: movb $1, %al
111-
; CHECK-NEXT: testb %al, %al
106+
; CHECK-NEXT: testb %bl, %bl
112107
; CHECK-NEXT: jne LBB0_22
113108
; CHECK-NEXT: ## %bb.20: ## %bb61.preheader
114109
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
115110
; CHECK-NEXT: movl %esi, %eax
116-
; CHECK-NEXT: movl %edi, %ecx
111+
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
117112
; CHECK-NEXT: LBB0_21: ## %bb61
118113
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
119114
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
@@ -126,32 +121,32 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
126121
; CHECK-NEXT: decl {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
127122
; CHECK-NEXT: jmp LBB0_8
128123
; CHECK-NEXT: LBB0_18: ## %bb43
129-
; CHECK-NEXT: Ltmp5:
130-
; CHECK-NEXT: movl %esi, %ebx
124+
; CHECK-NEXT: Ltmp5: ## EH_LABEL
125+
; CHECK-NEXT: movl %esi, %edi
131126
; CHECK-NEXT: calll _OnOverFlow
132-
; CHECK-NEXT: Ltmp6:
127+
; CHECK-NEXT: Ltmp6: ## EH_LABEL
133128
; CHECK-NEXT: jmp LBB0_3
134129
; CHECK-NEXT: LBB0_2: ## %bb29
135-
; CHECK-NEXT: Ltmp7:
136-
; CHECK-NEXT: movl %esi, %ebx
130+
; CHECK-NEXT: Ltmp7: ## EH_LABEL
131+
; CHECK-NEXT: movl %esi, %edi
137132
; CHECK-NEXT: calll _OnOverFlow
138-
; CHECK-NEXT: Ltmp8:
133+
; CHECK-NEXT: Ltmp8: ## EH_LABEL
139134
; CHECK-NEXT: LBB0_3: ## %bb30
140135
; CHECK-NEXT: ud2
141136
; CHECK-NEXT: LBB0_4: ## %bb20.loopexit
142-
; CHECK-NEXT: Ltmp4:
137+
; CHECK-NEXT: Ltmp4: ## EH_LABEL
143138
; CHECK-NEXT: LBB0_9:
144-
; CHECK-NEXT: movl %esi, %ebx
139+
; CHECK-NEXT: movl %esi, %edi
145140
; CHECK-NEXT: LBB0_6: ## %bb23
146-
; CHECK-NEXT: testl %ebx, %ebx
141+
; CHECK-NEXT: testl %edi, %edi
147142
; CHECK-NEXT: addl $28, %esp
148143
; CHECK-NEXT: popl %esi
149144
; CHECK-NEXT: popl %edi
150145
; CHECK-NEXT: popl %ebx
151146
; CHECK-NEXT: popl %ebp
152147
; CHECK-NEXT: retl
153148
; CHECK-NEXT: LBB0_5: ## %bb20.loopexit.split-lp
154-
; CHECK-NEXT: Ltmp9:
149+
; CHECK-NEXT: Ltmp9: ## EH_LABEL
155150
; CHECK-NEXT: jmp LBB0_6
156151
; CHECK-NEXT: Lfunc_end0:
157152
bb:

llvm/test/CodeGen/X86/pr166534.ll

Lines changed: 24 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,108 +1,57 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE2
33
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE4
4-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
5-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
4+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
5+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX
66

77
define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
88
; SSE2-LABEL: pr166534:
99
; SSE2: # %bb.0: # %entry
10-
; SSE2-NEXT: movq (%rdi), %rax
11-
; SSE2-NEXT: movq 8(%rdi), %r8
1210
; SSE2-NEXT: movdqu (%rdi), %xmm0
13-
; SSE2-NEXT: movq (%rsi), %r9
14-
; SSE2-NEXT: movq 8(%rsi), %rdi
1511
; SSE2-NEXT: movdqu (%rsi), %xmm1
1612
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
1713
; SSE2-NEXT: pmovmskb %xmm1, %esi
18-
; SSE2-NEXT: xorl %r10d, %r10d
14+
; SSE2-NEXT: xorl %eax, %eax
1915
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
20-
; SSE2-NEXT: sete %r10b
21-
; SSE2-NEXT: orq %r10, (%rdx)
16+
; SSE2-NEXT: sete %al
17+
; SSE2-NEXT: orq %rax, (%rdx)
2218
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
2319
; SSE2-NEXT: jne .LBB0_2
2420
; SSE2-NEXT: # %bb.1: # %if.then
25-
; SSE2-NEXT: xorq %r9, %rax
26-
; SSE2-NEXT: xorq %rdi, %r8
27-
; SSE2-NEXT: xorl %edx, %edx
28-
; SSE2-NEXT: orq %rax, %r8
29-
; SSE2-NEXT: sete %dl
30-
; SSE2-NEXT: orq %rdx, (%rcx)
21+
; SSE2-NEXT: orq %rax, (%rcx)
3122
; SSE2-NEXT: .LBB0_2: # %if.end
3223
; SSE2-NEXT: retq
3324
;
3425
; SSE4-LABEL: pr166534:
3526
; SSE4: # %bb.0: # %entry
36-
; SSE4-NEXT: movq (%rdi), %rax
37-
; SSE4-NEXT: movq 8(%rdi), %r8
3827
; SSE4-NEXT: movdqu (%rdi), %xmm0
39-
; SSE4-NEXT: movq (%rsi), %r9
40-
; SSE4-NEXT: movq 8(%rsi), %rdi
4128
; SSE4-NEXT: movdqu (%rsi), %xmm1
4229
; SSE4-NEXT: pxor %xmm0, %xmm1
43-
; SSE4-NEXT: xorl %esi, %esi
30+
; SSE4-NEXT: xorl %eax, %eax
4431
; SSE4-NEXT: ptest %xmm1, %xmm1
45-
; SSE4-NEXT: sete %sil
46-
; SSE4-NEXT: orq %rsi, (%rdx)
32+
; SSE4-NEXT: sete %al
33+
; SSE4-NEXT: orq %rax, (%rdx)
4734
; SSE4-NEXT: ptest %xmm1, %xmm1
4835
; SSE4-NEXT: jne .LBB0_2
4936
; SSE4-NEXT: # %bb.1: # %if.then
50-
; SSE4-NEXT: xorq %r9, %rax
51-
; SSE4-NEXT: xorq %rdi, %r8
52-
; SSE4-NEXT: xorl %edx, %edx
53-
; SSE4-NEXT: orq %rax, %r8
54-
; SSE4-NEXT: sete %dl
55-
; SSE4-NEXT: orq %rdx, (%rcx)
37+
; SSE4-NEXT: orq %rax, (%rcx)
5638
; SSE4-NEXT: .LBB0_2: # %if.end
5739
; SSE4-NEXT: retq
5840
;
59-
; AVX2-LABEL: pr166534:
60-
; AVX2: # %bb.0: # %entry
61-
; AVX2-NEXT: movq (%rdi), %rax
62-
; AVX2-NEXT: movq 8(%rdi), %r8
63-
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
64-
; AVX2-NEXT: movq (%rsi), %rdi
65-
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
66-
; AVX2-NEXT: movq 8(%rsi), %rsi
67-
; AVX2-NEXT: xorl %r9d, %r9d
68-
; AVX2-NEXT: vptest %xmm0, %xmm0
69-
; AVX2-NEXT: sete %r9b
70-
; AVX2-NEXT: orq %r9, (%rdx)
71-
; AVX2-NEXT: vptest %xmm0, %xmm0
72-
; AVX2-NEXT: jne .LBB0_2
73-
; AVX2-NEXT: # %bb.1: # %if.then
74-
; AVX2-NEXT: xorq %rdi, %rax
75-
; AVX2-NEXT: xorq %rsi, %r8
76-
; AVX2-NEXT: xorl %edx, %edx
77-
; AVX2-NEXT: orq %rax, %r8
78-
; AVX2-NEXT: sete %dl
79-
; AVX2-NEXT: orq %rdx, (%rcx)
80-
; AVX2-NEXT: .LBB0_2: # %if.end
81-
; AVX2-NEXT: retq
82-
;
83-
; AVX512-LABEL: pr166534:
84-
; AVX512: # %bb.0: # %entry
85-
; AVX512-NEXT: movq (%rdi), %rax
86-
; AVX512-NEXT: movq 8(%rdi), %r8
87-
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
88-
; AVX512-NEXT: movq (%rsi), %r9
89-
; AVX512-NEXT: movq 8(%rsi), %rdi
90-
; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
91-
; AVX512-NEXT: xorl %esi, %esi
92-
; AVX512-NEXT: vptest %xmm0, %xmm0
93-
; AVX512-NEXT: sete %sil
94-
; AVX512-NEXT: orq %rsi, (%rdx)
95-
; AVX512-NEXT: vptest %xmm0, %xmm0
96-
; AVX512-NEXT: jne .LBB0_2
97-
; AVX512-NEXT: # %bb.1: # %if.then
98-
; AVX512-NEXT: xorq %r9, %rax
99-
; AVX512-NEXT: xorq %rdi, %r8
100-
; AVX512-NEXT: xorl %edx, %edx
101-
; AVX512-NEXT: orq %rax, %r8
102-
; AVX512-NEXT: sete %dl
103-
; AVX512-NEXT: orq %rdx, (%rcx)
104-
; AVX512-NEXT: .LBB0_2: # %if.end
105-
; AVX512-NEXT: retq
41+
; AVX-LABEL: pr166534:
42+
; AVX: # %bb.0: # %entry
43+
; AVX-NEXT: vmovdqu (%rdi), %xmm0
44+
; AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
45+
; AVX-NEXT: xorl %eax, %eax
46+
; AVX-NEXT: vptest %xmm0, %xmm0
47+
; AVX-NEXT: sete %al
48+
; AVX-NEXT: orq %rax, (%rdx)
49+
; AVX-NEXT: vptest %xmm0, %xmm0
50+
; AVX-NEXT: jne .LBB0_2
51+
; AVX-NEXT: # %bb.1: # %if.then
52+
; AVX-NEXT: orq %rax, (%rcx)
53+
; AVX-NEXT: .LBB0_2: # %if.end
54+
; AVX-NEXT: retq
10655
entry:
10756
%a = load i128, ptr %pa, align 8
10857
%b = load i128, ptr %pb, align 8

0 commit comments

Comments
 (0)