Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 559a395

Browse files
committed
[SelectionDAG] Support 'bit preserving' floating point bitcasts on computeKnownBits/ComputeNumSignBits
For cases where we know the floating point representation matches the bitcasted integer equivalent, allow bitcasting to these types. This is especially useful for the X86 floating point compare results, which return all-ones/all-zeros bit patterns but as a floating point type.
Differential Revision: https://reviews.llvm.org/D39289
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316831 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 248c60f commit 559a395

File tree

8 files changed

+355
-459
lines changed

8 files changed

+355
-459
lines changed

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2096,6 +2096,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
20962096
Known.Zero = ~Known.One;
20972097
return;
20982098
}
2099+
if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
2100+
// We know all of the bits for a constant fp!
2101+
Known.One = C->getValueAPF().bitcastToAPInt();
2102+
Known.Zero = ~Known.One;
2103+
return;
2104+
}
20992105

21002106
if (Depth == 6)
21012107
return; // Limit search depth.
@@ -2219,10 +2225,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
22192225
}
22202226
case ISD::BITCAST: {
22212227
SDValue N0 = Op.getOperand(0);
2222-
unsigned SubBitWidth = N0.getScalarValueSizeInBits();
2228+
EVT SubVT = N0.getValueType();
2229+
unsigned SubBitWidth = SubVT.getScalarSizeInBits();
22232230

2224-
// Ignore bitcasts from floating point.
2225-
if (!N0.getValueType().isInteger())
2231+
// Ignore bitcasts from unsupported types.
2232+
if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
22262233
break;
22272234

22282235
// Fast handling of 'identity' bitcasts.
@@ -2960,7 +2967,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
29602967
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
29612968
unsigned Depth) const {
29622969
EVT VT = Op.getValueType();
2963-
assert(VT.isInteger() && "Invalid VT!");
2970+
assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!");
29642971
unsigned VTBits = VT.getScalarSizeInBits();
29652972
unsigned NumElts = DemandedElts.getBitWidth();
29662973
unsigned Tmp, Tmp2;
@@ -3041,10 +3048,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
30413048

30423049
case ISD::BITCAST: {
30433050
SDValue N0 = Op.getOperand(0);
3044-
unsigned SrcBits = N0.getScalarValueSizeInBits();
3051+
EVT SrcVT = N0.getValueType();
3052+
unsigned SrcBits = SrcVT.getScalarSizeInBits();
30453053

3046-
// Ignore bitcasts from floating point.
3047-
if (!N0.getValueType().isInteger())
3054+
// Ignore bitcasts from unsupported types.
3055+
if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()))
30483056
break;
30493057

30503058
// Fast handling of 'identity' bitcasts.

test/CodeGen/X86/avx512-vec-cmp.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1245,7 +1245,7 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
12451245
; KNL: ## BB#0:
12461246
; KNL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
12471247
; KNL-NEXT: vpmovsxdq %xmm0, %xmm0
1248-
; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1248+
; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
12491249
; KNL-NEXT: retq
12501250
;
12511251
; SKX-LABEL: test46:

test/CodeGen/X86/bitcast-and-setcc-256.ll

Lines changed: 13 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -274,50 +274,19 @@ define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
274274
}
275275

276276
define i8 @v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
277-
; SSE2-LABEL: v8f32:
278-
; SSE2: # BB#0:
279-
; SSE2-NEXT: cmpltps %xmm1, %xmm3
280-
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7]
281-
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
282-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
283-
; SSE2-NEXT: cmpltps %xmm0, %xmm2
284-
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
285-
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
286-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
287-
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
288-
; SSE2-NEXT: cmpltps %xmm5, %xmm7
289-
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm7[0,2,2,3,4,5,6,7]
290-
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
291-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
292-
; SSE2-NEXT: cmpltps %xmm4, %xmm6
293-
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm6[0,2,2,3,4,5,6,7]
294-
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
295-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
296-
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
297-
; SSE2-NEXT: pand %xmm0, %xmm2
298-
; SSE2-NEXT: packsswb %xmm0, %xmm2
299-
; SSE2-NEXT: pmovmskb %xmm2, %eax
300-
; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
301-
; SSE2-NEXT: ret{{[l|q]}}
302-
;
303-
; SSSE3-LABEL: v8f32:
304-
; SSSE3: # BB#0:
305-
; SSSE3-NEXT: cmpltps %xmm1, %xmm3
306-
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
307-
; SSSE3-NEXT: pshufb %xmm1, %xmm3
308-
; SSSE3-NEXT: cmpltps %xmm0, %xmm2
309-
; SSSE3-NEXT: pshufb %xmm1, %xmm2
310-
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
311-
; SSSE3-NEXT: cmpltps %xmm5, %xmm7
312-
; SSSE3-NEXT: pshufb %xmm1, %xmm7
313-
; SSSE3-NEXT: cmpltps %xmm4, %xmm6
314-
; SSSE3-NEXT: pshufb %xmm1, %xmm6
315-
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
316-
; SSSE3-NEXT: pand %xmm2, %xmm6
317-
; SSSE3-NEXT: packsswb %xmm0, %xmm6
318-
; SSSE3-NEXT: pmovmskb %xmm6, %eax
319-
; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
320-
; SSSE3-NEXT: ret{{[l|q]}}
277+
; SSE2-SSSE3-LABEL: v8f32:
278+
; SSE2-SSSE3: # BB#0:
279+
; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
280+
; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
281+
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
282+
; SSE2-SSSE3-NEXT: cmpltps %xmm5, %xmm7
283+
; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
284+
; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
285+
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6
286+
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
287+
; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
288+
; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
289+
; SSE2-SSSE3-NEXT: ret{{[l|q]}}
321290
;
322291
; AVX12-LABEL: v8f32:
323292
; AVX12: # BB#0:

test/CodeGen/X86/bitcast-and-setcc-512.ll

Lines changed: 30 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -135,40 +135,39 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
135135
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
136136
; SSE-LABEL: v8f64:
137137
; SSE: # BB#0:
138+
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
138139
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
139140
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
140-
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
141141
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
142-
; SSE-NEXT: cmpltpd %xmm3, %xmm7
143-
; SSE-NEXT: cmpltpd %xmm2, %xmm6
144-
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
145-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
146-
; SSE-NEXT: pshufb %xmm2, %xmm6
147142
; SSE-NEXT: cmpltpd %xmm1, %xmm5
148143
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
149144
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
150145
; SSE-NEXT: cmpltpd %xmm0, %xmm4
151146
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
152147
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
153148
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
154-
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
149+
; SSE-NEXT: cmpltpd %xmm3, %xmm7
150+
; SSE-NEXT: cmpltpd %xmm2, %xmm6
151+
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
152+
; SSE-NEXT: packssdw %xmm6, %xmm6
153+
; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm0[0,1,2,3],xmm6[4,5,6,7]
155154
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
156-
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
157-
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
158-
; SSE-NEXT: pshufb %xmm2, %xmm8
155+
; SSE-NEXT: shufps {{.*#+}} xmm11 = xmm11[0,2,2,3]
156+
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm11[0,2,2,3,4,5,6,7]
159157
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
160158
; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
161159
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm10[0,2,2,3,4,5,6,7]
160+
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
162161
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
163-
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
164-
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[0,2,2,3,4,5,6,7]
165-
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
166-
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
167-
; SSE-NEXT: pand %xmm0, %xmm2
168-
; SSE-NEXT: psllw $15, %xmm2
169-
; SSE-NEXT: psraw $15, %xmm2
170-
; SSE-NEXT: packsswb %xmm0, %xmm2
171-
; SSE-NEXT: pmovmskb %xmm2, %eax
162+
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
163+
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
164+
; SSE-NEXT: packssdw %xmm8, %xmm8
165+
; SSE-NEXT: pblendw {{.*#+}} xmm8 = xmm1[0,1,2,3],xmm8[4,5,6,7]
166+
; SSE-NEXT: pand %xmm6, %xmm8
167+
; SSE-NEXT: psllw $15, %xmm8
168+
; SSE-NEXT: psraw $15, %xmm8
169+
; SSE-NEXT: packsswb %xmm0, %xmm8
170+
; SSE-NEXT: pmovmskb %xmm8, %eax
172171
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
173172
; SSE-NEXT: ret{{[l|q]}}
174173
;
@@ -718,37 +717,23 @@ define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x floa
718717
; SSE-LABEL: v16f32:
719718
; SSE: # BB#0:
720719
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
721-
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
722720
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
721+
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
723722
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
724723
; SSE-NEXT: cmpltps %xmm3, %xmm7
725-
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
726-
; SSE-NEXT: pshufb %xmm3, %xmm7
727724
; SSE-NEXT: cmpltps %xmm2, %xmm6
728-
; SSE-NEXT: pshufb %xmm3, %xmm6
729-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
730-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
731-
; SSE-NEXT: pshufb %xmm2, %xmm6
725+
; SSE-NEXT: packssdw %xmm7, %xmm6
732726
; SSE-NEXT: cmpltps %xmm1, %xmm5
733-
; SSE-NEXT: pshufb %xmm3, %xmm5
734727
; SSE-NEXT: cmpltps %xmm0, %xmm4
735-
; SSE-NEXT: pshufb %xmm3, %xmm4
736-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
737-
; SSE-NEXT: pshufb %xmm2, %xmm4
738-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
728+
; SSE-NEXT: packssdw %xmm5, %xmm4
729+
; SSE-NEXT: packsswb %xmm6, %xmm4
739730
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
740-
; SSE-NEXT: pshufb %xmm3, %xmm11
741-
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
742-
; SSE-NEXT: pshufb %xmm3, %xmm9
743-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
744-
; SSE-NEXT: pshufb %xmm2, %xmm9
745731
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
746-
; SSE-NEXT: pshufb %xmm3, %xmm10
732+
; SSE-NEXT: packssdw %xmm11, %xmm10
733+
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
747734
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
748-
; SSE-NEXT: pshufb %xmm3, %xmm8
749-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
750-
; SSE-NEXT: pshufb %xmm2, %xmm8
751-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
735+
; SSE-NEXT: packssdw %xmm9, %xmm8
736+
; SSE-NEXT: packsswb %xmm10, %xmm8
752737
; SSE-NEXT: pand %xmm4, %xmm8
753738
; SSE-NEXT: pmovmskb %xmm8, %eax
754739
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
@@ -759,22 +744,17 @@ define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x floa
759744
; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
760745
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
761746
; AVX12-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
762-
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
763-
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
764747
; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
765748
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
766749
; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
767-
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
768-
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
750+
; AVX12-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
769751
; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm1
770752
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
771753
; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
772-
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
773754
; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm2
774-
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
775-
; AVX12-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
776-
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
777-
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
755+
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm3
756+
; AVX12-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
757+
; AVX12-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
778758
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
779759
; AVX12-NEXT: vpmovmskb %xmm0, %eax
780760
; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>

0 commit comments

Comments (0)