Skip to content

Commit 46c9489

Browse files
authored
[GlobalISel] Add G_EXTRACT_VECTOR_ELT for computeKnownBits (#164825)
The code is ported from `SelectionDAG::computeKnownBits`. As a result of adding `G_EXTRACT_VECTOR_ELT` handling to `GlobalISel`'s `computeKnownBits`, the code generated for some of the existing regression tests changes. The changes in `neon-extadd-extract.ll` and `knownbits-buildvector.mir` look good to me, though I'm struggling to understand the diff in `sdiv.i64.ll`. Is there a test that checks whether the generated AMDGCN code produces the correct result? Or are there tools that help with executing it (I do have an AMD GPU)? **Edit**: Related to #150515
1 parent 1ff0098 commit 46c9489

File tree

5 files changed

+199
-34
lines changed

5 files changed

+199
-34
lines changed

llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,38 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
643643
Known.Zero.setBitsFrom(LowBits);
644644
break;
645645
}
646+
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
647+
GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
648+
Register InVec = Extract.getVectorReg();
649+
Register EltNo = Extract.getIndexReg();
650+
651+
auto ConstEltNo = getIConstantVRegVal(EltNo, MRI);
652+
653+
LLT VecVT = MRI.getType(InVec);
654+
// computeKnownBits not yet implemented for scalable vectors.
655+
if (VecVT.isScalableVector())
656+
break;
657+
658+
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
659+
const unsigned NumSrcElts = VecVT.getNumElements();
660+
// A return type different from the vector's element type may lead to
661+
// issues with pattern selection. Bail out to avoid that.
662+
if (BitWidth > EltBitWidth)
663+
break;
664+
665+
Known.Zero.setAllBits();
666+
Known.One.setAllBits();
667+
668+
// If we know the element index, just demand that vector element, else for
669+
// an unknown element index, ignore DemandedElts and demand them all.
670+
APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
671+
if (ConstEltNo && ConstEltNo->ult(NumSrcElts))
672+
DemandedSrcElts =
673+
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
674+
675+
computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1);
676+
break;
677+
}
646678
case TargetOpcode::G_SHUFFLE_VECTOR: {
647679
APInt DemandedLHS, DemandedRHS;
648680
// Collect the known bits that are shared by every vector element referenced

llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ body: |
2222
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
2323
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
2424
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
25-
; CHECK-NEXT: %4:_ KnownBits:???????? SignBits:1
25+
; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4
2626
%0:_(s8) = G_CONSTANT i8 3
2727
%1:_(s8) = G_CONSTANT i8 10
2828
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -filetype=null 2>&1 | FileCheck %s
3+
4+
---
5+
name: all_knownbits_const_idx
6+
body: |
7+
bb.0:
8+
; CHECK-LABEL: name: @all_knownbits_const_idx
9+
; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
10+
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
11+
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
12+
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
13+
; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4
14+
%0:_(s8) = G_CONSTANT i8 3
15+
%1:_(s8) = G_CONSTANT i8 10
16+
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
17+
%idx:_(s64) = G_CONSTANT i64 1
18+
%3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx
19+
...
20+
---
21+
name: all_knownbits
22+
body: |
23+
bb.0:
24+
; CHECK-LABEL: name: @all_knownbits
25+
; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
26+
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
27+
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
28+
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
29+
; CHECK-NEXT: %4:_ KnownBits:0000?01? SignBits:4
30+
%0:_(s8) = G_CONSTANT i8 3
31+
%1:_(s8) = G_CONSTANT i8 10
32+
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
33+
%idx:_(s64) = COPY $d0
34+
%3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx
35+
...
36+
---
37+
name: no_knownbits_const_idx
38+
body: |
39+
bb.0:
40+
; CHECK-LABEL: name: @no_knownbits_const_idx
41+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
42+
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
43+
; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
44+
%0:_(<2 x s8>) = COPY $h0
45+
%idx:_(s64) = G_CONSTANT i64 1
46+
%1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx
47+
...
48+
---
49+
name: no_knownbits
50+
body: |
51+
bb.0:
52+
; CHECK-LABEL: name: @no_knownbits
53+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
54+
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
55+
; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
56+
%0:_(<2 x s8>) = COPY $h0
57+
%idx:_(s64) = COPY $d1
58+
%1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx
59+
...
60+
---
61+
name: zext_const_idx
62+
body: |
63+
bb.0:
64+
; CHECK-LABEL: name: @zext_const_idx
65+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
66+
; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8
67+
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
68+
; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8
69+
%0:_(<2 x s8>) = COPY $h0
70+
%zext0:_(<2 x s16>) = G_ZEXT %0
71+
%idx:_(s64) = G_CONSTANT i64 1
72+
%1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx
73+
...
74+
---
75+
name: zext
76+
body: |
77+
bb.0:
78+
79+
; CHECK-LABEL: name: @zext
80+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
81+
; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8
82+
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
83+
; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8
84+
%0:_(<2 x s8>) = COPY $h0
85+
%zext0:_(<2 x s16>) = G_ZEXT %0
86+
%idx:_(s64) = COPY $d1
87+
%1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx
88+
...
89+
---
90+
name: sext_const_idx
91+
body: |
92+
bb.0:
93+
; CHECK-LABEL: name: @sext_const_idx
94+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
95+
; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9
96+
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
97+
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
98+
%0:_(<2 x s8>) = COPY $h0
99+
%sext0:_(<2 x s16>) = G_SEXT %0
100+
%idx:_(s64) = G_CONSTANT i64 1
101+
%1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx
102+
...
103+
---
104+
name: sext
105+
body: |
106+
bb.0:
107+
; CHECK-LABEL: name: @sext
108+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
109+
; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9
110+
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
111+
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
112+
%0:_(<2 x s8>) = COPY $h0
113+
%sext0:_(<2 x s16>) = G_SEXT %0
114+
%idx:_(s64) = COPY $d1
115+
%1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx
116+
...
117+
---
118+
# Verifies known bit computation bails if return type differs from vector
119+
# element type. Without bailing, the 8 lowest bits of %4 would be known.
120+
name: bail_on_different_return_type
121+
body: |
122+
bb.0:
123+
; CHECK-LABEL: name: @bail_on_different_return_type
124+
; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
125+
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
126+
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
127+
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
128+
; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1
129+
%0:_(s8) = G_CONSTANT i8 3
130+
%1:_(s8) = G_CONSTANT i8 10
131+
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
132+
%idx:_(s64) = G_CONSTANT i64 1
133+
%3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %idx

llvm/test/CodeGen/AArch64/neon-extadd-extract.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,7 @@ define <1 x i64> @mullu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
734734
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
735735
; CHECK-GI-NEXT: fmov x8, d0
736736
; CHECK-GI-NEXT: fmov x9, d1
737-
; CHECK-GI-NEXT: mul x8, x8, x9
737+
; CHECK-GI-NEXT: umull x8, w8, w9
738738
; CHECK-GI-NEXT: fmov d0, x8
739739
; CHECK-GI-NEXT: ret
740740
entry:

llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2757,47 +2757,47 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
27572757
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27582758
; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v4
27592759
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3
2760-
; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v6
2761-
; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
2760+
; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v0
2761+
; CGP-NEXT: v_and_b32_e32 v5, 0xffffff, v6
2762+
; CGP-NEXT: v_and_b32_e32 v9, 0xffffff, v2
27622763
; CGP-NEXT: v_rcp_f32_e32 v1, v1
2763-
; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v0
2764-
; CGP-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
2765-
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v1
2766-
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4
2767-
; CGP-NEXT: v_mul_lo_u32 v6, v6, v5
2768-
; CGP-NEXT: v_rcp_f32_e32 v7, v1
2769-
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v6, 0
2770-
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v7
2764+
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v1
27712765
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0
2772-
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v1
2773-
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v5, 0
2774-
; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v4
2775-
; CGP-NEXT: v_mul_lo_u32 v5, v0, v6
2766+
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v3
2767+
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5
2768+
; CGP-NEXT: v_mul_lo_u32 v7, v1, v6
2769+
; CGP-NEXT: v_rcp_f32_e32 v8, v0
2770+
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
2771+
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v8
2772+
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v1
2773+
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v0
2774+
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v6, 0
2775+
; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v5
2776+
; CGP-NEXT: v_mul_lo_u32 v6, v0, v2
27762777
; CGP-NEXT: v_mul_lo_u32 v0, v1, v3
27772778
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v1
2778-
; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v0
2779-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v3
2779+
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v0
2780+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3
27802781
; CGP-NEXT: v_cndmask_b32_e32 v7, v1, v7, vcc
2781-
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v5, 0
2782-
; CGP-NEXT: v_and_b32_e32 v5, 0xffffff, v2
2783-
; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v8, v3
2784-
; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v1
2785-
; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v6, 0
2786-
; CGP-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
2782+
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v6, 0
2783+
; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v3
2784+
; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v2, v1
2785+
; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v6, 0
2786+
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
27872787
; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v7
2788-
; CGP-NEXT: v_mul_lo_u32 v6, v2, v4
2788+
; CGP-NEXT: v_mul_lo_u32 v4, v2, v5
27892789
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
27902790
; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v1, vcc
27912791
; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0
2792-
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v6
2793-
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
2794-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4
2795-
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
2796-
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v4
2797-
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
2798-
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
2799-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4
2800-
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
2792+
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v4
2793+
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
2794+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5
2795+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2796+
; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v5
2797+
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
2798+
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
2799+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5
2800+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
28012801
; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2
28022802
; CGP-NEXT: s_setpc_b64 s[30:31]
28032803
%num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>

0 commit comments

Comments
 (0)