Skip to content

Commit 93a8ca8

Browse files
authored
[AArch64][GISel] Don't crash in known-bits when copying from vectors to non-vectors (#168081)
Updates the demanded elements before recursing through copies and phis when the source register is a fixed-vector type but the destination register is not, so that the demanded-elements mask matches the number of elements in the source. Fixes #167842.
1 parent 1fb8e3d commit 93a8ca8

File tree

2 files changed

+65
-2
lines changed

2 files changed

+65
-2
lines changed

llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
247247
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
248248
const MachineOperand &Src = MI.getOperand(Idx);
249249
Register SrcReg = Src.getReg();
250+
LLT SrcTy = MRI.getType(SrcReg);
250251
// Look through trivial copies and phis but don't look through trivial
251252
// copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
252253
// analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
255256
// We can't use NoSubRegister by name as it's defined by each target but
256257
// it's always defined to be 0 by tablegen.
257258
if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
258-
MRI.getType(SrcReg).isValid()) {
259+
SrcTy.isValid()) {
260+
// In case we're forwarding from a vector register to a non-vector
261+
// register we need to update the demanded elements to reflect this
262+
// before recursing.
263+
APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
264+
? APInt::getAllOnes(SrcTy.getNumElements())
265+
: DemandedElts; // Known to be APInt(1, 1)
259266
// For COPYs we don't do anything, don't increase the depth.
260-
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
267+
computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
261268
Depth + (Opcode != TargetOpcode::COPY));
262269
Known2 = Known2.anyextOrTrunc(BitWidth);
263270
Known = Known.intersectWith(Known2);
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -global-isel -o - %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-unknown"
5+
6+
; Check we don't crash here when computing known bits.
7+
8+
define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
9+
; CHECK-LABEL: test:
10+
; CHECK: // %bb.0: // %entry
11+
; CHECK-NEXT: sub sp, sp, #16
12+
; CHECK-NEXT: .cfi_def_cfa_offset 16
13+
; CHECK-NEXT: mov w12, wzr
14+
; CHECK-NEXT: mov x8, sp
15+
; CHECK-NEXT: mov w9, #2 // =0x2
16+
; CHECK-NEXT: mov w10, #0 // =0x0
17+
; CHECK-NEXT: .LBB0_1: // %loop
18+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
19+
; CHECK-NEXT: mov w11, w12
20+
; CHECK-NEXT: mov w12, w12
21+
; CHECK-NEXT: str q0, [sp]
22+
; CHECK-NEXT: and x12, x12, #0x7
23+
; CHECK-NEXT: umull x12, w12, w9
24+
; CHECK-NEXT: ldrb w12, [x8, x12]
25+
; CHECK-NEXT: cmp w12, #0
26+
; CHECK-NEXT: cset w12, eq
27+
; CHECK-NEXT: fmov s1, w12
28+
; CHECK-NEXT: mov v1.b[1], w10
29+
; CHECK-NEXT: mov v1.b[2], w10
30+
; CHECK-NEXT: mov v1.b[3], w10
31+
; CHECK-NEXT: fmov w12, s1
32+
; CHECK-NEXT: tbz w0, #0, .LBB0_1
33+
; CHECK-NEXT: // %bb.2: // %exit
34+
; CHECK-NEXT: fmov s0, w11
35+
; CHECK-NEXT: mov v0.s[1], wzr
36+
; CHECK-NEXT: mov v0.s[2], wzr
37+
; CHECK-NEXT: mov v0.s[3], wzr
38+
; CHECK-NEXT: add sp, sp, #16
39+
; CHECK-NEXT: ret
40+
entry:
41+
br label %loop
42+
43+
exit:
44+
%result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
45+
ret <4 x i32> %result
46+
47+
loop:
48+
%index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
49+
%extracted = extractelement <8 x i16> %in, i32 %index
50+
%masked = and i16 %extracted, 255
51+
%maskedIsZero = icmp eq i16 %masked, 0
52+
%maskedIsZero.zext = zext i1 %maskedIsZero to i8
53+
%insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
54+
%insert.bitcast = bitcast <4 x i8> %insert to i32
55+
br i1 %continue, label %exit, label %loop
56+
}

0 commit comments

Comments
 (0)