Skip to content

Commit e0861f0

Browse files
committed
[AArch64][GISel] Don't crash in known-bits when copying from vectors to non-vectors
1 parent e5c418f commit e0861f0

File tree

2 files changed

+59
-2
lines changed

2 files changed

+59
-2
lines changed

llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
247247
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
248248
const MachineOperand &Src = MI.getOperand(Idx);
249249
Register SrcReg = Src.getReg();
250+
LLT SrcTy = MRI.getType(SrcReg);
250251
// Look through trivial copies and phis but don't look through trivial
251252
// copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
252253
// analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
255256
// We can't use NoSubRegister by name as it's defined by each target but
256257
// it's always defined to be 0 by tablegen.
257258
if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
258-
MRI.getType(SrcReg).isValid()) {
259+
SrcTy.isValid()) {
260+
// In case we're forwarding from a vector register to a non-vector
261+
// register, we need to update the demanded elements to reflect this
262+
// before recursing.
263+
APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
264+
? APInt::getAllOnes(SrcTy.getNumElements())
265+
: DemandedElts; // Known to be APInt(1, 1)
259266
// For COPYs we don't do anything, don't increase the depth.
260-
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
267+
computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
261268
Depth + (Opcode != TargetOpcode::COPY));
262269
Known2 = Known2.anyextOrTrunc(BitWidth);
263270
Known = Known.intersectWith(Known2);
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -O3 -o - %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
target triple = "aarch64-unknown-unknown"
6+
7+
; Check we don't crash here when computing known bits.
8+
9+
define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
10+
; CHECK-LABEL: test:
11+
; CHECK: // %bb.0: // %entry
12+
; CHECK-NEXT: sub sp, sp, #32
13+
; CHECK-NEXT: .cfi_def_cfa_offset 32
14+
; CHECK-NEXT: mov w9, wzr
15+
; CHECK-NEXT: .LBB0_1: // %loop
16+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
17+
; CHECK-NEXT: mov w8, w9
18+
; CHECK-NEXT: mov x9, sp
19+
; CHECK-NEXT: str q0, [sp]
20+
; CHECK-NEXT: bfi x9, x8, #1, #3
21+
; CHECK-NEXT: movi v1.2d, #0000000000000000
22+
; CHECK-NEXT: ldrh w9, [x9]
23+
; CHECK-NEXT: tst w9, #0xff
24+
; CHECK-NEXT: cset w9, eq
25+
; CHECK-NEXT: mov v1.h[0], w9
26+
; CHECK-NEXT: xtn v1.8b, v1.8h
27+
; CHECK-NEXT: fmov w9, s1
28+
; CHECK-NEXT: tbz w0, #0, .LBB0_1
29+
; CHECK-NEXT: // %bb.2: // %exit
30+
; CHECK-NEXT: movi v0.2d, #0000000000000000
31+
; CHECK-NEXT: mov v0.s[0], w8
32+
; CHECK-NEXT: add sp, sp, #32
33+
; CHECK-NEXT: ret
34+
entry:
35+
br label %loop
36+
37+
exit:
38+
%result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
39+
ret <4 x i32> %result
40+
41+
loop:
42+
%index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
43+
%extracted = extractelement <8 x i16> %in, i32 %index
44+
%masked = and i16 %extracted, 255
45+
%maskedIsZero = icmp eq i16 %masked, 0
46+
%maskedIsZero.zext = zext i1 %maskedIsZero to i8
47+
%insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
48+
%insert.bitcast = bitcast <4 x i8> %insert to i32
49+
br i1 %continue, label %exit, label %loop
50+
}

0 commit comments

Comments
 (0)