Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
const MachineOperand &Src = MI.getOperand(Idx);
Register SrcReg = Src.getReg();
LLT SrcTy = MRI.getType(SrcReg);
// Look through trivial copies and phis but don't look through trivial
// copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
// analysis is currently unable to determine the bit width of a
Expand All @@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
// We can't use NoSubRegister by name as it's defined by each target but
// it's always defined to be 0 by tablegen.
if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
MRI.getType(SrcReg).isValid()) {
SrcTy.isValid()) {
// In case we're forwarding from a vector register to a non-vector
// register we need to update the demanded elements to reflect this
// before recursing.
APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
? APInt::getAllOnes(SrcTy.getNumElements())
: DemandedElts; // Known to be APInt(1, 1)
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
Known2 = Known2.anyextOrTrunc(BitWidth);
Known = Known.intersectWith(Known2);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -O3 -o - %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-unknown"

; Check we don't crash here when computing known bits.

define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
; CHECK-LABEL: test:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: mov w8, w9
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str q0, [sp]
; CHECK-NEXT: bfi x9, x8, #1, #3
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: ldrh w9, [x9]
; CHECK-NEXT: tst w9, #0xff
; CHECK-NEXT: cset w9, eq
; CHECK-NEXT: mov v1.h[0], w9
; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: tbz w0, #0, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov v0.s[0], w8
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
entry:
br label %loop

exit:
%result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
ret <4 x i32> %result

loop:
%index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
%extracted = extractelement <8 x i16> %in, i32 %index
%masked = and i16 %extracted, 255
%maskedIsZero = icmp eq i16 %masked, 0
%maskedIsZero.zext = zext i1 %maskedIsZero to i8
%insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
%insert.bitcast = bitcast <4 x i8> %insert to i32
br i1 %continue, label %exit, label %loop
}
Loading