From 375c14d8a3659f8186457d64e0475b037d3ab7da Mon Sep 17 00:00:00 2001 From: Usha Gupta Date: Thu, 29 May 2025 11:26:07 +0000 Subject: [PATCH] [GlobalISel] Add G_CONCAT_VECTORS computeKnownBits --- .../CodeGen/GlobalISel/GISelValueTracking.cpp | 25 +++++++ .../AArch64/GlobalISel/knownbits-concat.mir | 69 +++++++++++++++++++ llvm/test/CodeGen/AArch64/aarch64-smull.ll | 7 +- 3 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-concat.mir diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 67b1a449f8483..692befde71cb1 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -675,6 +675,31 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, } break; } + case TargetOpcode::G_CONCAT_VECTORS: { + if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector()) + break; + // Split DemandedElts and test each of the demanded subvectors. + Known.Zero.setAllBits(); + Known.One.setAllBits(); + unsigned NumSubVectorElts = + MRI.getType(MI.getOperand(1).getReg()).getNumElements(); + unsigned NumSubVectors = MI.getNumOperands() - 1; + + for (unsigned i = 0; i != NumSubVectors; ++i) { + APInt DemandedSub = + DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts); + if (!!DemandedSub) { + computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedSub, + Depth + 1); + + Known = Known.intersectWith(Known2); + } + // If we don't know any bits, early out. 
+ if (Known.isUnknown()) + break; + } + break; + } } LLVM_DEBUG(dumpResult(MI, Known, Depth)); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-concat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-concat.mir new file mode 100644 index 0000000000000..85129e9639a6a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-concat.mir @@ -0,0 +1,69 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -filetype=null 2>&1 | FileCheck %s + +--- +name: no_knownbits +body: | + bb.0: + + ; CHECK-LABEL: name: @no_knownbits + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %1:_(<2 x s8>) = COPY $h1 + %2:_(<4 x s8>) = G_CONCAT_VECTORS %0, %1 +... +--- +name: zext_concat +body: | + bb.0: + + ; CHECK-LABEL: name: @zext_concat + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %zext1:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %res:_ KnownBits:00000000???????? SignBits:8 + %0:_(<2 x s8>) = COPY $h0 + %1:_(<2 x s8>) = COPY $h1 + %zext0:_(<2 x s16>) = G_ZEXT %0 + %zext1:_(<2 x s16>) = G_ZEXT %1 + %res:_(<4 x s16>) = G_CONCAT_VECTORS %zext0, %zext1 +... +--- + +name: sext_concat +body: | + bb.0: + + ; CHECK-LABEL: name: @sext_concat + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9 + ; CHECK-NEXT: %sext1:_ KnownBits:???????????????? SignBits:9 + ; CHECK-NEXT: %res:_ KnownBits:???????????????? 
SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %1:_(<2 x s8>) = COPY $h1 + %sext0:_(<2 x s16>) = G_SEXT %0 + %sext1:_(<2 x s16>) = G_SEXT %1 + %res:_(<4 x s16>) = G_CONCAT_VECTORS %sext0, %sext1 +... +--- + +name: mixed_ext +body: | + bb.0: + + ; CHECK-LABEL: name: @mixed_ext + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %sext1:_ KnownBits:???????????????? SignBits:9 + ; CHECK-NEXT: %res:_ KnownBits:???????????????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %1:_(<2 x s8>) = COPY $h1 + %zext0:_(<2 x s16>) = G_ZEXT %0 + %sext1:_(<2 x s16>) = G_SEXT %1 + %res:_(<4 x s16>) = G_CONCAT_VECTORS %zext0, %sext1 +... diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 951001c84aed0..d6fd4c4110a12 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -199,7 +199,6 @@ define <4 x i32> @smull_zext_v4i16_v4i32(ptr %A, ptr %B) nounwind { ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: fmov w10, s2 ; CHECK-GI-NEXT: fmov w11, s3 -; CHECK-GI-NEXT: ldr d2, [x1] ; CHECK-GI-NEXT: uxtb w9, w9 ; CHECK-GI-NEXT: uxtb w10, w10 ; CHECK-GI-NEXT: uxtb w11, w11 @@ -208,9 +207,9 @@ define <4 x i32> @smull_zext_v4i16_v4i32(ptr %A, ptr %B) nounwind { ; CHECK-GI-NEXT: mov v1.h[1], w11 ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0 -; CHECK-GI-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: ret %load.A = load <4 x i8>, ptr %A %load.B = load <4 x i16>, ptr %B