From 92f4a2bf0a9d3962a25d15c1df5b6824950c74dc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 4 Oct 2024 18:09:53 +0100
Subject: [PATCH] [x86] combineMUL - when looking for a vector multiply by
 splat constant, ensure we're only accepting ConstantInt splat scalars.

Fixes #111170

(cherry picked from commit 9459d729d22b7bfedad9d3a4237162077c6984a4)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  3 ++-
 llvm/test/CodeGen/X86/pr111170.ll       | 33 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr111170.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 10f269f803778..4e3a181f9a3ae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47899,7 +47899,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   if (VT.isVector())
     if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
       if (auto *SplatC = RawC->getSplatValue())
-        C = &(SplatC->getUniqueInteger());
+        if (auto *SplatCI = dyn_cast<ConstantInt>(SplatC))
+          C = &(SplatCI->getValue());
 
   if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
     return SDValue();
diff --git a/llvm/test/CodeGen/X86/pr111170.ll b/llvm/test/CodeGen/X86/pr111170.ll
new file mode 100644
index 0000000000000..145bf7119edcb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr111170.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-pc-windows-msvc -mcpu=corei7-avx | FileCheck %s
+
+define void @PR111170(<16 x i32> %x_load, ptr %offsetsPtr.i) {
+; CHECK-LABEL: PR111170:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2.80259693E-44,2.80259693E-44,2.80259693E-44,2.80259693E-44]
+; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm3
+; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpmulld %xmm2, %xmm0, %xmm4
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqu %xmm0, 16(%eax)
+; CHECK-NEXT:    vmovdqu %xmm4, (%eax)
+; CHECK-NEXT:    vmovdqu %xmm1, 48(%eax)
+; CHECK-NEXT:    vmovdqu %xmm3, 32(%eax)
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqu %xmm0, 16
+; CHECK-NEXT:    vmovdqu %xmm0, 0
+; CHECK-NEXT:    vmovdqu %xmm0, 48
+; CHECK-NEXT:    vmovdqu %xmm0, 32
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retl
+  %mul__x_load = mul <16 x i32> <i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20>, %x_load
+  store <16 x i32> %mul__x_load, ptr %offsetsPtr.i, align 4
+  %blend1.i12.i = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> zeroinitializer, <8 x float> <float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000>, <8 x float> zeroinitializer)
+  %blend.i13.i = shufflevector <8 x float> zeroinitializer, <8 x float> %blend1.i12.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %blendAsInt.i14.i = bitcast <16 x float> %blend.i13.i to <16 x i32>
+  store <16 x i32> %blendAsInt.i14.i, ptr null, align 4
+  ret void
+}
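
Note (illustration only, not part of the patch): a minimal standalone sketch of the failure mode the new dyn_cast guards against, assuming an LLVM build environment. In the reproducer the <16 x i32> splat-of-20 multiply shares its constant-pool entry with a float splat of the same bit pattern (2.80259693E-44), so getTargetConstantFromNode can hand combineMul a splat scalar that is a ConstantFP rather than a ConstantInt. The main() harness and its names are made up for the sketch.

// sketch.cpp - compile/link against LLVM; not from the patch itself.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // A float constant whose bits are the integer 20 - the shape of the
  // shared constant-pool entry in pr111170.ll.
  Constant *Splat =
      ConstantFP::get(Ctx, APFloat(APFloat::IEEEsingle(), APInt(32, 20)));

  const APInt *C = nullptr;
  // Before the fix: calling Splat->getUniqueInteger() here trips an
  // assertion, because the constant is not a ConstantInt.
  // After the fix: the dyn_cast simply fails, C stays null, and
  // combineMul bails out of the fold via its !C check.
  if (auto *CI = dyn_cast<ConstantInt>(Splat))
    C = &CI->getValue();

  outs() << (C ? "ConstantInt splat\n" : "not a ConstantInt splat\n");
  return 0;
}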