From 5a5e22964adc0232a82c0d20a62ce7f8e7902676 Mon Sep 17 00:00:00 2001 From: Koakuma Date: Sun, 20 Apr 2025 14:30:52 +0700 Subject: [PATCH] Reapply "[SPARC] Use umulxhi to do extending 64x64->128 multiply when we have VIS3" (#135897) Update the tests to reflect the change in instruction ordering. Otherwise there are no changes from the previous commit. This reverts commit 5e9650ec2deb2f2bb6d5ad28e83bb6cd3c4189e4. --- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 6 +- llvm/lib/Target/Sparc/SparcInstrVIS.td | 10 ++++ llvm/test/CodeGen/SPARC/multiply-extension.ll | 59 +++++++++++++++++++ .../SPARC/smulo-128-legalisation-lowering.ll | 44 ++++++++++++++ .../SPARC/umulo-128-legalisation-lowering.ll | 33 +++++++++++ 5 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/SPARC/multiply-extension.ll diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 83108653397fb..0bfa202c9f92e 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1857,8 +1857,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, if (Subtarget->is64Bit()) { setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, + Subtarget->isVIS3() ? Legal : Expand); + setOperationAction(ISD::MULHS, MVT::i64, + Subtarget->isVIS3() ? Legal : Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td index fdb3468a70392..d411daedf1b2f 100644 --- a/llvm/lib/Target/Sparc/SparcInstrVIS.td +++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td @@ -295,6 +295,16 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>; let Predicates = [HasVIS3] in { def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>; +def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>; +// Signed "MULXHI". +// Based on the formula presented in OSA2011 ยง7.140, but with bitops to select +// the values to be added. +// TODO: This expansion should probably be moved to DAG legalization phase. +def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)), + (SUBrr (UMULXHI $lhs, $rhs), + (ADDrr (ANDrr (SRAXri $lhs, 63), $rhs), + (ANDrr (SRAXri $rhs, 63), $lhs)))>; + def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>; def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>; // 32-bit LZCNT. diff --git a/llvm/test/CodeGen/SPARC/multiply-extension.ll b/llvm/test/CodeGen/SPARC/multiply-extension.ll new file mode 100644 index 0000000000000..9f46842e68f82 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/multiply-extension.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9 +; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis3 | FileCheck %s -check-prefix=VIS3 + +define i128 @signed_multiply_extend(i64 %0, i64 %1) nounwind { +; V9-LABEL: signed_multiply_extend: +; V9: ! %bb.0: +; V9-NEXT: save %sp, -176, %sp +; V9-NEXT: mov %i1, %o1 +; V9-NEXT: mov %i0, %o3 +; V9-NEXT: srax %i0, 63, %o2 +; V9-NEXT: call __multi3 +; V9-NEXT: srax %i1, 63, %o0 +; V9-NEXT: mov %o0, %i0 +; V9-NEXT: ret +; V9-NEXT: restore %g0, %o1, %o1 +; +; VIS3-LABEL: signed_multiply_extend: +; VIS3: ! %bb.0: +; VIS3-NEXT: srax %o0, 63, %o2 +; VIS3-NEXT: and %o2, %o1, %o2 +; VIS3-NEXT: srax %o1, 63, %o3 +; VIS3-NEXT: and %o3, %o0, %o3 +; VIS3-NEXT: add %o3, %o2, %o2 +; VIS3-NEXT: umulxhi %o1, %o0, %o3 +; VIS3-NEXT: sub %o3, %o2, %o2 +; VIS3-NEXT: mulx %o1, %o0, %o1 +; VIS3-NEXT: retl +; VIS3-NEXT: mov %o2, %o0 + %3 = sext i64 %0 to i128 + %4 = sext i64 %1 to i128 + %5 = mul nsw i128 %4, %3 + ret i128 %5 +} + +define i128 @unsigned_multiply_extend(i64 %0, i64 %1) nounwind { +; V9-LABEL: unsigned_multiply_extend: +; V9: ! %bb.0: +; V9-NEXT: save %sp, -176, %sp +; V9-NEXT: mov %i1, %o1 +; V9-NEXT: mov %i0, %o3 +; V9-NEXT: mov %g0, %o0 +; V9-NEXT: call __multi3 +; V9-NEXT: mov %g0, %o2 +; V9-NEXT: mov %o0, %i0 +; V9-NEXT: ret +; V9-NEXT: restore %g0, %o1, %o1 +; +; VIS3-LABEL: unsigned_multiply_extend: +; VIS3: ! %bb.0: +; VIS3-NEXT: umulxhi %o1, %o0, %o2 +; VIS3-NEXT: mulx %o1, %o0, %o1 +; VIS3-NEXT: retl +; VIS3-NEXT: mov %o2, %o0 + %3 = zext i64 %0 to i128 + %4 = zext i64 %1 to i128 + %5 = mul nuw i128 %4, %3 + ret i128 %5 +} diff --git a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll index 07e4c408a3ff0..1e5ab7922de08 100644 --- a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC ; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC64 +; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu -mattr=vis3 | FileCheck %s --check-prefixes=SPARC64-VIS3 define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind { ; SPARC-LABEL: muloti_test: @@ -213,6 +214,49 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind { ; SPARC64-NEXT: srl %i3, 0, %i2 ; SPARC64-NEXT: ret ; SPARC64-NEXT: restore +; +; SPARC64-VIS3-LABEL: muloti_test: +; SPARC64-VIS3: .register %g2, #scratch +; SPARC64-VIS3-NEXT: .register %g3, #scratch +; SPARC64-VIS3-NEXT: ! %bb.0: ! %start +; SPARC64-VIS3-NEXT: save %sp, -128, %sp +; SPARC64-VIS3-NEXT: mov %g0, %i5 +; SPARC64-VIS3-NEXT: umulxhi %i0, %i3, %i4 +; SPARC64-VIS3-NEXT: srax %i0, 63, %g2 +; SPARC64-VIS3-NEXT: mulx %g2, %i3, %g3 +; SPARC64-VIS3-NEXT: add %i4, %g3, %i4 +; SPARC64-VIS3-NEXT: umulxhi %i1, %i3, %g3 +; SPARC64-VIS3-NEXT: mulx %i0, %i3, %g4 +; SPARC64-VIS3-NEXT: addcc %g4, %g3, %g3 +; SPARC64-VIS3-NEXT: addxccc %i4, %g0, %g4 +; SPARC64-VIS3-NEXT: umulxhi %i1, %i2, %i4 +; SPARC64-VIS3-NEXT: srax %i2, 63, %g5 +; SPARC64-VIS3-NEXT: mulx %i1, %g5, %l0 +; SPARC64-VIS3-NEXT: add %i4, %l0, %l0 +; SPARC64-VIS3-NEXT: mulx %i1, %i2, %i4 +; SPARC64-VIS3-NEXT: addcc %i4, %g3, %i4 +; SPARC64-VIS3-NEXT: addxccc %l0, %g0, %g3 +; SPARC64-VIS3-NEXT: srax %g3, 63, %l0 +; SPARC64-VIS3-NEXT: addcc %g4, %g3, %g3 +; SPARC64-VIS3-NEXT: srax %g4, 63, %g4 +; SPARC64-VIS3-NEXT: addxccc %g4, %l0, %g4 +; SPARC64-VIS3-NEXT: and %g5, %i0, %g5 +; SPARC64-VIS3-NEXT: and %g2, %i2, %g2 +; SPARC64-VIS3-NEXT: add %g2, %g5, %g2 +; SPARC64-VIS3-NEXT: umulxhi %i0, %i2, %g5 +; SPARC64-VIS3-NEXT: sub %g5, %g2, %g2 +; SPARC64-VIS3-NEXT: mulx %i0, %i2, %i0 +; SPARC64-VIS3-NEXT: addcc %i0, %g3, %i0 +; SPARC64-VIS3-NEXT: addxccc %g2, %g4, %i2 +; SPARC64-VIS3-NEXT: srax %i4, 63, %g2 +; SPARC64-VIS3-NEXT: xor %i2, %g2, %i2 +; SPARC64-VIS3-NEXT: xor %i0, %g2, %i0 +; SPARC64-VIS3-NEXT: or %i0, %i2, %i0 +; SPARC64-VIS3-NEXT: movrnz %i0, 1, %i5 +; SPARC64-VIS3-NEXT: mulx %i1, %i3, %i1 +; SPARC64-VIS3-NEXT: srl %i5, 0, %i2 +; SPARC64-VIS3-NEXT: ret +; SPARC64-VIS3-NEXT: restore %g0, %i4, %o0 start: %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %l, i128 %r) %1 = extractvalue { i128, i1 } %0, 0 diff --git a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll index f3835790210a0..6d197c88bfecd 100644 --- a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC ; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC64 +; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu -mattr=vis3 | FileCheck %s --check-prefixes=SPARC64-VIS3 define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind { ; SPARC-LABEL: muloti_test: @@ -199,6 +200,38 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind { ; SPARC64-NEXT: srl %i1, 0, %i2 ; SPARC64-NEXT: ret ; SPARC64-NEXT: restore %g0, %o1, %o1 +; +; SPARC64-VIS3-LABEL: muloti_test: +; SPARC64-VIS3: .register %g2, #scratch +; SPARC64-VIS3-NEXT: .register %g3, #scratch +; SPARC64-VIS3-NEXT: ! %bb.0: ! %start +; SPARC64-VIS3-NEXT: save %sp, -128, %sp +; SPARC64-VIS3-NEXT: mov %g0, %i5 +; SPARC64-VIS3-NEXT: mov %g0, %g2 +; SPARC64-VIS3-NEXT: mov %g0, %g3 +; SPARC64-VIS3-NEXT: mov %g0, %g4 +; SPARC64-VIS3-NEXT: mov %g0, %g5 +; SPARC64-VIS3-NEXT: mulx %i2, %i1, %i4 +; SPARC64-VIS3-NEXT: mulx %i0, %i3, %l0 +; SPARC64-VIS3-NEXT: add %l0, %i4, %i4 +; SPARC64-VIS3-NEXT: umulxhi %i1, %i3, %l0 +; SPARC64-VIS3-NEXT: add %l0, %i4, %i4 +; SPARC64-VIS3-NEXT: cmp %i4, %l0 +; SPARC64-VIS3-NEXT: movrnz %i2, 1, %g2 +; SPARC64-VIS3-NEXT: movrnz %i0, 1, %g3 +; SPARC64-VIS3-NEXT: and %g3, %g2, %g2 +; SPARC64-VIS3-NEXT: umulxhi %i0, %i3, %i0 +; SPARC64-VIS3-NEXT: movrnz %i0, 1, %g4 +; SPARC64-VIS3-NEXT: movcs %xcc, 1, %i5 +; SPARC64-VIS3-NEXT: or %g2, %g4, %i0 +; SPARC64-VIS3-NEXT: umulxhi %i2, %i1, %i2 +; SPARC64-VIS3-NEXT: movrnz %i2, 1, %g5 +; SPARC64-VIS3-NEXT: or %i0, %g5, %i0 +; SPARC64-VIS3-NEXT: or %i0, %i5, %i0 +; SPARC64-VIS3-NEXT: mulx %i1, %i3, %i1 +; SPARC64-VIS3-NEXT: srl %i0, 0, %i2 +; SPARC64-VIS3-NEXT: ret +; SPARC64-VIS3-NEXT: restore %g0, %i4, %o0 start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) %1 = extractvalue { i128, i1 } %0, 0