From 75a5f8c059fd3837a704fdf598a9d35cdf5fa4c0 Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Fri, 20 Dec 2024 11:07:06 +0100 Subject: [PATCH 1/2] [RISCV] add test case for XCVmem addressing mode heuristic --- llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll | 33 +++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll diff --git a/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll b/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll new file mode 100644 index 0000000000000..2e169c17b56fe --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 -mtriple=riscv32 -mattr=+m,+xcvmem -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK + +define i32 @test_heuristic(ptr %b, i32 %e, i1 %0) { +; CHECK-LABEL: test_heuristic: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a2, a2, 1 +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: beqz a2, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: cv.lbu a0, a1(a3) +; CHECK-NEXT: ret +entry: + %1 = getelementptr i8, ptr %b, i32 %e + br label %loop + +loop: ; preds = %loop, %entry + %2 = phi ptr [ %b, %entry ], [ %7, %loop ] + %3 = phi ptr [ %1, %entry ], [ %8, %loop ] + %4 = load i8, ptr %2, align 1 + %5 = load i8, ptr %3, align 1 + %6 = zext i8 %5 to i32 + %7 = getelementptr i8, ptr %2, i32 1 + %8 = getelementptr i8, ptr %3, i32 1 + br i1 %0, label %exit, label %loop + +exit: ; preds = %loop + ret i32 %6 +} From 342ea51242e39be9b5ec2a76bcfb10298047c1cb Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Thu, 19 Dec 2024 08:21:08 +0100 Subject: [PATCH 2/2] [RISCV] Implement RISCVTTIImpl::getPreferredAddressingMode for HasVendorXCVmem For a simple matmult kernel this heuristic reduces the length of the critical basic block from 20 to 15 
instructions, resulting in a 20% speedup. [RISCV] Address PR comment [RISCV] Add !ST->is64Bit() check --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 9 +++++++++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 3 +++ llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll | 5 +++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 49192bd638022..2f9beb0b3983c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2329,6 +2329,15 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return std::max(1U, RegWidth.getFixedValue() / ElemWidth); } +TTI::AddressingModeKind +RISCVTTIImpl::getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const { + if (ST->hasVendorXCVmem() && !ST->is64Bit()) + return TTI::AMK_PostIndexed; + + return BasicTTIImplBase::getPreferredAddressingMode(L, SE); +} + bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) { // RISC-V specific here are "instruction number 1st priority". 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index bd90bfed6e2c9..9b364391f0fa4 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -388,6 +388,9 @@ class RISCVTTIImpl : public BasicTTIImplBase { llvm_unreachable("unknown register class"); } + TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const; + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { if (Vector) return RISCVRegisterClass::VRRC; diff --git a/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll b/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll index 2e169c17b56fe..c8832bf49dd6a 100644 --- a/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll +++ b/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll @@ -5,14 +5,15 @@ define i32 @test_heuristic(ptr %b, i32 %e, i1 %0) { ; CHECK-LABEL: test_heuristic: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add a3, a0, a1 ; CHECK-NEXT: andi a2, a2, 1 ; CHECK-NEXT: .LBB0_1: # %loop ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: cv.lbu a1, (a3), 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: beqz a2, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: cv.lbu a0, a1(a3) +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ret entry: %1 = getelementptr i8, ptr %b, i32 %e