From 54bd095e51ede4c9c0437fef2d4275db22ae8fb4 Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Thu, 31 Jul 2025 21:42:32 +0530 Subject: [PATCH 1/9] [RISCV] Implement EmitTargetCodeForMemset for Xqcilsm --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 9 + llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 8 + .../Target/RISCV/RISCVSelectionDAGInfo.cpp | 101 ++ llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h | 6 + llvm/test/CodeGen/RISCV/xqcilsm-memset.ll | 929 ++++++++++++++++++ 5 files changed, 1053 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/xqcilsm-memset.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f223fdbef4359..b778c33083685 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1845,6 +1845,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { CurDAG->RemoveDeadNode(Node); return; } + case RISCVISD::QC_SETWMI: { + SDValue Chain = Node->getOperand(0); + SDVTList VTs = Node->getVTList(); + SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), Node->getOperand(4), Chain}; + MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops); + ReplaceNode(Node, New); + return; + } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 52656134b7774..2479ced164927 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -14,6 +14,14 @@ // Operand and SDNode transformation definitions. 
//===----------------------------------------------------------------------===// +def SDT_StoreMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 3>, + SDTCisPtrTy<2>, + SDTCisVT<3, XLenVT>]>; + +def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_StoreMultiple, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + def uimm5nonzero : RISCVOp<XLenVT>, ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5, "NonZero">; diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index 6ecddad72c078..edfa2992711a0 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "RISCVSelectionDAGInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/SelectionDAG.h" #define GET_SDNODE_DESC #include "RISCVGenSDNodeInfo.inc" @@ -62,3 +64,102 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, } #endif } + +SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const { + const RISCVSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); + // We currently do this only for Xqcilsm + if (!Subtarget.hasVendorXqcilsm()) + return SDValue(); + + // Do this only if we know the size at compile time. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + + uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); + + // Do this only if it is word aligned and we write multiple of 4 bytes.
+ if (!((Alignment.value() & 3) == 0 && (NumberOfBytesToWrite & 3) == 0)) + return SDValue(); + + SmallVector<SDValue, 8> OutChains; + SDValue SizeWords, OffsetSetwmi; + SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + int NumberOfWords = NumberOfBytesToWrite / 4; + + // Helper for constructing the QC_SETWMI instruction + auto getSetwmiNode = [&](SDValue SizeWords, SDValue OffsetSetwmi) -> SDValue { + SDValue Ops[] = {Chain, SrcValueReplicated, Dst, SizeWords, OffsetSetwmi}; + return DAG.getNode(RISCVISD::QC_SETWMI, dl, MVT::Other, Ops); + }; + + bool IsZeroVal = + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); + + // If i8 type and constant non-zero value. + if ((Src.getValueType() == MVT::i8) && !IsZeroVal) + // Replicate byte to word by multiplication with 0x01010101. + SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, + DAG.getConstant(16843009, dl, MVT::i32)); + + // We limit a QC_SETWMI to 16 words or less to improve interruptibility. + // So for 1-16 words we use a single QC_SETWMI: + // + // QC_SETWMI reg1, N, 0(reg2) + // + // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the + // second for the remainder: + // + // QC_SETWMI reg1, 16, 0(reg2) + // QC_SETWMI reg1, 32-N, 64(reg2) + // + // For 33-48 words, we would like to use (16, 16, n), but that means the last + // QC_SETWMI needs an offset of 128 which the instruction doesnt support. + // So in this case we use a length of 15 for the second instruction and we do + // the rest with the third instruction.
+ // This means the maximum inlined number of words is 47 (for now): + // + // QC_SETWMI R2, R0, 16, 0 + // QC_SETWMI R2, R0, 15, 64 + // QC_SETWMI R2, R0, N, 124 + // + // For 48 words or more, call the target independent memset + if (NumberOfWords <= 16) { + // 1 - 16 words + SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32); + SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + return getSetwmiNode(SizeWords, OffsetSetwmi); + } else if (NumberOfWords <= 47) { + if (NumberOfWords <= 32) { + // 17 - 32 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } else { + // 33 - 47 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(15, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + } + + // >= 48 words. Call target independent memset. 
+ return SDValue(); +} diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h index 641189f8661c1..08c8d11f2b108 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h @@ -34,6 +34,12 @@ class RISCVSelectionDAGInfo : public SelectionDAGGenTargetInfo { void verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const override; + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const override; + bool hasPassthruOp(unsigned Opcode) const { return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask; } diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll new file mode 100644 index 0000000000000..b0107cc1a4e03 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll @@ -0,0 +1,929 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I + +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IXQCISLS + +%struct.anon = type { [16 x i32] } +%struct.anon.0 = type { [47 x i32] } +%struct.anon.1 = type { [48 x i32] } +%struct.anon.2 = type { [64 x i8] } +%struct.struct1_t = type { [16 x i32] } + +@struct1 = common dso_local local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@struct4b = common dso_local local_unnamed_addr global %struct.anon.0 zeroinitializer, align 4 +@struct4b1 = common dso_local local_unnamed_addr global %struct.anon.1 zeroinitializer, align 4 +@struct2 = common dso_local local_unnamed_addr global %struct.anon.2 zeroinitializer, align 1 +@arr1 = common dso_local local_unnamed_addr 
global [100 x i32] zeroinitializer, align 4 +@struct1_ = common dso_local local_unnamed_addr global %struct.struct1_t zeroinitializer, align 4 + +define void @test1(ptr nocapture %p, i32 %n) nounwind { +; RV32I-LABEL: test1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test1: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: mv a2, a1 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false) + ret void +} + +declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) + +define void @test2(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2a(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 124(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false) + ret void +} + +define void @test2b(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: tail memset +; 
+; RV32IXQCISLS-LABEL: test2b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a1, 165 +; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false) + ret void +} + +define void @test2c(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2d(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2d: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, -91 +; RV32I-NEXT: lui a2, 1048570 +; RV32I-NEXT: lui a3, 678490 +; RV32I-NEXT: addi a2, a2, 1445 +; RV32I-NEXT: addi a3, a3, 1445 +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sh a2, 8(a0) +; RV32I-NEXT: sb a1, 10(a0) +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test2d: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a1, -91 +; RV32IXQCISLS-NEXT: lui a2, 1048570 +; RV32IXQCISLS-NEXT: lui a3, 678490 +; RV32IXQCISLS-NEXT: addi a2, a2, 1445 +; RV32IXQCISLS-NEXT: addi a3, a3, 1445 +; RV32IXQCISLS-NEXT: sw a3, 0(a0) +; RV32IXQCISLS-NEXT: sw a3, 4(a0) +; RV32IXQCISLS-NEXT: sh a2, 8(a0) +; RV32IXQCISLS-NEXT: sb a1, 10(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false) + ret void +} + + +define ptr @test3(ptr %p) nounwind { +; RV32I-LABEL: test3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 256 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test3: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a2, 
256 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false) + ret ptr %p +} + +define ptr @test3a(ptr %p) nounwind { +; RV32I-LABEL: test3a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test3a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false) + ret ptr %p +} + +define void @test4() nounwind { +; RV32I-LABEL: test4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct1) +; RV32I-NEXT: addi a0, a0, %lo(struct1) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(struct1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false) + ret void +} + +define void @test4a(ptr nocapture %s) nounwind { +; RV32I-LABEL: test4a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 166 +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 682602 +; RV32IXQCISLS-NEXT: addi a1, a1, 1702 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false) + ret void +} + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) + +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) + +define void @test4b() nounwind { +; RV32I-LABEL: test4b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: lui a0, %hi(struct4b) +; RV32I-NEXT: addi a0, a0, %lo(struct4b) +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call memset +; RV32I-NEXT: lui a0, %hi(struct4b1) +; RV32I-NEXT: addi a0, a0, %lo(struct4b1) +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, %hi(struct4b) +; RV32IXQCISLS-NEXT: addi a1, a1, %lo(struct4b) +; RV32IXQCISLS-NEXT: lui a0, %hi(struct4b1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct4b1) +; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 124(a1) +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false) + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false) + ret void +} + +define void @test5() nounwind { +; RV32I-LABEL: test5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct2) +; RV32I-NEXT: addi a0, a0, %lo(struct2) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test5: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(struct2) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct2) +; RV32IXQCISLS-NEXT: li a2, 64 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 64, i1 false) + ret void +} + +define i32 @test6() nounwind { +; RV32I-LABEL: test6: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6: +; RV32IXQCISLS: # %bb.0: # %entry +; 
RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: li a0, 0 +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) + %0 = load i32, ptr %x, align 4 + ret i32 %0 +} + +define i32 @test6a() nounwind { +; RV32I-LABEL: test6a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: lw a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + store i32 0, ptr %x, align 4 + %x.0.x.0. = load volatile i32, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) + ret i32 %x.0.x.0. +} + +define zeroext i8 @test6b_c() nounwind { +; RV32I-LABEL: test6b_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sb zero, 12(sp) +; RV32I-NEXT: lbu a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sb zero, 12(sp) +; RV32IXQCISLS-NEXT: lbu a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i8, align 4 + call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %x) + call void @llvm.memset.p0.i32(ptr nonnull align 4 %x, i8 0, i32 1, i1 false) + %x.0.x.0. = load volatile i8, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %x) + ret i8 %x.0.x.0. 
+} + +define signext i16 @test6b_s() nounwind { +; RV32I-LABEL: test6b_s: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sh zero, 12(sp) +; RV32I-NEXT: lh a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_s: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sh zero, 12(sp) +; RV32IXQCISLS-NEXT: lh a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i16, align 4 + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %x) + store i16 0, ptr %x, align 4 + %x.0.x.0. = load volatile i16, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %x) + ret i16 %x.0.x.0. +} + +define i32 @test6b_l() nounwind { +; RV32I-LABEL: test6b_l: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_l: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: lw a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + store i32 0, ptr %x, align 4 + %x.0.x.0. = load volatile i32, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) + ret i32 %x.0.x.0. 
+} + +define i64 @test6b_ll() nounwind { +; RV32I-LABEL: test6b_ll: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_ll: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 8(sp) +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: lw a0, 8(sp) +; RV32IXQCISLS-NEXT: lw a1, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i64, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %x) + call void @llvm.memset.p0.i32(ptr nonnull align 8 %x, i8 0, i32 8, i1 false) + %x.0.x.0. = load volatile i64, ptr %x, align 8 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %x) + ret i64 %x.0.x.0. +} + +define zeroext i8 @test6c_c() nounwind { +; RV32I-LABEL: test6c_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sb zero, 15(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6c_c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sb zero, 15(sp) +; RV32IXQCISLS-NEXT: li a0, 0 +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i8 + call void @llvm.memset.p0.i32(ptr align 1 %x, i8 0, i32 1, i1 false) + %0 = load i8, ptr %x, align 1 + ret i8 %0 +} + +define signext i16 @test6c_s() nounwind { +; RV32I-LABEL: test6c_s: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sh zero, 14(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6c_s: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sh zero, 14(sp) +; RV32IXQCISLS-NEXT: li a0, 0 +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; 
RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i16 + call void @llvm.memset.p0.i32(ptr align 2 %x, i8 0, i32 2, i1 false) + %0 = load i16, ptr %x, align 2 + ret i16 %0 +} + +define i32 @test6c_l() nounwind { +; RV32I-LABEL: test6c_l: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6c_l: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: li a0, 0 +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) + %0 = load i32, ptr %x, align 4 + ret i32 %0 +} + +define i64 @test6c_ll() nounwind { +; RV32I-LABEL: test6c_ll: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6c_ll: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 8(sp) +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: li a0, 0 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i64, align 8 + call void @llvm.memset.p0.i32(ptr align 8 %x, i8 0, i32 8, i1 false) + %0 = load i64, ptr %x, align 8 + ret i64 %0 +} + +define void @test7() nounwind { +; RV32I-LABEL: test7: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: sw zero, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test7: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; 
RV32IXQCISLS-NEXT: sw zero, 4(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 8, i1 false) + ret void +} + +define void @test7a() nounwind { +; RV32I-LABEL: test7a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test7a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: ret +entry: + call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 0, i1 false) + ret void +} + +define void @test7a_unalign() nounwind { +; RV32I-LABEL: test7a_unalign: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: li a1, -1 +; RV32I-NEXT: sw a1, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sb a1, 16(a0) +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test7a_unalign: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: li a1, -1 +; RV32IXQCISLS-NEXT: sw a1, %lo(arr1)(a0) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: sw a1, 4(a0) +; RV32IXQCISLS-NEXT: sw a1, 8(a0) +; RV32IXQCISLS-NEXT: sw a1, 12(a0) +; RV32IXQCISLS-NEXT: sb a1, 16(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 17, i1 false) + ret void +} + +define void @test7b() nounwind { +; RV32I-LABEL: test7b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 255 +; RV32I-NEXT: li a2, 68 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test7b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: li a1, -1 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 1, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 68, i1 false) + ret void +} + +define void 
@test7c() nounwind { +; RV32I-LABEL: test7c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 128 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test7c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: lui a1, 526344 +; RV32IXQCISLS-NEXT: addi a1, a1, 128 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -128, i32 128, i1 false) + ret void +} + +define void @test7d() nounwind { +; RV32I-LABEL: test7d: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: li a2, 148 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test7d: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: lui a1, 53457 +; RV32IXQCISLS-NEXT: addi a1, a1, -755 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 6, 124(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 13, i32 148, i1 false) + ret void +} + +define void @test7e() nounwind { +; RV32I-LABEL: test7e: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 239 +; RV32I-NEXT: li a2, 100 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test7e: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: lui a1, 982783 +; RV32IXQCISLS-NEXT: addi a1, a1, -17 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 9, 64(a0) +; RV32IXQCISLS-NEXT: 
ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -17, i32 100, i1 false) + ret void +} + +define void @test8() nounwind { +; RV32I-LABEL: test8: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: sw zero, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: sw zero, 8(a0) +; RV32I-NEXT: sw zero, 12(a0) +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test8: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: sw zero, 4(a0) +; RV32IXQCISLS-NEXT: sw zero, 8(a0) +; RV32IXQCISLS-NEXT: sw zero, 12(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 16, i1 false) + ret void +} + +define void @test9() nounwind { +; RV32I-LABEL: test9: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: sw zero, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw zero, 20(a0) +; RV32I-NEXT: sw zero, 24(a0) +; RV32I-NEXT: sw zero, 28(a0) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: sw zero, 8(a0) +; RV32I-NEXT: sw zero, 12(a0) +; RV32I-NEXT: sw zero, 16(a0) +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test9: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: sw zero, 20(a0) +; RV32IXQCISLS-NEXT: sw zero, 24(a0) +; RV32IXQCISLS-NEXT: sw zero, 28(a0) +; RV32IXQCISLS-NEXT: sw zero, 4(a0) +; RV32IXQCISLS-NEXT: sw zero, 8(a0) +; RV32IXQCISLS-NEXT: sw zero, 12(a0) +; RV32IXQCISLS-NEXT: sw zero, 16(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 32, i1 false) + ret void +} + +define void @test10() nounwind { +; RV32I-LABEL: test10: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; 
RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 60 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test10: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 60, i1 false) + ret void +} + +define void @test11() nounwind { +; RV32I-LABEL: test11: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test11: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 64, i1 false) + ret void +} + +define void @test12() nounwind { +; RV32I-LABEL: test12: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 120 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test12: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 14, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 120, i1 false) + ret void +} + +define void @test13() nounwind { +; RV32I-LABEL: test13: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 124 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test13: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, 
a0, %lo(arr1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 124, i1 false) + ret void +} + +define void @test14() nounwind { +; RV32I-LABEL: test14: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 180 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test14: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 14, 124(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 180, i1 false) + ret void +} + +define void @test15() nounwind { +; RV32I-LABEL: test15: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 184 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test15: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 124(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 184, i1 false) + ret void +} + +define void @test15a() nounwind { +; RV32I-LABEL: test15a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test15a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: li a1, 165 
+; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -91, i32 192, i1 false) + ret void +} + +define void @test15b() nounwind { +; RV32I-LABEL: test15b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test15b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 124(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 188, i1 false) + ret void +} + +define void @test15c() nounwind { +; RV32I-LABEL: test15c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test15c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 192, i1 false) + ret void +} From a84d52a30866f295749e7786864b64a8a79e6760 Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Thu, 31 Jul 2025 22:44:08 +0530 Subject: [PATCH 2/9] Fix check prefix and comment --- .../Target/RISCV/RISCVSelectionDAGInfo.cpp | 2 +- llvm/test/CodeGen/RISCV/xqcilsm-memset.ll | 618 +++++++++--------- 2 files changed, 308 insertions(+), 312 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index edfa2992711a0..ce1e3a72c4e34 100644 --- 
a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -115,7 +115,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( // second for the remainder: // // QC_SETWMI reg1, 16, 0(reg2) - // QC_SETWMI reg1, 32-N, 64(reg2) + // QC_SETWMI reg1, N, 64(reg2) // // For 33-48 words, we would like to use (16, 16, n), but that means the last // QC_SETWMI needs an offset of 128 which the instruction doesnt support. diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll index b0107cc1a4e03..3496438fde5b6 100644 --- a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll +++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll @@ -1,9 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefixes=RV32I - ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \ -; RUN: | FileCheck %s -check-prefixes=RV32IXQCISLS +; RUN: | FileCheck %s -check-prefixes=RV32IXQCILSM %struct.anon = type { [16 x i32] } %struct.anon.0 = type { [47 x i32] } @@ -25,11 +24,11 @@ define void @test1(ptr nocapture %p, i32 %n) nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test1: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: mv a2, a1 -; RV32IXQCISLS-NEXT: li a1, 0 -; RV32IXQCISLS-NEXT: tail memset +; RV32IXQCILSM-LABEL: test1: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: mv a2, a1 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset entry: tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false) ret void @@ -44,13 +43,13 @@ define void @test2(ptr nocapture %p) nounwind { ; RV32I-NEXT: li a2, 128 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test2: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a1, 678490 -; RV32IXQCISLS-NEXT: addi a1, a1, 1445 -; 
RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test2: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 678490 +; RV32IXQCILSM-NEXT: addi a1, a1, 1445 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) ret void @@ -63,14 +62,14 @@ define void @test2a(ptr nocapture %p) nounwind { ; RV32I-NEXT: li a2, 188 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test2a: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a1, 678490 -; RV32IXQCISLS-NEXT: addi a1, a1, 1445 -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 15, 64(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 124(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test2a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 678490 +; RV32IXQCILSM-NEXT: addi a1, a1, 1445 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 124(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false) ret void @@ -83,11 +82,11 @@ define void @test2b(ptr nocapture %p) nounwind { ; RV32I-NEXT: li a2, 192 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test2b: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: li a1, 165 -; RV32IXQCISLS-NEXT: li a2, 192 -; RV32IXQCISLS-NEXT: tail memset +; RV32IXQCILSM-LABEL: test2b: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: li a1, 165 +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: tail memset entry: tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false) ret void @@ -100,13 +99,13 @@ define void @test2c(ptr nocapture %p) nounwind { ; RV32I-NEXT: li a2, 128 ; RV32I-NEXT: tail memset ; -; 
RV32IXQCISLS-LABEL: test2c: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a1, 678490 -; RV32IXQCISLS-NEXT: addi a1, a1, 1445 -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test2c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 678490 +; RV32IXQCILSM-NEXT: addi a1, a1, 1445 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) ret void @@ -126,18 +125,18 @@ define void @test2d(ptr nocapture %p) nounwind { ; RV32I-NEXT: sb a1, 10(a0) ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test2d: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: li a1, -91 -; RV32IXQCISLS-NEXT: lui a2, 1048570 -; RV32IXQCISLS-NEXT: lui a3, 678490 -; RV32IXQCISLS-NEXT: addi a2, a2, 1445 -; RV32IXQCISLS-NEXT: addi a3, a3, 1445 -; RV32IXQCISLS-NEXT: sw a3, 0(a0) -; RV32IXQCISLS-NEXT: sw a3, 4(a0) -; RV32IXQCISLS-NEXT: sh a2, 8(a0) -; RV32IXQCISLS-NEXT: sb a1, 10(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test2d: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: li a1, -91 +; RV32IXQCILSM-NEXT: lui a2, 1048570 +; RV32IXQCILSM-NEXT: lui a3, 678490 +; RV32IXQCILSM-NEXT: addi a2, a2, 1445 +; RV32IXQCILSM-NEXT: addi a3, a3, 1445 +; RV32IXQCILSM-NEXT: sw a3, 0(a0) +; RV32IXQCILSM-NEXT: sw a3, 4(a0) +; RV32IXQCILSM-NEXT: sh a2, 8(a0) +; RV32IXQCILSM-NEXT: sb a1, 10(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false) ret void @@ -151,11 +150,11 @@ define ptr @test3(ptr %p) nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test3: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: li a2, 256 -; RV32IXQCISLS-NEXT: li a1, 0 -; RV32IXQCISLS-NEXT: tail memset +; RV32IXQCILSM-LABEL: test3: +; RV32IXQCILSM: # %bb.0: # 
%entry +; RV32IXQCILSM-NEXT: li a2, 256 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset entry: tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false) ret ptr %p @@ -168,11 +167,11 @@ define ptr @test3a(ptr %p) nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test3a: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test3a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false) ret ptr %p @@ -187,12 +186,12 @@ define void @test4() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test4: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(struct1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test4: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(struct1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false) ret void @@ -205,12 +204,12 @@ define void @test4a(ptr nocapture %s) nounwind { ; RV32I-NEXT: li a2, 64 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test4a: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a1, 682602 -; RV32IXQCISLS-NEXT: addi a1, a1, 1702 -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test4a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 682602 +; RV32IXQCILSM-NEXT: addi a1, a1, 1702 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: ret 
entry: tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false) ret void @@ -238,18 +237,18 @@ define void @test4b() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test4b: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a1, %hi(struct4b) -; RV32IXQCISLS-NEXT: addi a1, a1, %lo(struct4b) -; RV32IXQCISLS-NEXT: lui a0, %hi(struct4b1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct4b1) -; RV32IXQCISLS-NEXT: li a2, 192 -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 124(a1) -; RV32IXQCISLS-NEXT: li a1, 0 -; RV32IXQCISLS-NEXT: tail memset +; RV32IXQCILSM-LABEL: test4b: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, %hi(struct4b) +; RV32IXQCILSM-NEXT: addi a1, a1, %lo(struct4b) +; RV32IXQCILSM-NEXT: lui a0, %hi(struct4b1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct4b1) +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 124(a1) +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset entry: tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false) tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false) @@ -265,13 +264,13 @@ define void @test5() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test5: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(struct2) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct2) -; RV32IXQCISLS-NEXT: li a2, 64 -; RV32IXQCISLS-NEXT: li a1, 0 -; RV32IXQCISLS-NEXT: tail memset +; RV32IXQCILSM-LABEL: test5: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(struct2) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct2) +; RV32IXQCILSM-NEXT: li a2, 64 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset entry: 
tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 64, i1 false) ret void @@ -286,13 +285,13 @@ define i32 @test6() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sw zero, 12(sp) -; RV32IXQCISLS-NEXT: li a0, 0 -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test6: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret entry: %x = alloca i32, align 4 call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) @@ -302,21 +301,20 @@ entry: define i32 @test6a() nounwind { ; RV32I-LABEL: test6a: -; RV32I: # %bb.0: # %entry +; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: lw a0, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6a: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sw zero, 12(sp) -; RV32IXQCISLS-NEXT: lw a0, 12(sp) -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret -entry: +; RV32IXQCILSM-LABEL: test6a: +; RV32IXQCILSM: # %bb.0: +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: lw a0, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret %x = alloca i32, align 4 call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) store i32 0, ptr %x, align 4 @@ -327,21 +325,20 @@ entry: define zeroext i8 @test6b_c() nounwind { ; RV32I-LABEL: test6b_c: -; RV32I: # %bb.0: # %entry +; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sb zero, 12(sp) ; RV32I-NEXT: lbu a0, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6b_c: -; RV32IXQCISLS: # %bb.0: # %entry -; 
RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sb zero, 12(sp) -; RV32IXQCISLS-NEXT: lbu a0, 12(sp) -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret -entry: +; RV32IXQCILSM-LABEL: test6b_c: +; RV32IXQCILSM: # %bb.0: +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sb zero, 12(sp) +; RV32IXQCILSM-NEXT: lbu a0, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret %x = alloca i8, align 4 call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %x) call void @llvm.memset.p0.i32(ptr nonnull align 4 %x, i8 0, i32 1, i1 false) @@ -359,13 +356,13 @@ define signext i16 @test6b_s() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6b_s: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sh zero, 12(sp) -; RV32IXQCISLS-NEXT: lh a0, 12(sp) -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test6b_s: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sh zero, 12(sp) +; RV32IXQCILSM-NEXT: lh a0, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret entry: %x = alloca i16, align 4 call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %x) @@ -384,13 +381,14 @@ define i32 @test6b_l() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32IXQCILSM-LABEL: test6b_l: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: lw a0, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret ; RV32IXQCISLS-LABEL: test6b_l: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sw zero, 12(sp) -; RV32IXQCISLS-NEXT: lw a0, 12(sp) -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret entry: %x = alloca i32, align 4 call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) @@ -411,15 +409,15 @@ define i64 
@test6b_ll() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6b_ll: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sw zero, 8(sp) -; RV32IXQCISLS-NEXT: sw zero, 12(sp) -; RV32IXQCISLS-NEXT: lw a0, 8(sp) -; RV32IXQCISLS-NEXT: lw a1, 12(sp) -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test6b_ll: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 8(sp) +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: lw a0, 8(sp) +; RV32IXQCILSM-NEXT: lw a1, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret entry: %x = alloca i64, align 8 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %x) @@ -438,13 +436,13 @@ define zeroext i8 @test6c_c() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6c_c: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sb zero, 15(sp) -; RV32IXQCISLS-NEXT: li a0, 0 -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test6c_c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sb zero, 15(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret entry: %x = alloca i8 call void @llvm.memset.p0.i32(ptr align 1 %x, i8 0, i32 1, i1 false) @@ -461,13 +459,13 @@ define signext i16 @test6c_s() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6c_s: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sh zero, 14(sp) -; RV32IXQCISLS-NEXT: li a0, 0 -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test6c_s: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sh zero, 14(sp) +; 
RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret entry: %x = alloca i16 call void @llvm.memset.p0.i32(ptr align 2 %x, i8 0, i32 2, i1 false) @@ -484,13 +482,13 @@ define i32 @test6c_l() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6c_l: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sw zero, 12(sp) -; RV32IXQCISLS-NEXT: li a0, 0 -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test6c_l: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret entry: %x = alloca i32, align 4 call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) @@ -509,15 +507,15 @@ define i64 @test6c_ll() nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test6c_ll: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: addi sp, sp, -16 -; RV32IXQCISLS-NEXT: sw zero, 8(sp) -; RV32IXQCISLS-NEXT: sw zero, 12(sp) -; RV32IXQCISLS-NEXT: li a0, 0 -; RV32IXQCISLS-NEXT: li a1, 0 -; RV32IXQCISLS-NEXT: addi sp, sp, 16 -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test6c_ll: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 8(sp) +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret entry: %x = alloca i64, align 8 call void @llvm.memset.p0.i32(ptr align 8 %x, i8 0, i32 8, i1 false) @@ -527,21 +525,20 @@ entry: define void @test7() nounwind { ; RV32I-LABEL: test7: -; RV32I: # %bb.0: # %entry +; RV32I: # %bb.0: ; RV32I-NEXT: lui a0, %hi(arr1) ; RV32I-NEXT: sw zero, %lo(arr1)(a0) ; RV32I-NEXT: addi a0, a0, %lo(arr1) ; RV32I-NEXT: sw zero, 4(a0) ; RV32I-NEXT: ret ; -; 
RV32IXQCISLS-LABEL: test7: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: sw zero, %lo(arr1)(a0) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: sw zero, 4(a0) -; RV32IXQCISLS-NEXT: ret -entry: +; RV32IXQCILSM-LABEL: test7: +; RV32IXQCILSM: # %bb.0: +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw zero, 4(a0) +; RV32IXQCILSM-NEXT: ret tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 8, i1 false) ret void } @@ -551,9 +548,9 @@ define void @test7a() nounwind { ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test7a: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test7a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: ret entry: call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 0, i1 false) ret void @@ -572,17 +569,17 @@ define void @test7a_unalign() nounwind { ; RV32I-NEXT: sb a1, 16(a0) ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test7a_unalign: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: li a1, -1 -; RV32IXQCISLS-NEXT: sw a1, %lo(arr1)(a0) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: sw a1, 4(a0) -; RV32IXQCISLS-NEXT: sw a1, 8(a0) -; RV32IXQCISLS-NEXT: sw a1, 12(a0) -; RV32IXQCISLS-NEXT: sb a1, 16(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test7a_unalign: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: li a1, -1 +; RV32IXQCILSM-NEXT: sw a1, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw a1, 4(a0) +; RV32IXQCILSM-NEXT: sw a1, 8(a0) +; RV32IXQCILSM-NEXT: sw a1, 12(a0) +; RV32IXQCILSM-NEXT: sb a1, 16(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 17, i1 false) ret void @@ -597,14 
+594,14 @@ define void @test7b() nounwind { ; RV32I-NEXT: li a2, 68 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test7b: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: li a1, -1 -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 1, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test7b: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: li a1, -1 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 1, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 68, i1 false) ret void @@ -619,15 +616,15 @@ define void @test7c() nounwind { ; RV32I-NEXT: li a2, 128 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test7c: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: lui a1, 526344 -; RV32IXQCISLS-NEXT: addi a1, a1, 128 -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test7c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: lui a1, 526344 +; RV32IXQCILSM-NEXT: addi a1, a1, 128 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -128, i32 128, i1 false) ret void @@ -642,16 +639,16 @@ define void @test7d() nounwind { ; RV32I-NEXT: li a2, 148 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test7d: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: lui a1, 53457 -; 
RV32IXQCISLS-NEXT: addi a1, a1, -755 -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 15, 64(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 6, 124(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test7d: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: lui a1, 53457 +; RV32IXQCILSM-NEXT: addi a1, a1, -755 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 6, 124(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 13, i32 148, i1 false) ret void @@ -666,15 +663,15 @@ define void @test7e() nounwind { ; RV32I-NEXT: li a2, 100 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test7e: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: lui a1, 982783 -; RV32IXQCISLS-NEXT: addi a1, a1, -17 -; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi a1, 9, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test7e: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: lui a1, 982783 +; RV32IXQCILSM-NEXT: addi a1, a1, -17 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 9, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -17, i32 100, i1 false) ret void @@ -691,15 +688,15 @@ define void @test8() nounwind { ; RV32I-NEXT: sw zero, 12(a0) ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test8: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: sw zero, %lo(arr1)(a0) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: sw zero, 4(a0) -; RV32IXQCISLS-NEXT: sw zero, 8(a0) -; RV32IXQCISLS-NEXT: sw zero, 12(a0) 
-; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test8: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw zero, 4(a0) +; RV32IXQCILSM-NEXT: sw zero, 8(a0) +; RV32IXQCILSM-NEXT: sw zero, 12(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 16, i1 false) ret void @@ -720,19 +717,19 @@ define void @test9() nounwind { ; RV32I-NEXT: sw zero, 16(a0) ; RV32I-NEXT: ret ; -; RV32IXQCISLS-LABEL: test9: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: sw zero, %lo(arr1)(a0) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: sw zero, 20(a0) -; RV32IXQCISLS-NEXT: sw zero, 24(a0) -; RV32IXQCISLS-NEXT: sw zero, 28(a0) -; RV32IXQCISLS-NEXT: sw zero, 4(a0) -; RV32IXQCISLS-NEXT: sw zero, 8(a0) -; RV32IXQCISLS-NEXT: sw zero, 12(a0) -; RV32IXQCISLS-NEXT: sw zero, 16(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test9: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw zero, 20(a0) +; RV32IXQCILSM-NEXT: sw zero, 24(a0) +; RV32IXQCILSM-NEXT: sw zero, 28(a0) +; RV32IXQCILSM-NEXT: sw zero, 4(a0) +; RV32IXQCILSM-NEXT: sw zero, 8(a0) +; RV32IXQCILSM-NEXT: sw zero, 12(a0) +; RV32IXQCILSM-NEXT: sw zero, 16(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 32, i1 false) ret void @@ -747,12 +744,12 @@ define void @test10() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test10: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 0(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test10: +; RV32IXQCILSM: 
# %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 0(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 60, i1 false) ret void @@ -767,12 +764,12 @@ define void @test11() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test11: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test11: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 64, i1 false) ret void @@ -787,13 +784,13 @@ define void @test12() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test12: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 14, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test12: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 14, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 120, i1 false) ret void @@ -808,13 +805,13 @@ define void @test13() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test13: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: 
qc.setwmi zero, 15, 64(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test13: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 124, i1 false) ret void @@ -829,14 +826,14 @@ define void @test14() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test14: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 14, 124(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test14: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 14, 124(a0) +; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 180, i1 false) ret void @@ -851,14 +848,14 @@ define void @test15() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test15: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 124(a0) -; RV32IXQCISLS-NEXT: ret +; RV32IXQCILSM-LABEL: test15: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 124(a0) 
+; RV32IXQCILSM-NEXT: ret entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 184, i1 false) ret void @@ -873,13 +870,13 @@ define void @test15a() nounwind { ; RV32I-NEXT: li a2, 192 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test15a: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: li a1, 165 -; RV32IXQCISLS-NEXT: li a2, 192 -; RV32IXQCISLS-NEXT: tail memset +; RV32IXQCILSM-LABEL: test15a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: li a1, 165 +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: tail memset entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -91, i32 192, i1 false) ret void @@ -887,22 +884,21 @@ entry: define void @test15b() nounwind { ; RV32I-LABEL: test15b: -; RV32I: # %bb.0: # %entry +; RV32I: # %bb.0: ; RV32I-NEXT: lui a0, %hi(arr1) ; RV32I-NEXT: addi a0, a0, %lo(arr1) ; RV32I-NEXT: li a2, 188 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test15b: -; RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a0) -; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 124(a0) -; RV32IXQCISLS-NEXT: ret -entry: +; RV32IXQCILSM-LABEL: test15b: +; RV32IXQCILSM: # %bb.0: +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 124(a0) +; RV32IXQCILSM-NEXT: ret tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 188, i1 false) ret void } @@ -916,13 +912,13 @@ define void @test15c() nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: tail memset ; -; RV32IXQCISLS-LABEL: test15c: -; 
RV32IXQCISLS: # %bb.0: # %entry -; RV32IXQCISLS-NEXT: lui a0, %hi(arr1) -; RV32IXQCISLS-NEXT: addi a0, a0, %lo(arr1) -; RV32IXQCISLS-NEXT: li a2, 192 -; RV32IXQCISLS-NEXT: li a1, 0 -; RV32IXQCISLS-NEXT: tail memset +; RV32IXQCILSM-LABEL: test15c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset entry: tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 192, i1 false) ret void From 44a01d195a25cf3dca3da3ec017332ffd8eeaa95 Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Fri, 1 Aug 2025 08:33:32 +0530 Subject: [PATCH 3/9] Address comments --- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 10 +-- .../Target/RISCV/RISCVSelectionDAGInfo.cpp | 61 +++++++++---------- llvm/test/CodeGen/RISCV/xqcilsm-memset.ll | 25 -------- 3 files changed, 35 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 2479ced164927..1b0041b5ca7e3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -14,12 +14,12 @@ // Operand and SDNode transformation definitions. 
//===----------------------------------------------------------------------===// -def SDT_StoreMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 3>, - SDTCisPtrTy<2>, - SDTCisVT<3, XLenVT>]>; +def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 3>, + SDTCisPtrTy<2>, + SDTCisVT<3, XLenVT>]>; -def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_StoreMultiple, +def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def uimm5nonzero : RISCVOp, diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index ce1e3a72c4e34..be0a16e3748e0 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -69,8 +69,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { - const RISCVSubtarget &Subtarget = - DAG.getMachineFunction().getSubtarget(); + const auto &Subtarget = DAG.getSubtarget(); // We currently do this only for Xqcilsm if (!Subtarget.hasVendorXqcilsm()) return SDValue(); @@ -83,7 +82,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); // Do this only if it is word aligned and we write multiple of 4 bytes. - if (!((Alignment.value() & 3) == 0 && (NumberOfBytesToWrite & 3) == 0)) + if (!(Alignment.value() >= 4) || !((NumberOfBytesToWrite & 3) == 0)) return SDValue(); SmallVector OutChains; @@ -104,7 +103,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( if ((Src.getValueType() == MVT::i8) && !IsZeroVal) // Replicate byte to word by multiplication with 0x01010101. 
SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, - DAG.getConstant(16843009, dl, MVT::i32)); + DAG.getConstant(0x01010101ul, dl, MVT::i32)); // We limit a QC_SETWMI to 16 words or less to improve interruptibility. // So for 1-16 words we use a single QC_SETWMI: @@ -128,38 +127,38 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( // QC_SETWMI R2, R0, N, 124 // // For 48 words or more, call the target independent memset + if ( NumberOfWords >= 48) + return SDValue(); + if (NumberOfWords <= 16) { // 1 - 16 words SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32); SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); return getSetwmiNode(SizeWords, OffsetSetwmi); - } else if (NumberOfWords <= 47) { - if (NumberOfWords <= 32) { - // 17 - 32 words - SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - - SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - } else { - // 33 - 47 words - SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - - SizeWords = DAG.getTargetConstant(15, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - - SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } - // >= 48 words. Call target independent memset. 
- return SDValue(); + if (NumberOfWords <= 32) { + // 17 - 32 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } else { + // 33 - 47 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(15, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll index 3496438fde5b6..988bb6ffb8915 100644 --- a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll +++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll @@ -299,30 +299,6 @@ entry: ret i32 %0 } -define i32 @test6a() nounwind { -; RV32I-LABEL: test6a: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: lw a0, 12(sp) -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32IXQCILSM-LABEL: test6a: -; RV32IXQCILSM: # %bb.0: -; RV32IXQCILSM-NEXT: addi sp, sp, -16 -; RV32IXQCILSM-NEXT: sw zero, 12(sp) -; RV32IXQCILSM-NEXT: lw a0, 12(sp) -; RV32IXQCILSM-NEXT: addi sp, sp, 16 -; RV32IXQCILSM-NEXT: ret - %x = alloca i32, align 4 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) - store i32 0, ptr %x, align 4 - %x.0.x.0. 
= load volatile i32, ptr %x, align 4 - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) - ret i32 %x.0.x.0. -} - define zeroext i8 @test6b_c() nounwind { ; RV32I-LABEL: test6b_c: ; RV32I: # %bb.0: @@ -388,7 +364,6 @@ define i32 @test6b_l() nounwind { ; RV32IXQCILSM-NEXT: lw a0, 12(sp) ; RV32IXQCILSM-NEXT: addi sp, sp, 16 ; RV32IXQCILSM-NEXT: ret -; RV32IXQCISLS-LABEL: test6b_l: entry: %x = alloca i32, align 4 call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) From fd143e9a6b88a0da0b86adda75b66a20ed3c492b Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Fri, 1 Aug 2025 08:39:48 +0530 Subject: [PATCH 4/9] Clang format --- llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index be0a16e3748e0..d151e12151697 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -102,8 +102,9 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( // If i8 type and constant non-zero value. if ((Src.getValueType() == MVT::i8) && !IsZeroVal) // Replicate byte to word by multiplication with 0x01010101. - SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, - DAG.getConstant(0x01010101ul, dl, MVT::i32)); + SrcValueReplicated = + DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, + DAG.getConstant(0x01010101ul, dl, MVT::i32)); // We limit a QC_SETWMI to 16 words or less to improve interruptibility. 
// So for 1-16 words we use a single QC_SETWMI: @@ -127,7 +128,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( // QC_SETWMI R2, R0, N, 124 // // For 48 words or more, call the target independent memset - if ( NumberOfWords >= 48) + if (NumberOfWords >= 48) return SDValue(); if (NumberOfWords <= 16) { From 18ee0a91124924f78aa87f77ccecd8f66663dbd1 Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Fri, 1 Aug 2025 17:00:23 +0530 Subject: [PATCH 5/9] Add memoperands --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 1 + .../Target/RISCV/RISCVSelectionDAGInfo.cpp | 47 +++++++------------ 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index b778c33083685..efe34f6d5ffc2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1851,6 +1851,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2), Node->getOperand(3), Node->getOperand(4), Chain}; MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops); + CurDAG->setNodeMemRefs(New, {cast(Node)->getMemOperand()}); ReplaceNode(Node, New); return; } diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index d151e12151697..935a2982c2776 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -86,21 +86,24 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( return SDValue(); SmallVector OutChains; - SDValue SizeWords, OffsetSetwmi; SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); int NumberOfWords = NumberOfBytesToWrite / 4; + MachineFunction &MF = DAG.getMachineFunction(); // Helper for constructing the QC_SETWMI instruction - auto getSetwmiNode = [&](SDValue SizeWords, SDValue OffsetSetwmi) -> SDValue { - SDValue Ops[] = 
{Chain, SrcValueReplicated, Dst, SizeWords, OffsetSetwmi}; - return DAG.getNode(RISCVISD::QC_SETWMI, dl, MVT::Other, Ops); + auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue { + SDValue Ops[] = {Chain, SrcValueReplicated, Dst, + DAG.getTargetConstant(SizeWords, dl, MVT::i32), + DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)}; + MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand( + DstPtrInfo, MachineMemOperand::MOStore, SizeWords * 4, Align(4)); + return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl, + DAG.getVTList(MVT::Other), Ops, MVT::i32, + BaseMemOperand); }; - bool IsZeroVal = - isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); - // If i8 type and constant non-zero value. - if ((Src.getValueType() == MVT::i8) && !IsZeroVal) + if ((Src.getValueType() == MVT::i8) && !isNullConstant(Src)) // Replicate byte to word by multiplication with 0x01010101. SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, @@ -133,33 +136,19 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( if (NumberOfWords <= 16) { // 1 - 16 words - SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32); - SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); - return getSetwmiNode(SizeWords, OffsetSetwmi); + return getSetwmiNode(NumberOfWords, 0); } if (NumberOfWords <= 32) { // 17 - 32 words - SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - - SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + OutChains.push_back(getSetwmiNode(NumberOfWords - 16, 64)); + OutChains.push_back(getSetwmiNode(16, 0)); } else { // 33 - 47 words - SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32); -
OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - - SizeWords = DAG.getTargetConstant(15, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); - - SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); - OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); - OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + OutChains.push_back(getSetwmiNode(NumberOfWords - 31, 124)); + OutChains.push_back(getSetwmiNode(15, 64)); + OutChains.push_back(getSetwmiNode(16, 0)); } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } From c7ab9f76929a4a3cdb039708948e635aa5ad06be Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Fri, 1 Aug 2025 21:29:38 +0530 Subject: [PATCH 6/9] Use tuimm --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 10 ---------- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 9 +++++++++ llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index efe34f6d5ffc2..f223fdbef4359 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1845,16 +1845,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { CurDAG->RemoveDeadNode(Node); return; } - case RISCVISD::QC_SETWMI: { - SDValue Chain = Node->getOperand(0); - SDVTList VTs = Node->getVTList(); - SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2), - Node->getOperand(3), Node->getOperand(4), Chain}; - MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops); - CurDAG->setNodeMemRefs(New, {cast(Node)->getMemOperand()}); - ReplaceNode(Node, New); - return; - } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 
1b0041b5ca7e3..399fb2c9092ed 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -35,6 +35,8 @@ def uimm5nonzero : RISCVOp, }]; } +def tuimm5nonzero : TImmLeaf(Imm);}]>; + def uimm5gt3 : RISCVOp, ImmLeaf 3) && isUInt<5>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5, "GT3">; @@ -100,6 +102,8 @@ def uimm5slist : RISCVOp, ImmLeaf(Imm);}]>; + def uimm10 : RISCVUImmLeafOp<10>; def uimm11 : RISCVUImmLeafOp<11>; @@ -1574,6 +1578,11 @@ def : QCISELECTIICCPat ; def : QCISELECTIICCPat ; } // Predicates = [HasVendorXqcics, IsRV32] +let Predicates = [HasVendorXqcilsm, IsRV32] in { +def : Pat<(qc_setwmi i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7), + (QC_SETWMI i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>; +} // Predicates = [HasVendorXqcilsm, IsRV32] + //===----------------------------------------------------------------------===/i // Compress Instruction tablegen backend. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index 935a2982c2776..bc3213b797166 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -82,7 +82,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); // Do this only if it is word aligned and we write multiple of 4 bytes. 
- if (!(Alignment.value() >= 4) || !((NumberOfBytesToWrite & 3) == 0)) + if (!(Alignment >= 4) || !((NumberOfBytesToWrite & 3) == 0)) return SDValue(); SmallVector OutChains; From be7306a1668763b99dbd442390c7063797fe3ecd Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Fri, 1 Aug 2025 22:35:16 +0530 Subject: [PATCH 7/9] Use GPR --- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 399fb2c9092ed..44a8245dc2a75 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1579,8 +1579,8 @@ def : QCISELECTIICCPat ; } // Predicates = [HasVendorXqcics, IsRV32] let Predicates = [HasVendorXqcilsm, IsRV32] in { -def : Pat<(qc_setwmi i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7), - (QC_SETWMI i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>; +def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7), + (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>; } // Predicates = [HasVendorXqcilsm, IsRV32] //===----------------------------------------------------------------------===/i From b3881c2b553083b91fe7483db353aa22cfb3c44e Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Sat, 2 Aug 2025 06:50:10 +0530 Subject: [PATCH 8/9] MachinePointerInfo offset --- llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index bc3213b797166..cd94307fb6a27 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -96,7 +96,8 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( DAG.getTargetConstant(SizeWords, dl, MVT::i32), DAG.getTargetConstant(OffsetSetwmi, dl, 
MVT::i32)}; MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand( - DstPtrInfo, MachineMemOperand::MOStore, SizeWords * 4, Align(4)); + DstPtrInfo.getWithOffset(OffsetSetwmi), MachineMemOperand::MOStore, + SizeWords * 4, Align(4)); return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, BaseMemOperand); From aebf23cea3393695c9ec5e36efe7180778ea79a3 Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Mon, 4 Aug 2025 11:41:45 +0530 Subject: [PATCH 9/9] Add volatile memop and fix typos --- llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index cd94307fb6a27..041dd07b48bf0 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -81,7 +81,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); - // Do this only if it is word aligned and we write multiple of 4 bytes. + // Do this only if it is word aligned and we write a multiple of 4 bytes. if (!(Alignment >= 4) || !((NumberOfBytesToWrite & 3) == 0)) return SDValue(); @@ -89,6 +89,8 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); int NumberOfWords = NumberOfBytesToWrite / 4; MachineFunction &MF = DAG.getMachineFunction(); + auto Volatile = + isVolatile ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; // Helper for constructing the QC_SETWMI instruction auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue { @@ -96,8 +98,8 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( DAG.getTargetConstant(SizeWords, dl, MVT::i32), DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)}; MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand( - DstPtrInfo.getWithOffset(OffsetSetwmi), MachineMemOperand::MOStore, - SizeWords * 4, Align(4)); + DstPtrInfo.getWithOffset(OffsetSetwmi), + MachineMemOperand::MOStore | Volatile, SizeWords * 4, Align(4)); return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, BaseMemOperand); @@ -122,7 +124,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( // QC_SETWMI reg1, N, 64(reg2) // // For 33-48 words, we would like to use (16, 16, n), but that means the last - // QC_SETWMI needs an offset of 128 which the instruction doesnt support. + // QC_SETWMI needs an offset of 128 which the instruction doesn't support. // So in this case we use a length of 15 for the second instruction and we do // the rest with the third instruction. // This means the maximum inlined number of words is 47 (for now):