|
7 | 7 | //===----------------------------------------------------------------------===// |
8 | 8 |
|
9 | 9 | #include "RISCVSelectionDAGInfo.h" |
| 10 | +#include "RISCVSubtarget.h" |
| 11 | +#include "llvm/CodeGen/SelectionDAG.h" |
10 | 12 |
|
11 | 13 | #define GET_SDNODE_DESC |
12 | 14 | #include "RISCVGenSDNodeInfo.inc" |
@@ -62,3 +64,102 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, |
62 | 64 | } |
63 | 65 | #endif |
64 | 66 | } |
| 67 | + |
| 68 | +SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( |
| 69 | + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
| 70 | + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, |
| 71 | + MachinePointerInfo DstPtrInfo) const { |
| 72 | + const RISCVSubtarget &Subtarget = |
| 73 | + DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
| 74 | + // We currently do this only for Xqcilsm |
| 75 | + if (!Subtarget.hasVendorXqcilsm()) |
| 76 | + return SDValue(); |
| 77 | + |
| 78 | + // Do this only if we know the size at compile time. |
| 79 | + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); |
| 80 | + if (!ConstantSize) |
| 81 | + return SDValue(); |
| 82 | + |
| 83 | + uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); |
| 84 | + |
| 85 | + // Do this only if it is word aligned and we write multiple of 4 bytes. |
| 86 | + if (!((Alignment.value() & 3) == 0 && (NumberOfBytesToWrite & 3) == 0)) |
| 87 | + return SDValue(); |
| 88 | + |
| 89 | + SmallVector<SDValue, 8> OutChains; |
| 90 | + SDValue SizeWords, OffsetSetwmi; |
| 91 | + SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); |
| 92 | + int NumberOfWords = NumberOfBytesToWrite / 4; |
| 93 | + |
| 94 | + // Helper for constructing the QC_SETWMI instruction |
| 95 | + auto getSetwmiNode = [&](SDValue SizeWords, SDValue OffsetSetwmi) -> SDValue { |
| 96 | + SDValue Ops[] = {Chain, SrcValueReplicated, Dst, SizeWords, OffsetSetwmi}; |
| 97 | + return DAG.getNode(RISCVISD::QC_SETWMI, dl, MVT::Other, Ops); |
| 98 | + }; |
| 99 | + |
| 100 | + bool IsZeroVal = |
| 101 | + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); |
| 102 | + |
| 103 | + // If i8 type and constant non-zero value. |
| 104 | + if ((Src.getValueType() == MVT::i8) && !IsZeroVal) |
| 105 | + // Replicate byte to word by multiplication with 0x01010101. |
| 106 | + SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, |
| 107 | + DAG.getConstant(16843009, dl, MVT::i32)); |
| 108 | + |
| 109 | + // We limit a QC_SETWMI to 16 words or less to improve interruptibility. |
| 110 | + // So for 1-16 words we use a single QC_SETWMI: |
| 111 | + // |
| 112 | + // QC_SETWMI reg1, N, 0(reg2) |
| 113 | + // |
| 114 | + // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the |
| 115 | + // second for the remainder: |
| 116 | + // |
| 117 | + // QC_SETWMI reg1, 16, 0(reg2) |
| 118 | + // QC_SETWMI reg1, 32-N, 64(reg2) |
| 119 | + // |
| 120 | + // For 33-48 words, we would like to use (16, 16, n), but that means the last |
| 121 | + // QC_SETWMI needs an offset of 128 which the instruction doesnt support. |
| 122 | + // So in this case we use a length of 15 for the second instruction and we do |
| 123 | + // the rest with the third instruction. |
| 124 | + // This means the maximum inlined number of words is 47 (for now): |
| 125 | + // |
| 126 | + // QC_SETWMI R2, R0, 16, 0 |
| 127 | + // QC_SETWMI R2, R0, 15, 64 |
| 128 | + // QC_SETWMI R2, R0, N, 124 |
| 129 | + // |
| 130 | + // For 48 words or more, call the target independent memset |
| 131 | + if (NumberOfWords <= 16) { |
| 132 | + // 1 - 16 words |
| 133 | + SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32); |
| 134 | + SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); |
| 135 | + return getSetwmiNode(SizeWords, OffsetSetwmi); |
| 136 | + } else if (NumberOfWords <= 47) { |
| 137 | + if (NumberOfWords <= 32) { |
| 138 | + // 17 - 32 words |
| 139 | + SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32); |
| 140 | + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); |
| 141 | + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); |
| 142 | + |
| 143 | + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); |
| 144 | + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); |
| 145 | + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); |
| 146 | + } else { |
| 147 | + // 33 - 47 words |
| 148 | + SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32); |
| 149 | + OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32); |
| 150 | + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); |
| 151 | + |
| 152 | + SizeWords = DAG.getTargetConstant(15, dl, MVT::i32); |
| 153 | + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); |
| 154 | + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); |
| 155 | + |
| 156 | + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); |
| 157 | + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); |
| 158 | + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); |
| 159 | + } |
| 160 | + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
| 161 | + } |
| 162 | + |
| 163 | + // >= 48 words. Call target independent memset. |
| 164 | + return SDValue(); |
| 165 | +} |
0 commit comments