From bdee11d86fb414f390eb19c15d6b0d16caf453cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Sowi=C5=84ski?= Date: Thu, 7 Aug 2025 16:45:34 +0200 Subject: [PATCH 1/6] Improve clamped subtract for localloc --- src/coreclr/jit/codegenriscv64.cpp | 46 +++++++++++------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 33aa6649261d7a..cc1931effcd0b2 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1560,40 +1560,27 @@ void CodeGen::genLclHeap(GenTree* tree) // case SP is on the last byte of the guard page. Thus you must // touch SP-0 first not SP-0x1000. // - // // Note that we go through a few hoops so that SP never points to // illegal pages at any time during the tickling process. - // - // sltu RA, SP, regCnt - // sub regCnt, SP, regCnt // regCnt now holds ultimate SP - // beq RA, REG_R0, Skip - // addi regCnt, REG_R0, 0 - // - // Skip: - // lui regPageSize, eeGetPageSize()>>12 - // addi regTmp, SP, 0 - // Loop: - // lw r0, 0(regTmp) // tickle the page - read from the page - // sub regTmp, regTmp, regPageSize - // bgeu regTmp, regCnt, Loop - // - // Done: - // addi SP, regCnt, 0 - // if (tempReg == REG_NA) tempReg = internalRegisters.Extract(tree); assert(regCnt != tempReg); - emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); - - // sub regCnt, SP, regCnt // regCnt now holds ultimate SP - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); - - // Overflow, set regCnt to lowest possible value - emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2); - emit->emitIns_R_R(INS_mov, EA_PTRSIZE, regCnt, REG_R0); - + // regCnt now holds the number of bytes to localloc, after the sequence it will be set to the ultimate SP + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + emit->emitIns_R_R_R(INS_maxu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); // temp = max(sp, cnt); + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, tempReg, regCnt); // cnt = temp - cnt; + } + else + { + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); // cnt = sp - cnt; // ultimate SP + // If the subtraction above overflowed (i.e. sp < ultimateSP), set regCnt to lowest possible value (i.e. 0) + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); // temp = overflow ? 1 : 0; + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -1); // temp = overflow ? 0 : full_mask; + emit->emitIns_R_R_R(INS_and, EA_PTRSIZE, regCnt, regCnt, tempReg); // cnt = overflow ? 0 : cnt; + } regNumber rPageSize = internalRegisters.GetSingle(tree); noway_assert(rPageSize != tempReg); @@ -1638,8 +1625,9 @@ void CodeGen::genLclHeap(GenTree* tree) } else // stackAdjustment == 0 { - // Move the final value of SP to targetReg - emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_SPBASE); + // Move the final value of SP to targetReg (if necessary; targetReg may be same as regCnt which also holds SP) + regNumber spSrc = (regCnt == REG_NA) ? REG_SPBASE : regCnt; + emit->emitIns_Mov(EA_PTRSIZE, targetReg, spSrc, true); } BAILOUT: From eb7d246e8a14e4f665cd338175cd649f4cbd54e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Sowi=C5=84ski?= Date: Thu, 7 Aug 2025 17:02:06 +0200 Subject: [PATCH 2/6] Clamped increment for IncSaturate --- src/coreclr/jit/codegenriscv64.cpp | 9 +++++---- src/coreclr/jit/lsrariscv64.cpp | 8 ++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index cc1931effcd0b2..ee0fe60ec5880b 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1002,14 +1002,15 @@ void CodeGen::genCodeForIncSaturate(GenTree* tree) GenTree* operand = tree->gtGetOp1(); assert(!operand->isContained()); + regNumber tempReg = internalRegisters.GetSingle(tree); // The src must be a register. regNumber operandReg = genConsumeReg(operand); emitAttr attr = emitActualTypeSize(tree); + assert(EA_SIZE(attr) == EA_PTRSIZE); + assert(tempReg != operandReg); - GetEmitter()->emitIns_R_R_I(INS_addi, attr, targetReg, operandReg, 1); - // bne targetReg, zero, 2 * 4 - GetEmitter()->emitIns_R_R_I(INS_bne, attr, targetReg, REG_R0, 8); - GetEmitter()->emitIns_R_R(INS_not, attr, targetReg, targetReg); + GetEmitter()->emitIns_R_R_I(INS_sltiu, attr, tempReg, operandReg, SIZE_T_MAX); // temp = (operand < max) ? 1 : 0; + GetEmitter()->emitIns_R_R_I(INS_add, attr, targetReg, operandReg, tempReg); // target = operand + temp; genProduceReg(tree); } diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index a2a6152309f121..03a525deaebdc4 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -787,6 +787,14 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; + case GT_INC_SATURATE: + assert(dstCount == 1); + srcCount = 1; + BuildUse(tree->gtGetOp1()); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; } // end switch (tree->OperGet()) if (tree->IsUnusedValue() && (dstCount != 0)) From 0d122101c3f7ad6047b6b283cde8ab262dae45c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Sowi=C5=84ski?= Date: Fri, 8 Aug 2025 11:49:15 +0200 Subject: [PATCH 3/6] Use target reg as temp for comparison result --- src/coreclr/jit/codegenriscv64.cpp | 7 +++---- src/coreclr/jit/lsrariscv64.cpp | 4 +--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index ee0fe60ec5880b..7973acdb466355 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1002,15 +1002,14 @@ void CodeGen::genCodeForIncSaturate(GenTree* tree) GenTree* operand = tree->gtGetOp1(); assert(!operand->isContained()); - regNumber tempReg = internalRegisters.GetSingle(tree); // The src must be a register. regNumber operandReg = genConsumeReg(operand); emitAttr attr = emitActualTypeSize(tree); assert(EA_SIZE(attr) == EA_PTRSIZE); - assert(tempReg != operandReg); + noway_assert(targetReg != operandReg); // lifetime of the operand register should have been extended - GetEmitter()->emitIns_R_R_I(INS_sltiu, attr, tempReg, operandReg, SIZE_T_MAX); // temp = (operand < max) ? 1 : 0; - GetEmitter()->emitIns_R_R_I(INS_add, attr, targetReg, operandReg, tempReg); // target = operand + temp; + GetEmitter()->emitIns_R_R_I(INS_sltiu, attr, targetReg, operandReg, SIZE_T_MAX); // temp = (operand < max) ? 1 : 0; + GetEmitter()->emitIns_R_R_R(INS_add, attr, targetReg, operandReg, targetReg); // target = operand + temp; genProduceReg(tree); } diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 03a525deaebdc4..6e4873e60aa18a 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -790,9 +790,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_INC_SATURATE: assert(dstCount == 1); srcCount = 1; - BuildUse(tree->gtGetOp1()); - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); + setDelayFree(BuildUse(tree->gtGetOp1())); BuildDef(tree); break; } // end switch (tree->OperGet()) From 836734dfc16f74ff745875da110c7d03060f5e19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Sowi=C5=84ski?= Date: Fri, 8 Aug 2025 14:18:40 +0200 Subject: [PATCH 4/6] Fix returning stackalloc'ed address --- src/coreclr/jit/codegenriscv64.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 7973acdb466355..3d0365e920bd17 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1364,6 +1364,7 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber targetReg = tree->GetRegNum(); regNumber regCnt = REG_NA; regNumber tempReg = REG_NA; + regNumber spSourceReg = REG_SPBASE; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; // can optimize for riscv64. @@ -1567,11 +1568,10 @@ void CodeGen::genLclHeap(GenTree* tree) tempReg = internalRegisters.Extract(tree); assert(regCnt != tempReg); - // regCnt now holds the number of bytes to localloc, after the sequence it will be set to the ultimate SP if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)) { - emit->emitIns_R_R_R(INS_maxu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); // temp = max(sp, cnt); - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, tempReg, regCnt); // cnt = temp - cnt; + emit->emitIns_R_R_R(INS_maxu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, tempReg, regCnt); } else { @@ -1581,8 +1581,9 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -1); // temp = overflow ? 0 : full_mask; emit->emitIns_R_R_R(INS_and, EA_PTRSIZE, regCnt, regCnt, tempReg); // cnt = overflow ? 0 : cnt; } - regNumber rPageSize = internalRegisters.GetSingle(tree); + // At this point 'regCnt' is set to the ultimate SP. + regNumber rPageSize = internalRegisters.GetSingle(tree); noway_assert(rPageSize != tempReg); emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); @@ -1599,6 +1600,7 @@ void CodeGen::genLclHeap(GenTree* tree) // we're going to assume the worst and probe. // Move the final value to SP emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); + spSourceReg = regCnt; // regCnt may be same as targetReg which gives advantage in returning the address below } ALLOC_DONE: @@ -1621,13 +1623,12 @@ void CodeGen::genLclHeap(GenTree* tree) // Return the stackalloc'ed address in result register. // TargetReg = SP + stackAdjustment. // - genInstrWithConstant(INS_addi, EA_PTRSIZE, targetReg, REG_SPBASE, (ssize_t)stackAdjustment, tempReg); + genInstrWithConstant(INS_addi, EA_PTRSIZE, targetReg, spSourceReg, (ssize_t)stackAdjustment, tempReg); } else // stackAdjustment == 0 { - // Move the final value of SP to targetReg (if necessary; targetReg may be same as regCnt which also holds SP) - regNumber spSrc = (regCnt == REG_NA) ? REG_SPBASE : regCnt; - emit->emitIns_Mov(EA_PTRSIZE, targetReg, spSrc, true); + // Move the final value of SP to targetReg + emit->emitIns_Mov(EA_PTRSIZE, targetReg, spSourceReg, true); } BAILOUT: From d7adaf89ce5887d67e36750bcc7392e6d726d366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Sowi=C5=84ski?= Date: Fri, 8 Aug 2025 14:54:04 +0200 Subject: [PATCH 5/6] Fix instruction order, remove pointless diffs --- src/coreclr/jit/codegenriscv64.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 3d0365e920bd17..a7f4ed011f9510 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1571,17 +1571,19 @@ void CodeGen::genLclHeap(GenTree* tree) if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)) { emit->emitIns_R_R_R(INS_maxu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, tempReg, regCnt); + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, tempReg, regCnt); // regCnt now holds ultimate SP } else { - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); // cnt = sp - cnt; // ultimate SP - // If the subtraction above overflowed (i.e. sp < ultimateSP), set regCnt to lowest possible value (i.e. 0) emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); // temp = overflow ? 1 : 0; - emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -1); // temp = overflow ? 0 : full_mask; - emit->emitIns_R_R_R(INS_and, EA_PTRSIZE, regCnt, regCnt, tempReg); // cnt = overflow ? 0 : cnt; + + // sub regCnt, SP, regCnt // regCnt now holds ultimate SP + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); + + // If overflow, set regCnt to lowest possible value + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -1); // temp = overflow ? 0 : full_mask; + emit->emitIns_R_R_R(INS_and, EA_PTRSIZE, regCnt, regCnt, tempReg); // cnt = overflow ? 0 : cnt; } - // At this point 'regCnt' is set to the ultimate SP. regNumber rPageSize = internalRegisters.GetSingle(tree); noway_assert(rPageSize != tempReg); From b1572f8f2118702ca0bad2fa95d58beb6bd5f748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Sowi=C5=84ski?= Date: Mon, 11 Aug 2025 10:05:27 +0200 Subject: [PATCH 6/6] Revert adding to sp in stackAdjustment case --- src/coreclr/jit/codegenriscv64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index a7f4ed011f9510..1bfa89dc1ff68f 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1625,7 +1625,7 @@ void CodeGen::genLclHeap(GenTree* tree) // Return the stackalloc'ed address in result register. // TargetReg = SP + stackAdjustment. // - genInstrWithConstant(INS_addi, EA_PTRSIZE, targetReg, spSourceReg, (ssize_t)stackAdjustment, tempReg); + genInstrWithConstant(INS_addi, EA_PTRSIZE, targetReg, REG_SPBASE, (ssize_t)stackAdjustment, tempReg); } else // stackAdjustment == 0 {