Skip to content

[RISC-V] Improve clamped subtract & increment #118530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 21 additions & 30 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,11 +1005,11 @@ void CodeGen::genCodeForIncSaturate(GenTree* tree)
// The src must be a register.
regNumber operandReg = genConsumeReg(operand);
emitAttr attr = emitActualTypeSize(tree);
assert(EA_SIZE(attr) == EA_PTRSIZE);
noway_assert(targetReg != operandReg); // lifetime of the operand register should have been extended

GetEmitter()->emitIns_R_R_I(INS_addi, attr, targetReg, operandReg, 1);
// bne targetReg, zero, 2 * 4
GetEmitter()->emitIns_R_R_I(INS_bne, attr, targetReg, REG_R0, 8);
GetEmitter()->emitIns_R_R(INS_not, attr, targetReg, targetReg);
GetEmitter()->emitIns_R_R_I(INS_sltiu, attr, targetReg, operandReg, SIZE_T_MAX); // temp = (operand < max) ? 1 : 0;
GetEmitter()->emitIns_R_R_R(INS_add, attr, targetReg, operandReg, targetReg); // target = operand + temp;

genProduceReg(tree);
}
Expand Down Expand Up @@ -1364,6 +1364,7 @@ void CodeGen::genLclHeap(GenTree* tree)
regNumber targetReg = tree->GetRegNum();
regNumber regCnt = REG_NA;
regNumber tempReg = REG_NA;
regNumber spSourceReg = REG_SPBASE;
var_types type = genActualType(size->gtType);
emitAttr easz = emitTypeSize(type);
BasicBlock* endLabel = nullptr; // can optimize for riscv64.
Expand Down Expand Up @@ -1560,42 +1561,31 @@ void CodeGen::genLclHeap(GenTree* tree)
// case SP is on the last byte of the guard page. Thus you must
// touch SP-0 first, not SP-0x1000.
//
//
// Note that we jump through a few hoops so that SP never points to
// illegal pages at any time during the tickling process.
//
// sltu RA, SP, regCnt
// sub regCnt, SP, regCnt // regCnt now holds ultimate SP
// beq RA, REG_R0, Skip
// addi regCnt, REG_R0, 0
//
// Skip:
// lui regPageSize, eeGetPageSize()>>12
// addi regTmp, SP, 0
// Loop:
// lw r0, 0(regTmp) // tickle the page - read from the page
// sub regTmp, regTmp, regPageSize
// bgeu regTmp, regCnt, Loop
//
// Done:
// addi SP, regCnt, 0
//

if (tempReg == REG_NA)
tempReg = internalRegisters.Extract(tree);

assert(regCnt != tempReg);
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);
if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb))
{
emit->emitIns_R_R_R(INS_maxu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, tempReg, regCnt); // regCnt now holds ultimate SP
}
else
{
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); // temp = overflow ? 1 : 0;

// sub regCnt, SP, regCnt // regCnt now holds ultimate SP
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
// sub regCnt, SP, regCnt // regCnt now holds ultimate SP
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);

// Overflow, set regCnt to lowest possible value
emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2);
emit->emitIns_R_R(INS_mov, EA_PTRSIZE, regCnt, REG_R0);
// If overflow, set regCnt to lowest possible value
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -1); // temp = overflow ? 0 : full_mask;
emit->emitIns_R_R_R(INS_and, EA_PTRSIZE, regCnt, regCnt, tempReg); // cnt = overflow ? 0 : cnt;
}

regNumber rPageSize = internalRegisters.GetSingle(tree);

noway_assert(rPageSize != tempReg);

emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
Expand All @@ -1612,6 +1602,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// we're going to assume the worst and probe.
// Move the final value to SP
emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
spSourceReg = regCnt; // regCnt may be same as targetReg which gives advantage in returning the address below
}

ALLOC_DONE:
Expand Down Expand Up @@ -1639,7 +1630,7 @@ void CodeGen::genLclHeap(GenTree* tree)
else // stackAdjustment == 0
{
// Move the final value of SP to targetReg
emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_SPBASE);
emit->emitIns_Mov(EA_PTRSIZE, targetReg, spSourceReg, true);
}

BAILOUT:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsrariscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,12 @@ int LinearScan::BuildNode(GenTree* tree)
BuildDef(tree);
break;

case GT_INC_SATURATE:
assert(dstCount == 1);
srcCount = 1;
setDelayFree(BuildUse(tree->gtGetOp1()));
BuildDef(tree);
break;
} // end switch (tree->OperGet())

if (tree->IsUnusedValue() && (dstCount != 0))
Expand Down
Loading