Skip to content

Commit 7e14334

Browse files
authored
cmov: add riscv32 optimised mask generation (#1396)
Currently, `arm` assembly is used both to optimise for performance and to give stronger guarantees that compiler optimisations will not insert branches during mask generation. This commit introduces the same for the `riscv32` architecture. I successfully ran some of `cmov`'s integration tests (the test in `regression` and the 94 tests in `core_impls`) on a real target: a board with an `esp32c6` chip.
1 parent 953407a commit 7e14334

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

cmov/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ on the following CPU architectures:
3636
- [x] `x86_64` (`CMOVZ`, `CMOVNZ`)
3737
- [x] `arm` (mask generation only)
3838
- [x] `aarch64` (`CSEL`)
39+
- [x] `riscv32` (mask generation only)
3940

4041
On other target architectures, a "best effort" portable fallback implementation
4142
based on bitwise arithmetic is used instead, augmented with tactical usage of

cmov/src/backends/soft.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,13 @@ fn maskne64(x: u64, y: u64) -> u64 {
122122
}
123123

124124
/// Return a `u32::MAX` mask if `condition` is non-zero, otherwise return zero.
125-
#[cfg(not(target_arch = "arm"))]
125+
#[cfg(not(any(target_arch = "arm", target_arch = "riscv32")))]
126126
fn masknz32(condition: u32) -> u32 {
127127
masknz!(condition: u32)
128128
}
129129

130130
/// Return a `u64::MAX` mask if `condition` is non-zero, otherwise return zero.
131-
#[cfg(not(target_arch = "arm"))]
131+
#[cfg(not(any(target_arch = "arm", target_arch = "riscv32")))]
132132
fn masknz64(condition: u64) -> u64 {
133133
masknz!(condition: u64)
134134
}
@@ -153,8 +153,24 @@ fn masknz32(condition: u32) -> u32 {
153153
mask
154154
}
155155

156+
/// Optimized mask generation for riscv32 targets.
157+
#[cfg(target_arch = "riscv32")]
158+
fn masknz32(condition: u32) -> u32 {
159+
let mut mask: u32;
160+
unsafe {
161+
core::arch::asm!(
162+
"seqz {0}, {1}", // Set-if-equal-to-zero pseudo-instruction: {0} = 1 if {1} == 0, else 0
163+
"addi {0}, {0}, -1", // Subtract 1: 1 becomes 0 (all-zeroes mask), 0 wraps around to an all-ones mask
164+
lateout(reg) mask,
165+
in(reg) condition,
166+
options(nostack, nomem),
167+
);
168+
}
169+
mask
170+
}
171+
156172
/// 64-bit wrapper for targets that implement 32-bit mask generation in assembly.
157-
#[cfg(target_arch = "arm")]
173+
#[cfg(any(target_arch = "arm", target_arch = "riscv32"))]
158174
fn masknz64(condition: u64) -> u64 {
159175
let lo = masknz32((condition & 0xFFFF_FFFF) as u32);
160176
let hi = masknz32((condition >> 32) as u32);

0 commit comments

Comments
 (0)