Skip to content

Commit d886df6

Browse files
authored
Add fast single-precision add/sub/mul for Hazard3 (#1883)
* Add fast single-precision add/sub/mul for Hazard3 * Make test output less noisy. Map -nan to -inf in vector gen. Move random vectors to separate files. * Re-disable USB stdout for pico_float_test by default... * Disable pico/float.h exports on RISC-V as these functions aren't implemented * Add hazard3 instructions to asm_helper. Split hazard3.h to support this. You can still include hazard3.h to get everything. This just allows you to pull in less.
1 parent 876f331 commit d886df6

File tree

12 files changed

+2896
-164
lines changed

12 files changed

+2896
-164
lines changed

src/rp2350/pico_platform/include/pico/asm_helper.S

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,17 @@
66

77
#include "pico.h"
88

9+
#ifdef __riscv
10+
// Get macros for convenient use of Hazard3 instructions without binutils support
11+
#include "hardware/hazard3/instructions.h"
12+
#endif
13+
914
#if !PICO_ASSEMBLER_IS_CLANG
1015
#define apsr_nzcv r15
1116
#endif
12-
# note we don't do this by default in this file for backwards comaptibility with user code
13-
# that may include this file, but not use unified syntax. Note that this macro does equivalent
14-
# setup to the pico_default_asm macro for inline assembly in C code.
17+
// note we don't do this by default in this file for backwards compatibility with user code
18+
// that may include this file, but not use unified syntax. Note that this macro does equivalent
19+
// setup to the pico_default_asm macro for inline assembly in C code.
1520
.macro pico_default_asm_setup
1621
#ifndef __riscv
1722
.syntax unified
@@ -60,28 +65,7 @@ weak_func WRAPPER_FUNC_NAME(\x)
6065
.word \func + \offset
6166
.endm
6267

63-
# backwards compatibility
68+
// backwards compatibility
6469
.macro __pre_init func, priority_string1
6570
__pre_init_with_offset func, 0, priority_string1
6671
.endm
67-
68-
#ifdef __riscv
69-
// rd = (rs1 >> rs2[4:0]) & ~(-1 << nbits)
70-
.macro h3.bextm rd rs1 rs2 nbits
71-
.if (\nbits < 1) || (\nbits > 8)
72-
.err
73-
.endif
74-
.insn r 0x0b, 0x4, (((\nbits - 1) & 0x7 ) << 1), \rd, \rs1, \rs2
75-
.endm
76-
77-
// rd = (rs1 >> shamt) & ~(-1 << nbits)
78-
.macro h3.bextmi rd rs1 shamt nbits
79-
.if (\nbits < 1) || (\nbits > 8)
80-
.err
81-
.endif
82-
.if (\shamt < 0) || (\shamt > 31)
83-
.err
84-
.endif
85-
.insn i 0x0b, 0x4, \rd, \rs1, (\shamt & 0x1f) | (((\nbits - 1) & 0x7 ) << 6)
86-
.endm
87-
#endif

src/rp2_common/hardware_hazard3/include/hardware/hazard3.h

Lines changed: 8 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -4,101 +4,26 @@
44
* SPDX-License-Identifier: BSD-3-Clause
55
*/
66

7-
#ifndef _HARDWARE_HAZARD3_
8-
#define _HARDWARE_HAZARD3_
7+
#ifndef _HARDWARE_HAZARD3_H
8+
#define _HARDWARE_HAZARD3_H
99

1010
#include "pico.h"
1111
#include "hardware/riscv.h"
12+
13+
// This includes both standard and Hazard3 custom CSRs:
1214
#include "hardware/regs/rvcsr.h"
1315

16+
#include "hardware/hazard3/features.h"
17+
#include "hardware/hazard3/instructions.h"
18+
1419
/** \file hardware/hazard3.h
1520
* \defgroup hardware_hazard3 hardware_hazard3
1621
*
1722
* \brief Accessors for Hazard3-specific RISC-V CSRs, and intrinsics for Hazard3 custom instructions
1823
*
1924
*/
2025

21-
// Feature detection macros for Hazard3 custom extensions
22-
#if PICO_RP2350
23-
#define __hazard3_extension_xh3power
24-
#define __hazard3_extension_xh3bextm
25-
#define __hazard3_extension_xh3irq
26-
#define __hazard3_extension_xh3pmpm
27-
#endif
28-
29-
#ifdef __ASSEMBLER__
30-
31-
// Assembly language instruction macros for Hazard3 custom instructions
32-
33-
// h3.bextm: Extract up to 8 consecutive bits from register rs1, with the
34-
// first bit indexed by rs2, and bit count configured by an immediate value.
35-
// R-format instruction. Pseudocode:
36-
//
37-
// rd = (rs1 >> rs2[4:0]) & ~(-1 << nbits)
38-
39-
.macro h3.bextm rd rs1 rs2 nbits
40-
.if (\nbits < 1) || (\nbits > 8)
41-
.err
42-
.endif
43-
#ifdef __hazard3_extension_xh3bextm
44-
.insn r 0x0b, 0x4, (((\nbits - 1) & 0x7 ) << 1), \rd, \rs1, \rs2
45-
#else
46-
srl \rd, \rs1, \rs2
47-
andi \rd, \rd, ((1 << \nbits) - 1)
48-
#endif
49-
.endm
50-
51-
// h3.bextmi: Extract up to 8 consecutive bits from register rs1, with the
52-
// first bit index and the number of bits both configured by immediate
53-
// values. I-format instruction. Pseudocode:
54-
//
55-
// rd = (rs1 >> shamt) & ~(-1 << nbits)
56-
57-
.macro h3.bextmi rd rs1 shamt nbits
58-
.if (\nbits < 1) || (\nbits > 8)
59-
.err
60-
.endif
61-
.if (\shamt < 0) || (\shamt > 31)
62-
.err
63-
.endif
64-
#ifdef __hazard3_extension_xh3bextm
65-
.insn i 0x0b, 0x4, \rd, \rs1, (\shamt & 0x1f) | (((\nbits - 1) & 0x7 ) << 6)
66-
#else
67-
srli \rd, \rs1, \shamt
68-
andi \rd, \rd, ((1 << \nbits) - 1)
69-
#endif
70-
.endm
71-
72-
// h3.block: enter an idle state until another processor in the same
73-
// multiprocessor complex executes an h3.unblock instruction, or the
74-
// processor is interrupted. Fall through immediately if an h3.unblock has
75-
// been received since the last execution of an h3.block on this processor.
76-
// On RP2350, processors also have their own h3.unblock signals reflected
77-
// back to them.
78-
79-
.macro h3.block
80-
#ifdef __hazard3_extension_xh3power
81-
slt x0, x0, x0
82-
#else
83-
nop
84-
#endif
85-
.endm
86-
87-
// h3.unblock: signal other processors in the same multiprocessor complex to
88-
// exit the idle state entered by an h3.block instruction. On RP2350, this
89-
// signal is also reflected back to the processor that executed the
90-
// h3.unblock, which will cause that processor's next h3.block to fall
91-
// through immediately.
92-
93-
.macro h3.unblock
94-
#ifdef __hazard3_extension_xh3power
95-
slt x0, x0, x1
96-
#else
97-
nop
98-
#endif
99-
.endm
100-
101-
#else // !__ASSEMBLER__
26+
#ifndef __ASSEMBLER__
10227

10328
#ifdef __cplusplus
10429
extern "C" {
@@ -128,51 +53,6 @@ extern "C" {
12853
#define hazard3_irqarray_clear(csr, index, data) static_assert(false, "Not supported: Xh3irq extension")
12954
#endif
13055

131-
132-
// nbits must be a constant expression
133-
#ifdef __hazard3_extension_xh3bextm
134-
#define __hazard3_bextm(nbits, rs1, rs2) ({\
135-
uint32_t __h3_bextm_rd; \
136-
asm (".insn r 0x0b, 0, %3, %0, %1, %2"\
137-
: "=r" (__h3_bextm_rd) \
138-
: "r" (rs1), "r" (rs2), "i" ((((nbits) - 1) & 0x7) << 1)\
139-
); \
140-
__h3_bextm_rd; \
141-
})
142-
#else
143-
#define __hazard3_bextm(nbits, rs1, rs2) (((rs1) >> ((rs2) & 0x1f)) & (0xffu >> (7 - (((nbits) - 1) & 0x7))))
144-
#endif
145-
146-
// nbits and shamt must be constant expressions
147-
#ifdef __hazard3_extension_xh3bextm
148-
#define __hazard3_bextmi(nbits, rs1, shamt) ({\
149-
uint32_t __h3_bextmi_rd; \
150-
asm (".insn i 0x0b, 0x4, %0, %1, %2"\
151-
: "=r" (__h3_bextmi_rd) \
152-
: "r" (rs1), "i" ((((nbits) - 1) & 0x7) << 6 | ((shamt) & 0x1f)) \
153-
); \
154-
__h3_bextmi_rd; \
155-
})
156-
#else
157-
#define __hazard3_bextm(nbits, rs1, rs2) (((rs1) >> ((shamt) & 0x1f)) & (0xffu >> (7 - (((nbits) - 1) & 0x7))))
158-
#endif
159-
160-
#ifdef __hazard3_extension_xh3power
161-
#define __hazard3_block() asm volatile ("slt x0, x0, x0" : : : "memory")
162-
#else
163-
#define __hazard3_block() do {} while (0)
164-
#endif
165-
166-
#ifdef __hazard3_extension_xh3power
167-
#define __hazard3_unblock() asm volatile ("slt x0, x0, x1" : : : "memory")
168-
#else
169-
#define __hazard3_unblock() do {} while (0)
17056
#endif
17157

172-
#ifdef __cplusplus
173-
}
174-
#endif
175-
176-
#endif // !__ASSEMBLER__
177-
17858
#endif
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright (c) 2024 Raspberry Pi Ltd.
3+
*
4+
* SPDX-License-Identifier: BSD-3-Clause
5+
*/
6+
7+
#ifndef _HARDWARE_HAZARD3_FEATURES_H
8+
#define _HARDWARE_HAZARD3_FEATURES_H
9+
10+
#include "pico.h"
11+
12+
/** \file hardware/hazard3/features.h
13+
* \addtogroup hardware_hazard3
14+
*
15+
* \brief Sets macros for supported Hazard3 custom extensions (features) based on PICO_PLATFORM macros
16+
*
17+
*/
18+
19+
// Feature detection macros for Hazard3 custom extensions
20+
#if PICO_RP2350
21+
// Version 1.0 of these four extensions
22+
// (encoded as major * 100 + minor)
23+
#define __hazard3_extension_xh3power 100
24+
#define __hazard3_extension_xh3bextm 100
25+
#define __hazard3_extension_xh3irq 100
26+
#define __hazard3_extension_xh3pmpm 100
27+
#endif
28+
29+
#endif
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* Copyright (c) 2024 Raspberry Pi Ltd.
3+
*
4+
* SPDX-License-Identifier: BSD-3-Clause
5+
*/
6+
7+
#ifndef _HARDWARE_HAZARD3_INSTRUCTIONS_H
8+
#define _HARDWARE_HAZARD3_INSTRUCTIONS_H
9+
10+
#include "pico.h"
11+
12+
// Get list of supported extensions based on platform:
13+
#include "hardware/hazard3/features.h"
14+
15+
/** \file hardware/hazard3/instructions.h
16+
* \addtogroup hardware_hazard3
17+
*
18+
* \brief Intrinsics and asm macros for Hazard3 custom instructions
19+
*
20+
* The implementation of these intrinsics depends on the feature macros
21+
* defined in hardware/hazard3/features.h. When the relevant feature is not
22+
* present, the intrinsics fall back on an RV32I equivalent if possible.
23+
*
24+
*/
25+
26+
#ifdef __ASSEMBLER__
27+
28+
// Assembly language instruction macros for Hazard3 custom instructions
29+
30+
// h3.bextm: Extract up to 8 consecutive bits from register rs1, with the
31+
// first bit indexed by rs2, and bit count configured by an immediate value.
32+
// R-format instruction. Pseudocode:
33+
//
34+
// rd = (rs1 >> rs2[4:0]) & ~(-1 << nbits)
35+
36+
.macro h3.bextm rd rs1 rs2 nbits
37+
.if (\nbits < 1) || (\nbits > 8)
38+
.err
39+
.endif
40+
#ifdef __hazard3_extension_xh3bextm
41+
// funct3 = 0x0 for the R-format h3.bextm, matching the __hazard3_bextm C intrinsic;
// funct3 = 0x4 is the encoding used by the I-format h3.bextmi.
.insn r 0x0b, 0x0, (((\nbits - 1) & 0x7 ) << 1), \rd, \rs1, \rs2
42+
#else
43+
srl \rd, \rs1, \rs2
44+
andi \rd, \rd, ((1 << \nbits) - 1)
45+
#endif
46+
.endm
47+
48+
// h3.bextmi: Extract up to 8 consecutive bits from register rs1, with the
49+
// first bit index and the number of bits both configured by immediate
50+
// values. I-format instruction. Pseudocode:
51+
//
52+
// rd = (rs1 >> shamt) & ~(-1 << nbits)
53+
54+
.macro h3.bextmi rd rs1 shamt nbits
55+
.if (\nbits < 1) || (\nbits > 8)
56+
.err
57+
.endif
58+
.if (\shamt < 0) || (\shamt > 31)
59+
.err
60+
.endif
61+
#ifdef __hazard3_extension_xh3bextm
62+
.insn i 0x0b, 0x4, \rd, \rs1, (\shamt & 0x1f) | (((\nbits - 1) & 0x7 ) << 6)
63+
#else
64+
srli \rd, \rs1, \shamt
65+
andi \rd, \rd, ((1 << \nbits) - 1)
66+
#endif
67+
.endm
68+
69+
// h3.block: enter an idle state until another processor in the same
70+
// multiprocessor complex executes an h3.unblock instruction, or the
71+
// processor is interrupted. Fall through immediately if an h3.unblock has
72+
// been received since the last execution of an h3.block on this processor.
73+
// On RP2350, processors also have their own h3.unblock signals reflected
74+
// back to them.
75+
76+
.macro h3.block
77+
#ifdef __hazard3_extension_xh3power
78+
slt x0, x0, x0
79+
#else
80+
nop
81+
#endif
82+
.endm
83+
84+
// h3.unblock: signal other processors in the same multiprocessor complex to
85+
// exit the idle state entered by an h3.block instruction. On RP2350, this
86+
// signal is also reflected back to the processor that executed the
87+
// h3.unblock, which will cause that processor's next h3.block to fall
88+
// through immediately.
89+
90+
.macro h3.unblock
91+
#ifdef __hazard3_extension_xh3power
92+
slt x0, x0, x1
93+
#else
94+
nop
95+
#endif
96+
.endm
97+
98+
#else // !__ASSEMBLER__
99+
100+
// C language instruction macros for Hazard3 custom instructions
101+
102+
#ifdef __cplusplus
103+
extern "C" {
104+
#endif
105+
106+
// nbits must be a constant expression
107+
#ifdef __hazard3_extension_xh3bextm
108+
#define __hazard3_bextm(nbits, rs1, rs2) ({\
109+
uint32_t __h3_bextm_rd; \
110+
asm (".insn r 0x0b, 0, %3, %0, %1, %2"\
111+
: "=r" (__h3_bextm_rd) \
112+
: "r" (rs1), "r" (rs2), "i" ((((nbits) - 1) & 0x7) << 1)\
113+
); \
114+
__h3_bextm_rd; \
115+
})
116+
#else
117+
#define __hazard3_bextm(nbits, rs1, rs2) (((rs1) >> ((rs2) & 0x1f)) & (0xffu >> (7 - (((nbits) - 1) & 0x7))))
118+
#endif
119+
120+
// nbits and shamt must be constant expressions
121+
#ifdef __hazard3_extension_xh3bextm
122+
#define __hazard3_bextmi(nbits, rs1, shamt) ({\
123+
uint32_t __h3_bextmi_rd; \
124+
asm (".insn i 0x0b, 0x4, %0, %1, %2"\
125+
: "=r" (__h3_bextmi_rd) \
126+
: "r" (rs1), "i" ((((nbits) - 1) & 0x7) << 6 | ((shamt) & 0x1f)) \
127+
); \
128+
__h3_bextmi_rd; \
129+
})
130+
#else
131+
// Fallback when Xh3bextm is not available: shift by the immediate, then mask to nbits (1..8) bits
#define __hazard3_bextmi(nbits, rs1, shamt) (((rs1) >> ((shamt) & 0x1f)) & (0xffu >> (7 - (((nbits) - 1) & 0x7))))
132+
#endif
133+
134+
#ifdef __hazard3_extension_xh3power
135+
#define __hazard3_block() asm volatile ("slt x0, x0, x0" : : : "memory")
136+
#else
137+
#define __hazard3_block() do {} while (0)
138+
#endif
139+
140+
#ifdef __hazard3_extension_xh3power
141+
#define __hazard3_unblock() asm volatile ("slt x0, x0, x1" : : : "memory")
142+
#else
143+
#define __hazard3_unblock() do {} while (0)
144+
#endif
145+
146+
#ifdef __cplusplus
147+
}
148+
#endif
149+
150+
#endif // !__ASSEMBLER__
151+
152+
#endif

0 commit comments

Comments
 (0)