Skip to content

Commit e25a63a

Browse files
committed
various bug fixes based on review
1 parent 07f2d71 commit e25a63a

File tree

7 files changed

+310
-126
lines changed

7 files changed

+310
-126
lines changed

src/rp2_common/pico_double/double_aeabi_dcp.S

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "pico/asm_helper.S"
88

99
#if !HAS_DOUBLE_COPROCESSOR
10-
#error attempt to compile double_aeabi_rp2350 when there is no DCP
10+
#error attempt to compile double_aeabi_dcp when there is no DCP
1111
#else
1212

1313
#include "hardware/dcp_instr.inc.S"
@@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func)
2929

3030
// ============== STATE SAVE AND RESTORE ===============
3131

32-
.macro saving_func type func
32+
.macro saving_func type func, opt_label1='-', opt_label2='-'
3333
// Note we are usually 32-bit aligned already at this point, as most of the
3434
// function bodies contain exactly two 16-bit instructions: bmi and bx lr.
3535
// We want the PCMP word-aligned.
@@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func)
4141
push {lr} // 16-bit instruction
4242
bl generic_save_state // 32-bit instruction
4343
b 1f // 16-bit instruction
44+
.ifnc \opt_label1,'-'
45+
regular_func \opt_label1
46+
.endif
47+
.ifnc \opt_label2,'-'
48+
regular_func \opt_label2
49+
.endif
4450
// This is the actual entry point:
4551
\type\()_func \func
4652
PCMP apsr_nzcv
@@ -130,28 +136,24 @@ saving_func wrapper sqrt
130136

131137
double_section dclassify
132138
saving_func regular dclassify
133-
@ with correct rounding
134139
dcp_dclassify_m apsr_nzcv,r0,r1
135140
saving_func_return
136141

137142
// ============== CONVERSION FUNCTIONS ===============
138143

139144
double_wrapper_section __aeabi_d2f
140-
regular_func double2float
141-
saving_func wrapper __aeabi_d2f
145+
saving_func wrapper __aeabi_d2f double2float
142146
@ with rounding
143147
dcp_double2float_m r0,r0,r1
144148
saving_func_return
145149

146150
double_wrapper_section __aeabi_i2d
147-
regular_func int2double
148-
saving_func wrapper __aeabi_i2d
151+
saving_func wrapper __aeabi_i2d int2double
149152
dcp_int2double_m r0,r1,r0
150153
saving_func_return
151154

152155
double_wrapper_section __aeabi_ui2d
153-
regular_func uint2double
154-
saving_func wrapper __aeabi_ui2d
156+
saving_func wrapper __aeabi_ui2d uint2double
155157
dcp_uint2double_m r0,r1,r0
156158
saving_func_return
157159

@@ -170,8 +172,7 @@ saving_func regular double2fix_z
170172
b double2int_z_entry
171173

172174
double_section double2ufix
173-
regular_func double2ufix
174-
saving_func regular double2ufix_z
175+
saving_func regular double2ufix_z double2ufix
175176
double2ufix_z_entry:
176177
ubfx r3, r1, #20, #11
177178
adds r3, r2
@@ -188,6 +189,7 @@ double2ufix_z_entry:
188189
double_section double2fix
189190
saving_func regular double2fix
190191
ubfx r3, r1, #20, #11
192+
cbz r3, 2f // 0 or denormal
191193
adds r3, r2
192194
beq 1f // very small; we don't care that we might make a denormal
193195
asrs ip, r3, #11
@@ -198,47 +200,45 @@ saving_func regular double2fix
198200
1:
199201
bfi r1, r3, #20, #11
200202
b double2int_entry
203+
2:
204+
movs r0, #0
205+
saving_func_return
206+
201207

202208
double_section double2int
203209
saving_func regular double2int
204210
double2int_entry:
205211
lsls r2, r1, #1
206212
bcc double2int_z_entry // positive is ok for int_z
207-
orrs r3, r2, r0
208-
beq double2int_z_entry // 0 or -0 is ok for int64_z
213+
lsrs r3, r2, #21
214+
beq double2int_z_entry // 0 or -0 or denormal is ok for int_z
209215

210216
lsrs r2, #21
211217
adds r2, #1
212218
subs r2, r2, #0x400
213219
bcc 1f // <1 means subtract 1
214-
cmp r2, #52
215-
bge double2int_z_entry // must be an integer
220+
cmp r2, #31
221+
bge double2int_z_entry // must be an integer or maxed out
216222
lsls r3, r1, #12
217-
adds r3, r3, r0, lsr #20
218-
// r3 now has highest 32 mantissa bits
219-
lsls r3, r2
220-
bne 1f // not integer as non zero fractional bits remain
221-
lsls r3, r0, #12
223+
adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits
222224
lsls r3, r2
225+
orrs r3, r3, r0, lsl #12 // these bits are all guaranteed to be in the fraction
223226
beq double2int_z_entry // integer
224227
1:
225228
dcp_double2int_m r0,r0,r1
226229
subs r0, #1
227230
saving_func_return
228231

229232
double_wrapper_section __aeabi_d2iz
230-
regular_func double2int_z
231-
saving_func wrapper __aeabi_d2iz
233+
saving_func wrapper __aeabi_d2iz double2int_z
232234
double2int_z_entry:
233235
@ with truncation towards 0
234236
dcp_double2int_m r0,r0,r1
235237
// note: this works with either saved or not saved call as it is just a `bx lr`
236238
saving_func_return
237239

238240
double_wrapper_section __aeabi_d2uiz
239-
regular_func double2uint_z
240-
regular_func double2uint
241-
saving_func wrapper __aeabi_d2uiz
241+
saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
242242
double2uint_z_entry:
243243
@ with truncation towards 0
244244
dcp_double2uint_m r0,r0,r1
@@ -266,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun
266266
saving_func_return
267267

268268
double_wrapper_section __aeabi_dcmp
269-
270269
saving_func wrapper __aeabi_cdrcmple
271270
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
272271
bvs cmp_nan

src/rp2_common/pico_double/double_aeabi_rp2040.S

Lines changed: 67 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -578,35 +578,45 @@ regular_func double2uint64_z
578578
double_section double2fix64_z
579579
regular_func double2fix64_z
580580
lsls r3, r1, #1
581-
bcc double2fix64 // positive is ok for fix64
582-
push {r1, r2}
583-
lsrs r3, #21
581+
bcc double2fix64 // input positive is ok for fix64
582+
mov ip, r2
583+
asrs r2, r3, #21
584+
beq 3f // input zero or denormal, so just return zero
585+
adds r2, #1
586+
beq double2fix64 // input infinite/nan is ok for fix64
584587

585-
adds r3, r2
588+
lsrs r3, #21
589+
add r3, ip
586590
movs r2, #1
587591
negs r2, r2
588592
lsrs r2, #22
589-
subs r3, r2
593+
subs r3, r2 // r3 = modified e - 0x3ff
590594

591-
bcc 1f // <1 means subtract 1
595+
bcc 3f // modified input < 1.0 means result is zero
592596
cmp r3, #52
593-
bge double2fix64 // must be an integer
594-
595-
lsls r2, r1, #12
596-
lsrs r1, r0, #20
597-
adds r2, r1
598-
// r2 now has highest 32 mantissa bits
599-
lsls r2, r3
600-
bne 1f // not integer as non zero fractional bits remain
601-
lsls r2, r0, #12
602-
lsls r2, r3
603-
bne 1f
597+
bge 2f // modified input must be an integer or infinite
598+
599+
adds r3, #12
600+
mov r2, r1
601+
lsls r2, r2, r3 // r2 has remaining fractional mantissa bits of r1
602+
bne 1f // not integer as non zero fractional bits remain
603+
subs r3, #32
604+
asrs r2, r3, #31
605+
bics r3, r3, r2
606+
movs r2, r0
607+
lsls r2, r2, r3
608+
bne 1f // remaining fractional bits are non-zero, so argument was not an integer
609+
2:
604610
// integer
605-
pop {r1, r2}
611+
mov r2, ip
606612
b double2fix64
613+
3: // result is zero
614+
movs r0, #0
615+
movs r1, #0
616+
bx lr
607617
1:
608-
pop {r1, r2}
609618
push {lr}
619+
mov r2, ip
610620
bl double2fix64
611621
movs r2, #0
612622
adds r0, #1
@@ -626,6 +636,44 @@ double_section double2fix
626636
regular_func double2fix
627637
shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
628638

639+
double_section double2fix_z
640+
regular_func double2fix_z
641+
lsls r3, r1, #1
642+
asrs r3, #21
643+
beq 2f // input is zero or denormal
644+
adds r3, #1
645+
beq 3f // input is infinite or nan
646+
647+
// extract exponent again
648+
lsls r3, r1, #1
649+
lsrs r3, #21
650+
// adjust
651+
adds r3, r2
652+
ble 2f // adjusted input is zero or denormal or < 1
653+
lsrs r2, r3, #11
654+
bne 3f // adjusted exponent no longer fits in 11 bits, so saturate
655+
656+
// put updated exponent back in double and convert to int
657+
movs r2, #1
658+
lsls r2, #11
659+
subs r2, #1
660+
lsls r2, #20
661+
bics r1, r2
662+
lsls r3, #20
663+
orrs r1, r3
664+
b double2int_z
665+
2:
666+
// result is zero
667+
movs r0, #0
668+
bx lr
669+
3:
670+
movs r0, #0
671+
subs r0, #1
672+
lsrs r0, #1
673+
asrs r1, #31
674+
eors r0, r1
675+
bx lr
676+
629677
double_section double2ufix
630678
regular_func double2ufix
631679
regular_func double2ufix_z

src/rp2_common/pico_double/double_conv_m33.S

Lines changed: 41 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -251,57 +251,65 @@ regular_func ufix2double
251251

252252
double_section conv_dtoi64
253253
regular_func double2int64
254-
lsls r2, r1, #1
255-
bcc double2int64_z // positive is ok for int64_z
256-
orrs r3, r2, r0
257-
beq double2int64_z // 0 or -0 is ok for int64_z
258-
259-
lsrs r2, #21
260-
adds r2, #1
261-
subs r2, r2, #0x400
262-
bcc 1f // <1 means subtract 1
263-
cmp r2, #52
264-
bge double2int64_z // must be an integer
265-
lsls r3, r1, #12
266-
adds r3, r3, r0, lsr #20
267-
// r3 now has highest 32 mantissa bits
268-
lsls r3, r2
269-
bne 1f // not integer as non zero fractional bits remain
270-
lsls r3, r0, #12
271-
lsls r3, r2
272-
beq double2int64_z // integer
254+
lsls r3, r1, #1
255+
bcc double2int64_z // input positive is ok for int64_z
256+
cmp r3, #0xffe00000
257+
bcs double2int64_z // input is infinite or NaN (exponent all ones)
258+
lsrs r3, #21
259+
beq 2f // input zero or denormal, means answer remains zero
260+
sub r3, #0x3ff
261+
cmp r3, #0
262+
blt 1f // modified input zero or denormal, or less than 1.0
263+
cmp r3, #52
264+
bge double2int64_z // modified input must be an integer or infinite
265+
adds r3, #12
266+
lsls r2, r1, r3 // r2 has remaining fractional mantissa bits of r1
267+
bne 1f // not integer as non zero fractional bits remain
268+
subs r3, #32
269+
bics r3, r3, r3, asr #31 // map negative shift to zero
270+
lsls r3, r0, r3
271+
beq double2int64_z // remaining fractional bits are 0, so argument was an integer
273272
1:
274273
push {lr}
275274
bl double2int64_z
276275
subs r0, #1
277276
sbcs r1, r1, #0
278277
pop {pc}
278+
2:
279+
movs r0, #0
280+
movs r1, #0
281+
bx lr
279282

280283
double_section conv_dtofix64
281284
regular_func double2fix64
282285
lsls r3, r1, #1
283-
bcc double2fix64_z // positive is ok for int64_z
286+
bcc double2fix64_z // input positive is ok for fix64_z
287+
cmp r3, #0xffe00000
288+
bcs double2fix64_z // input is infinite or NaN (exponent all ones)
284289
lsrs r3, #21
285-
adds r3, #1
286-
rsb ip, r2, #0x400
287-
subs r3, ip
288-
bcc 1f // <1 means subtract 1
290+
beq 2f // input zero or denormal, means answer remains zero
291+
sub r3, #0x3ff
292+
adds r3, r2
293+
blt 1f // modified input zero or denormal, or less than 1.0
289294
cmp r3, #52
290-
bge double2fix64_z // must be an integer
291-
lsls ip, r1, #12
292-
adds ip, ip, r0, lsr #20
293-
// ip now has highest 32 mantissa bits
294-
lsls ip, r3
295-
bne 1f // not integer as non zero fractional bits remain
296-
lsls ip, r0, #12
297-
lsls ip, r3
298-
beq double2fix64_z // integer
295+
bge double2fix64_z // modified input must be an integer or infinite
296+
adds r3, #12
297+
lsls ip, r1, r3 // ip has remaining fractional mantissa bits of r1
298+
bne 1f // not integer as non zero fractional bits remain
299+
subs r3, #32
300+
bics r3, r3, r3, asr #31 // map negative shift to zero
301+
lsls r3, r0, r3
302+
beq double2fix64_z // remaining fractional bits are 0, so argument was an integer
299303
1:
300304
push {lr}
301305
bl double2fix64_z
302306
subs r0, #1
303307
sbcs r1, r1, #0
304308
pop {pc}
309+
2:
310+
movs r0, #0
311+
movs r1, #0
312+
bx lr
305313

306314
double_wrapper_section conv_dtoi64_z
307315

0 commit comments

Comments
 (0)