Skip to content

Commit 69f6793

Browse files
committed
Improve software multiply/divide
This commit:
* Adds early-exit optimizations for trivial cases (operands of 0 or 1)
* Implements a power-of-2 division optimization using bit shifts
* Makes division-by-zero handling return consistent error values
* Adds proper signed-overflow handling for edge cases
1 parent 29757b6 commit 69f6793

File tree

2 files changed

+187
-29
lines changed

2 files changed

+187
-29
lines changed

arch/riscv/muldiv.c

Lines changed: 186 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,48 @@
11
/* software implementation of 64-bit multiply/divide */
22

33
#include <types.h>
4+
#include "private/utils.h"
45

6+
/*
 * 32-bit multiply by shift-and-add.
 *
 * Returns the low 32 bits of a * b.  The arithmetic is unsigned, so a
 * product that does not fit in 32 bits wraps modulo 2^32.
 */
uint32_t __mulsi3(uint32_t a, uint32_t b)
{
    /* Trivial operands need no loop at all. */
    if (unlikely(a == 0 || b == 0))
        return 0;
    if (unlikely(a == 1))
        return b;
    if (unlikely(b == 1))
        return a;

    /* Walk the bits of the smaller operand: fewer loop iterations. */
    uint32_t multiplier = (a > b) ? b : a;
    uint32_t addend = (a > b) ? a : b;
    uint32_t product = 0;

    for (; multiplier != 0; multiplier >>= 1, addend <<= 1) {
        if (multiplier & 1)
            product += addend;
    }
    return product;
}
1634

35+
/* 32x32 -> 64-bit multiplication */
1736
uint64_t __muldsi3(uint32_t a, uint32_t b)
1837
{
38+
/* Early exit optimizations */
39+
if (unlikely(a == 0 || b == 0))
40+
return 0;
41+
if (unlikely(a == 1))
42+
return b;
43+
if (unlikely(b == 1))
44+
return a;
45+
1946
uint64_t result = 0;
2047
uint64_t aa = a;
2148

@@ -29,29 +56,67 @@ uint64_t __muldsi3(uint32_t a, uint32_t b)
2956
return result;
3057
}
3158

59+
/*
 * 64x64 -> 64-bit multiplication built from 32x32 -> 64 partial products.
 *
 * Only the low 64 bits of the product are returned, so the high*high term,
 * which lies entirely at bit 64 and above, is never computed.
 */
uint64_t __muldi3(uint64_t a, uint64_t b)
{
    /* Trivial operands: skip the partial products entirely. */
    if (unlikely(a == 0 || b == 0))
        return 0;
    if (unlikely(a == 1))
        return b;
    if (unlikely(b == 1))
        return a;

    const uint32_t a_lo = (uint32_t) a;
    const uint32_t a_hi = (uint32_t) (a >> 32);
    const uint32_t b_lo = (uint32_t) b;
    const uint32_t b_hi = (uint32_t) (b >> 32);

    /* lo*lo lands at bit 0; the two cross terms land at bit 32. */
    uint64_t product = __muldsi3(a_lo, b_lo);
    uint64_t cross = __muldsi3(a_lo, b_hi);
    cross += __muldsi3(a_hi, b_lo);

    /* Bits of the cross sum shifted past bit 63 are discarded on purpose. */
    product += cross << 32;
    return product;
}
4281

82+
/* Common division helper with comprehensive error handling */
4383
uint32_t __udivmodsi4(uint32_t num, uint32_t den, int mod)
4484
{
45-
if (den == 0)
46-
return 0;
85+
/* Handle division by zero */
86+
if (unlikely(den == 0)) {
87+
/* Return maximum value for quotient, 0 for remainder */
88+
return mod ? 0 : UINT32_MAX;
89+
}
90+
91+
/* Handle trivial cases for efficiency */
92+
if (unlikely(num < den))
93+
return mod ? num : 0;
94+
if (unlikely(num == den))
95+
return mod ? 0 : 1;
96+
if (unlikely(den == 1))
97+
return mod ? 0 : num;
98+
99+
/* Check for power-of-2 divisor optimization */
100+
if ((den & (den - 1)) == 0) {
101+
/* den is a power of 2 */
102+
int shift = 0;
103+
uint32_t temp = den;
104+
while (temp > 1) {
105+
temp >>= 1;
106+
shift++;
107+
}
108+
return mod ? (num & (den - 1)) : (num >> shift);
109+
}
47110

48111
uint32_t quot = 0, qbit = 1;
49112

113+
/* Normalize divisor to avoid overflow */
50114
while ((int32_t) den >= 0) {
51115
den <<= 1;
52116
qbit <<= 1;
53117
}
54118

119+
/* Long division algorithm */
55120
while (qbit) {
56121
if (num >= den) {
57122
num -= den;
@@ -64,79 +129,152 @@ uint32_t __udivmodsi4(uint32_t num, uint32_t den, int mod)
64129
return mod ? num : quot;
65130
}
66131

132+
/*
 * Signed 32-bit division helper shared by __divsi3 and __modsi3.
 *
 * num: dividend
 * den: divisor
 * mod: non-zero to return the remainder, zero to return the quotient
 *
 * The quotient truncates toward zero and the remainder takes the sign of
 * the dividend.  Special cases:
 *   - den == 0:       remainder 0; quotient INT32_MIN for a negative
 *                     dividend, INT32_MAX otherwise
 *   - INT32_MIN / -1: remainder 0, quotient INT32_MIN (the true quotient
 *                     +2^31 is not representable)
 */
int32_t __divmodsi4(int32_t num, int32_t den, int mod)
{
    /* Handle division by zero */
    if (unlikely(den == 0))
        return mod ? 0 : (num < 0 ? INT32_MIN : INT32_MAX);

    /* Handle overflow case: pin the result instead of overflowing */
    if (unlikely(num == INT32_MIN && den == -1))
        return mod ? 0 : INT32_MIN;

    int num_neg = (num < 0);
    int neg = num_neg ^ (den < 0);

    /* Take magnitudes in unsigned arithmetic so INT32_MIN is handled safely */
    uint32_t unum = num_neg ? -(uint32_t) num : (uint32_t) num;
    uint32_t uden = (den < 0) ? -(uint32_t) den : (uint32_t) den;
    uint32_t res = __udivmodsi4(unum, uden, mod);

    /*
     * The remainder follows the dividend's sign; the quotient is negative
     * when the operand signs differ.
     *
     * Negate while the value is still unsigned and convert afterwards.  A
     * quotient magnitude of 2^31 (INT32_MIN / 1) converts to INT32_MIN, and
     * negating that as a signed value would be signed overflow.
     */
    int negate = mod ? num_neg : neg;

    return (int32_t) (negate ? -res : res);
}
77163

164+
/* public division/modulo interfaces */
78165
/* Unsigned 32-bit quotient: asks the shared helper for the quotient (mod = 0). */
uint32_t __udivsi3(uint32_t num, uint32_t den)
{
    const uint32_t quotient = __udivmodsi4(num, den, 0);

    return quotient;
}
169+
82170
/* Unsigned 32-bit remainder: asks the shared helper for the remainder (mod = 1). */
uint32_t __umodsi3(uint32_t num, uint32_t den)
{
    const uint32_t remainder = __udivmodsi4(num, den, 1);

    return remainder;
}
174+
86175
/* Signed 32-bit quotient: asks the shared signed helper for the quotient (mod = 0). */
int32_t __divsi3(int32_t num, int32_t den)
{
    const int32_t quotient = __divmodsi4(num, den, 0);

    return quotient;
}
179+
90180
/* Signed 32-bit remainder: asks the shared signed helper for the remainder (mod = 1). */
int32_t __modsi3(int32_t num, int32_t den)
{
    const int32_t remainder = __divmodsi4(num, den, 1);

    return remainder;
}
94184

185+
/*
 * 64-bit left shift.
 *
 * A count of zero or less returns val unchanged; a count of 64 or more
 * returns 0.
 */
uint64_t __ashldi3(uint64_t val, int cnt)
{
    if (unlikely(cnt <= 0))
        return val;
    if (unlikely(cnt >= 64))
        return 0;

    if (cnt >= 32) {
        /*
         * Only the low word survives: shift it by the excess over 32,
         * then move it into the high word.
         */
        uint64_t low_word = (uint32_t) val;

        low_word <<= cnt - 32;
        return low_word << 32;
    }
    return val << cnt;
}
105199

200+
/*
 * 64-bit arithmetic (sign-propagating) right shift.
 *
 * A count of zero or less returns val unchanged.  A count of 64 or more
 * returns all ones when val, read as signed, is negative, and 0 otherwise.
 */
uint64_t __ashrdi3(uint64_t val, int cnt)
{
    if (unlikely(cnt <= 0))
        return val;

    const int64_t sval = (int64_t) val;

    /* Every bit of the result is a copy of the sign bit. */
    if (unlikely(cnt >= 64))
        return (sval < 0) ? UINT64_MAX : 0;

    if (cnt >= 32) {
        /* Sign-extend the high word, then shift by the excess over 32. */
        int64_t upper = (int32_t) (val >> 32);

        return upper >> (cnt - 32);
    }
    return sval >> cnt;
}
116218

219+
/*
 * 64-bit logical (zero-filling) right shift.
 *
 * A count of zero or less returns val unchanged; a count of 64 or more
 * returns 0.
 */
uint64_t __lshrdi3(uint64_t val, int cnt)
{
    if (unlikely(cnt <= 0))
        return val;
    if (unlikely(cnt >= 64))
        return 0;

    if (cnt >= 32) {
        /* Drop the low word first, then shift by the excess over 32. */
        uint64_t upper = val >> 32;

        return upper >> (cnt - 32);
    }
    return val >> cnt;
}
127233

234+
/* 64-bit unsigned division with remainder - enhanced version */
128235
uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *rem)
129236
{
130-
if (den == 0)
237+
/* Handle division by zero */
238+
if (unlikely(den == 0)) {
239+
if (rem)
240+
*rem = 0;
241+
return UINT64_MAX;
242+
}
243+
244+
/* Handle trivial cases */
245+
if (unlikely(num < den)) {
246+
if (rem)
247+
*rem = num;
131248
return 0;
249+
}
250+
if (unlikely(num == den)) {
251+
if (rem)
252+
*rem = 0;
253+
return 1;
254+
}
255+
if (unlikely(den == 1)) {
256+
if (rem)
257+
*rem = 0;
258+
return num;
259+
}
260+
261+
/* Check for 32-bit divisors for optimization */
262+
if (den <= UINT32_MAX && num <= UINT32_MAX) {
263+
uint32_t q = __udivmodsi4((uint32_t) num, (uint32_t) den, 0);
264+
if (rem)
265+
*rem = (uint32_t) num - q * (uint32_t) den;
266+
return q;
267+
}
132268

133269
uint64_t quot = 0, qbit = 1;
134270

271+
/* Normalize divisor */
135272
while ((int64_t) den >= 0) {
136273
den <<= 1;
137274
qbit <<= 1;
138275
}
139276

277+
/* Long division */
140278
while (qbit) {
141279
if (num >= den) {
142280
num -= den;
@@ -152,20 +290,40 @@ uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *rem)
152290
return quot;
153291
}
154292

293+
/*
 * 64-bit signed division with optional remainder.
 *
 * num: dividend
 * den: divisor
 * rem: if non-NULL, receives the remainder (same sign as the dividend)
 *
 * Returns the quotient, truncated toward zero.  Special cases:
 *   - den == 0:       *rem = 0; quotient INT64_MIN for a negative dividend,
 *                     INT64_MAX otherwise
 *   - INT64_MIN / -1: *rem = 0, quotient INT64_MIN (the true quotient
 *                     +2^63 is not representable)
 */
int64_t __divmoddi4(int64_t num, int64_t den, int64_t *rem)
{
    /* Handle division by zero */
    if (unlikely(den == 0)) {
        if (rem)
            *rem = 0;
        return (num < 0) ? INT64_MIN : INT64_MAX;
    }

    /* Handle overflow case */
    if (unlikely(num == INT64_MIN && den == -1)) {
        if (rem)
            *rem = 0;
        return INT64_MIN;
    }

    int num_neg = (num < 0);
    int neg = num_neg ^ (den < 0);

    /* Take magnitudes in unsigned arithmetic so INT64_MIN is handled safely */
    uint64_t unum = num_neg ? -(uint64_t) num : (uint64_t) num;
    uint64_t uden = (den < 0) ? -(uint64_t) den : (uint64_t) den;
    uint64_t ures = __udivmoddi4(unum, uden, (uint64_t *) rem);

    /*
     * The remainder follows the dividend's sign.  Its magnitude is below
     * |den| <= 2^63, so it fits in int64_t and the negation cannot overflow.
     */
    if (rem && num_neg)
        *rem = -(*rem);

    /*
     * Negate the quotient while it is still unsigned and convert afterwards.
     * A magnitude of 2^63 (INT64_MIN / 1) converts to INT64_MIN, and negating
     * that as a signed value would be signed overflow.
     */
    return (int64_t) (neg ? -ures : ures);
}
168325

326+
/* Public 64-bit division/modulo interfaces */
169327
uint64_t __umoddi3(uint64_t num, uint64_t den)
170328
{
171329
uint64_t rem = 0;

arch/riscv/types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ typedef long long int64_t;
3636

3737
#ifndef INT64_MAX
3838
#define INT64_MAX 9223372036854775807LL
39-
#define INT64_MIN (-9223372036854775808LL)
39+
#define INT64_MIN (-9223372036854775807LL - 1)
4040
#define UINT64_MAX 18446744073709551615ULL
4141
#endif
4242

0 commit comments

Comments
 (0)