Skip to content

Commit 73b788f

Browse files
authored
Speed up div100(uint16_t) - using libdivide is faster than compiler-generated code! (speeduino#1364)
1 parent c7f911f commit 73b788f

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

speeduino/maths.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
// libdivide generator functions (E.g. libdivide_s32_gen)
1111
// 32-bit constants generated here: https://godbolt.org/z/vP8Kfejo9
1212
#include "src/libdivide/libdivide.h"
13-
#include "src/libdivide/constant_fast_div.h"
1413
#endif
1514

1615
uint8_t random1to100(void);
@@ -124,7 +123,12 @@ static inline uint16_t div100(uint16_t n) {
124123
// As of avr-gcc 5.4.0, the compiler will optimize this to a multiply/shift
125124
// (unlike the signed integer overload, where __divmodhi4 is still called;
126125
// see https://godbolt.org/z/c5bs5noT1)
126+
#ifdef USE_LIBDIVIDE
127+
constexpr libdivide::libdivide_u16_t libdiv_u16_100 = { .magic = 18351, .more = 70 };
128+
return libdivide::libdivide_u16_do_raw(n + DIV_ROUND_CORRECT(UINT16_C(100), uint16_t), libdiv_u16_100.magic, libdiv_u16_100.more);
129+
#else
127130
return UDIV_ROUND_CLOSEST(n, UINT16_C(100), uint16_t);
131+
#endif
128132
}
129133

130134
static inline int16_t div100(int16_t n) {
@@ -135,7 +139,8 @@ static inline int16_t div100(int16_t n) {
135139
}
136140
// Negative values here, so adjust pre-division to get same
137141
// behavior as roundf(float)
138-
return libdivide::libdivide_s16_do_raw(n - DIV_ROUND_CORRECT(UINT16_C(100), uint16_t), S16_MAGIC(100), S16_MORE(100));
142+
constexpr libdivide::libdivide_s16_t libdiv_s16_100 = { .magic = 20972, .more = 5 };
143+
return libdivide::libdivide_s16_do_raw(n - DIV_ROUND_CORRECT(UINT16_C(100), uint16_t), libdiv_s16_100.magic, libdiv_s16_100.more);
139144
#else
140145
return DIV_ROUND_CLOSEST(n, UINT16_C(100), int16_t);
141146
#endif
@@ -146,7 +151,8 @@ static inline uint32_t div100(uint32_t n) {
146151
if (n<=(uint32_t)UINT16_MAX) {
147152
return div100((uint16_t)n);
148153
}
149-
return libdivide::libdivide_u32_do_raw(n + DIV_ROUND_CORRECT(UINT32_C(100), uint32_t), 2748779070L, 6);
154+
constexpr libdivide::libdivide_u32_t libdiv_u32_100 = { .magic = 2748779070, .more = 6 };
155+
return libdivide::libdivide_u32_do_raw(n + DIV_ROUND_CORRECT(UINT32_C(100), uint32_t), libdiv_u32_100.magic, libdiv_u32_100.more);
150156
#else
151157
return UDIV_ROUND_CLOSEST(n, UINT32_C(100), uint32_t);
152158
#endif
@@ -157,7 +163,8 @@ static inline int32_t div100(int32_t n) {
157163
if (n<=INT16_MAX && n>=INT16_MIN) {
158164
return div100((int16_t)n);
159165
}
160-
return libdivide::libdivide_s32_do_raw(n + (DIV_ROUND_CORRECT(UINT16_C(100), uint32_t) * (n<0 ? -1 : 1)), 1374389535L, 5);
166+
constexpr libdivide::libdivide_s32_t libdiv_s32_100 = { .magic = 1374389535, .more = 5 };
167+
return libdivide::libdivide_s32_do_raw(n + (DIV_ROUND_CORRECT(UINT16_C(100), uint32_t) * (n<0 ? -1 : 1)), libdiv_s32_100.magic, libdiv_s32_100.more);
161168
#else
162169
return DIV_ROUND_CLOSEST(n, INT32_C(100), int32_t);
163170
#endif
@@ -172,7 +179,8 @@ static inline int32_t div100(int32_t n) {
172179
*/
173180
static inline uint32_t div360(uint32_t n) {
174181
#ifdef USE_LIBDIVIDE
175-
return libdivide::libdivide_u32_do_raw(n + DIV_ROUND_CORRECT(UINT32_C(360), uint32_t), 1813430637L, 72);
182+
constexpr libdivide::libdivide_u32_t libdiv_u32_360 = { .magic = 1813430637, .more = 72 };
183+
return libdivide::libdivide_u32_do_raw(n + DIV_ROUND_CORRECT(UINT32_C(360), uint32_t), libdiv_u32_360.magic, libdiv_u32_360.more);
176184
#else
177185
return (uint32_t)UDIV_ROUND_CLOSEST(n, UINT32_C(360), uint32_t);
178186
#endif
@@ -267,7 +275,8 @@ static inline uint32_t percentage(uint16_t percent, uint32_t value)
267275
static inline uint16_t halfPercentage(uint8_t percent, uint16_t value) {
268276
uint32_t x200 = (uint32_t)percent * (uint32_t)value;
269277
#ifdef USE_LIBDIVIDE
270-
return (uint16_t)libdivide::libdivide_u32_do_raw(x200 + DIV_ROUND_CORRECT(UINT32_C(200), uint32_t), 2748779070L, 7);
278+
constexpr libdivide::libdivide_u32_t libdiv_u32_200 = { .magic = 2748779070, .more = 7 };
279+
return (uint16_t)libdivide::libdivide_u32_do_raw(x200 + DIV_ROUND_CORRECT(UINT32_C(200), uint32_t), libdiv_u32_200.magic, libdiv_u32_200.more);
271280
#else
272281
return (uint16_t)UDIV_ROUND_CLOSEST(x200, UINT16_C(200), uint32_t);
273282
#endif

0 commit comments

Comments
 (0)