Skip to content

Commit c48994b

Browse files
dtaht authored and kuba-moo committed
sch_cake: constify inverse square root cache
sch_cake uses a cache of the first 16 values of the inverse square root calculation for the Cobalt AQM to save some cycles on the fast path. This cache is populated when the qdisc is first loaded, but there's really no reason why it can't just be pre-populated. So change it to be pre-populated with constants, which also makes it possible to constify it.

This gives a modest space saving for the module (not counting debug data):

.text: -224 bytes
.rodata: +80 bytes
.bss: -64 bytes
Total: -192 bytes

Signed-off-by: Dave Taht <[email protected]>
[ fixed up comment, rewrote commit message ]
Signed-off-by: Toke Høiland-Jørgensen <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 3f464b1 commit c48994b

File tree

1 file changed

+18
-35
lines changed

1 file changed

+18
-35
lines changed

net/sched/sch_cake.c

Lines changed: 18 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -361,8 +361,24 @@ static const u8 besteffort[] = {
361361
static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
362362
static const u8 bulk_order[] = {1, 0, 2, 3};
363363

364+
/* There is a big difference in timing between the accurate values placed in the
365+
* cache and the approximations given by a single Newton step for small count
366+
* values, particularly when stepping from count 1 to 2 or vice versa. Hence,
367+
* these values are calculated using eight Newton steps, using the
368+
* implementation below. Above 16, a single Newton step gives sufficient
369+
* accuracy in either direction, given the precision stored.
370+
*
371+
* The magnitude of the error when stepping up to count 2 is such as to give the
372+
* value that *should* have been produced at count 4.
373+
*/
374+
364375
#define REC_INV_SQRT_CACHE (16)
365-
static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
376+
static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
377+
~0, ~0, 3037000500, 2479700525,
378+
2147483647, 1920767767, 1753413056, 1623345051,
379+
1518500250, 1431655765, 1358187914, 1294981364,
380+
1239850263, 1191209601, 1147878294, 1108955788
381+
};
366382

367383
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
368384
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
@@ -388,47 +404,14 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
388404
static void cobalt_invsqrt(struct cobalt_vars *vars)
389405
{
390406
if (vars->count < REC_INV_SQRT_CACHE)
391-
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
407+
vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
392408
else
393409
cobalt_newton_step(vars);
394410
}
395411

396-
/* There is a big difference in timing between the accurate values placed in
397-
* the cache and the approximations given by a single Newton step for small
398-
* count values, particularly when stepping from count 1 to 2 or vice versa.
399-
* Above 16, a single Newton step gives sufficient accuracy in either
400-
* direction, given the precision stored.
401-
*
402-
* The magnitude of the error when stepping up to count 2 is such as to give
403-
* the value that *should* have been produced at count 4.
404-
*/
405-
406-
static void cobalt_cache_init(void)
407-
{
408-
struct cobalt_vars v;
409-
410-
memset(&v, 0, sizeof(v));
411-
v.rec_inv_sqrt = ~0U;
412-
cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
413-
414-
for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
415-
cobalt_newton_step(&v);
416-
cobalt_newton_step(&v);
417-
cobalt_newton_step(&v);
418-
cobalt_newton_step(&v);
419-
420-
cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
421-
}
422-
}
423-
424412
static void cobalt_vars_init(struct cobalt_vars *vars)
425413
{
426414
memset(vars, 0, sizeof(*vars));
427-
428-
if (!cobalt_rec_inv_sqrt_cache[0]) {
429-
cobalt_cache_init();
430-
cobalt_rec_inv_sqrt_cache[0] = ~0;
431-
}
432415
}
433416

434417
/* CoDel control_law is t + interval/sqrt(count)

0 commit comments

Comments
 (0)