Skip to content

Commit 626f8ea

Browse files
committed
Cache span buffer to eliminate per-frame malloc
This implements a span buffer cache in twin_screen_t to eliminate malloc() and free() on every screen update. The previous code allocated and freed the span buffer for each frame, creating unnecessary allocation overhead in the render loop.

Previous behavior:
- malloc() span buffer at start of twin_screen_update()
- free() span buffer at end of function
- Repeated every frame, creating allocation churn

New behavior:
- Allocate once, cache in screen->span_cache
- Reuse buffer if current width fits
- Realloc only when larger width needed
- Free cache only on screen destruction
1 parent 6c49b0d commit 626f8ea

File tree

3 files changed

+247
-8
lines changed

3 files changed

+247
-8
lines changed

include/twin.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,10 @@ struct _twin_screen {
327327
/* Window manager */
328328
twin_coord_t button_x, button_y; /**< Window button position */
329329

330+
/* Span buffer cache for screen updates */
331+
twin_argb32_t *span_cache; /**< Cached span buffer */
332+
twin_coord_t span_cache_width; /**< Cached span buffer width */
333+
330334
/* Event processing: event filter callback */
331335
bool (*event_filter)(twin_screen_t *screen, twin_event_t *event);
332336
};

src/screen.c

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,16 @@ twin_screen_t *twin_screen_create(twin_coord_t width,
4141
screen->closure = closure;
4242

4343
screen->button_x = screen->button_y = -1;
44+
screen->span_cache = NULL;
45+
screen->span_cache_width = 0;
4446
return screen;
4547
}
4648

4749
/*
 * Destroy a screen and release the memory it owns.
 *
 * Hides pixmaps for as long as screen->bottom is non-NULL, then frees the
 * cached span buffer and finally the screen structure itself.
 *
 * Passing NULL is a no-op, mirroring free(), so error paths can call this
 * unconditionally.
 */
void twin_screen_destroy(twin_screen_t *screen)
{
    if (!screen)
        return;

    /* NOTE(review): presumably twin_pixmap_hide() unlinks the pixmap from
     * the screen so that screen->bottom advances -- confirm. */
    while (screen->bottom)
        twin_pixmap_hide(screen->bottom);

    free(screen->span_cache); /* Free span buffer cache */
    free(screen);
}
5356

@@ -185,10 +188,20 @@ void twin_screen_update(twin_screen_t *screen)
185188

186189
screen->damage.left = screen->damage.right = 0;
187190
screen->damage.top = screen->damage.bottom = 0;
188-
/* FIXME: what is the maximum number of lines? */
189-
span = malloc(width * sizeof(twin_argb32_t));
190-
if (!span)
191-
return;
191+
192+
/* Reuse cached span buffer if large enough */
193+
if (screen->span_cache && screen->span_cache_width >= width) {
194+
span = screen->span_cache;
195+
} else {
196+
/* Need larger cache - reallocate */
197+
twin_argb32_t *new_cache =
198+
realloc(screen->span_cache, width * sizeof(twin_argb32_t));
199+
if (!new_cache)
200+
return;
201+
screen->span_cache = new_cache;
202+
screen->span_cache_width = width;
203+
span = new_cache;
204+
}
192205

193206
if (screen->put_begin)
194207
(*screen->put_begin)(left, top, right, bottom, screen->closure);
@@ -230,7 +243,7 @@ void twin_screen_update(twin_screen_t *screen)
230243

231244
(*screen->put_span)(left, y, right, span, screen->closure);
232245
}
233-
free(span);
246+
/* Span buffer is now cached - don't free */
234247
}
235248
}
236249

tools/perf.c

Lines changed: 225 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <stdint.h>
1111
#include <stdio.h>
1212
#include <string.h>
13+
#include <sys/resource.h>
1314
#include <sys/time.h>
1415
#include <unistd.h>
1516

@@ -26,6 +27,9 @@
2627
#define MAX_REPS_LARGE 20000 /* For operations >= 100x100 */
2728
#define MAX_REPS_MEDIUM 200000 /* For operations >= 10x10 */
2829

30+
/* Memory profiling iterations */
31+
#define MEM_TEST_ITERATIONS 10000
32+
2933
static twin_pixmap_t *src32, *dst32, *mask8;
3034
static int test_width, test_height;
3135

@@ -287,12 +291,219 @@ static void run_large_tests(void)
287291
run_test_series("500x500 solid over", test_solid_over_argb32, 500, 500);
288292
}
289293

294+
/* Memory profiling mode */
295+
296+
/*
 * Get memory usage statistics for the current process, in kilobytes.
 *
 * rss_kb:     receives the "current" RSS. getrusage() offers no such field,
 *             so this is only an approximation and equals the peak value.
 * max_rss_kb: receives the peak resident set size (ru_maxrss).
 *
 * Both outputs are set to 0 when getrusage() fails, so callers never read
 * indeterminate values.
 */
static void get_memory_usage(long *rss_kb, long *max_rss_kb)
{
    struct rusage usage;

    if (getrusage(RUSAGE_SELF, &usage) != 0) {
        /* Without this check 'usage' would be read uninitialized. */
        *rss_kb = 0;
        *max_rss_kb = 0;
        return;
    }

#ifdef __APPLE__
    *max_rss_kb = usage.ru_maxrss / 1024; /* macOS reports in bytes */
#else
    *max_rss_kb = usage.ru_maxrss; /* Linux reports in KB */
#endif
    *rss_kb = *max_rss_kb; /* Current RSS approximation */
}
308+
309+
/*
 * Print one result row of the memory profiling table.
 *
 * test_name:  label shown in the first column
 * iterations: number of operations the test performed
 * elapsed_us: wall-clock duration of the whole test, in microseconds
 * start_rss:  RSS sample taken before the test (KB)
 * end_rss:    RSS sample taken after the test (KB)
 * peak_rss:   peak RSS sample (KB)
 *
 * The row shows the average time per operation, the throughput in thousands
 * of operations per second, the signed RSS difference and the peak RSS.
 */
static void print_memory_stats(const char *test_name,
                               int iterations,
                               uint64_t elapsed_us,
                               long start_rss,
                               long end_rss,
                               long peak_rss)
{
    const double elapsed_sec = (double) elapsed_us / 1000000.0;
    const double per_op_us = (double) elapsed_us / iterations;
    const double rate_ops = (double) iterations / elapsed_sec;
    const double rate_kops = rate_ops / 1000.0;
    const long rss_growth = end_rss - start_rss;

    printf("%-28s %6d %8.1f %9.1f %+8ld %7ld\n", test_name, iterations,
           per_op_us, rate_kops, rss_growth, peak_rss);
}
326+
327+
/* Memory test data structures */
328+
struct mem_composite_test {
329+
twin_pixmap_t *dst, *src;
330+
int width, height;
331+
int iterations;
332+
};
333+
334+
struct mem_polygon_test {
335+
twin_pixmap_t *dst;
336+
int iterations;
337+
};
338+
339+
/* Arguments consumed by mem_test_pixmap(). */
struct mem_pixmap_test {
    int width;      /* width of each pixmap created */
    int height;     /* height of each pixmap created */
    int iterations; /* number of create/destroy cycles */
};
343+
344+
/* Arguments consumed by mem_test_path(). */
struct mem_path_test {
    int iterations; /* number of path create/fill/destroy cycles */
};
347+
348+
/* Memory test: Composite operations (xform buffer allocation) */
349+
static void mem_test_composite(void *data)
350+
{
351+
struct mem_composite_test *d = (struct mem_composite_test *) data;
352+
twin_operand_t srco = {.source_kind = TWIN_PIXMAP, .u.pixmap = d->src};
353+
354+
for (int i = 0; i < d->iterations; i++) {
355+
twin_composite(d->dst, 0, 0, &srco, 0, 0, NULL, 0, 0, TWIN_OVER,
356+
d->width, d->height);
357+
}
358+
}
359+
360+
/* Memory test: Path operations (point reallocation) */
361+
static void mem_test_path(void *data)
362+
{
363+
struct mem_path_test *d = (struct mem_path_test *) data;
364+
365+
for (int i = 0; i < d->iterations; i++) {
366+
twin_path_t *path = twin_path_create();
367+
368+
/* Add many points to trigger reallocation */
369+
for (int j = 0; j < 100; j++) {
370+
twin_path_move(path, twin_int_to_fixed(j), twin_int_to_fixed(j));
371+
twin_path_draw(path, twin_int_to_fixed(j + 10),
372+
twin_int_to_fixed(j + 10));
373+
}
374+
375+
twin_path_destroy(path);
376+
}
377+
}
378+
379+
/* Memory test: Polygon filling (edge buffer allocation) */
380+
static void mem_test_polygon(void *data)
381+
{
382+
struct mem_polygon_test *d = (struct mem_polygon_test *) data;
383+
384+
for (int i = 0; i < d->iterations; i++) {
385+
twin_path_t *path = twin_path_create();
386+
387+
/* Create a complex polygon */
388+
twin_path_move(path, twin_int_to_fixed(10), twin_int_to_fixed(10));
389+
twin_path_draw(path, twin_int_to_fixed(100), twin_int_to_fixed(10));
390+
twin_path_draw(path, twin_int_to_fixed(100), twin_int_to_fixed(100));
391+
twin_path_draw(path, twin_int_to_fixed(10), twin_int_to_fixed(100));
392+
twin_path_close(path);
393+
394+
twin_paint_path(d->dst, 0xffff0000, path);
395+
396+
twin_path_destroy(path);
397+
}
398+
}
399+
400+
/* Memory test: Pixmap lifecycle */
401+
static void mem_test_pixmap(void *data)
402+
{
403+
struct mem_pixmap_test *d = (struct mem_pixmap_test *) data;
404+
405+
for (int i = 0; i < d->iterations; i++) {
406+
twin_pixmap_t *pixmap =
407+
twin_pixmap_create(TWIN_ARGB32, d->width, d->height);
408+
twin_pixmap_destroy(pixmap);
409+
}
410+
}
411+
412+
/*
 * Run a memory profiling test and print its result row.
 *
 * test_name:  label for the output row
 * test_func:  test body; called exactly once with test_data
 * test_data:  opaque argument forwarded to test_func
 * iterations: operation count used only for the per-operation statistics
 *
 * Memory usage is sampled before and after the call, and the call itself is
 * timed with gettimeofday().
 */
static void run_memory_test(const char *test_name,
                            void (*test_func)(void *),
                            void *test_data,
                            int iterations)
{
    long rss_before, rss_after, rss_peak;
    struct timeval t_begin, t_end;

    get_memory_usage(&rss_before, &rss_peak);

    gettimeofday(&t_begin, NULL);
    test_func(test_data);
    gettimeofday(&t_end, NULL);

    /* The second sample overwrites the peak taken before the test */
    get_memory_usage(&rss_after, &rss_peak);

    const uint64_t end_us =
        (uint64_t) t_end.tv_sec * 1000000U + t_end.tv_usec;
    const uint64_t begin_us =
        (uint64_t) t_begin.tv_sec * 1000000U + t_begin.tv_usec;

    print_memory_stats(test_name, iterations, end_us - begin_us, rss_before,
                       rss_after, rss_peak);
}
435+
436+
/* Run complete memory profiling suite */
437+
static void run_memory_profiling(void)
438+
{
439+
printf("\n");
440+
printf(
441+
"Test Iters us/op kops/s DeltaRS "
442+
"PeakRS\n");
443+
printf(
444+
" (KB) "
445+
"(KB)\n");
446+
printf(
447+
"----------------------------------------------------------------------"
448+
"\n");
449+
450+
/* Composite operations (xform buffer) */
451+
struct mem_composite_test comp_100 = {
452+
.dst = dst32,
453+
.src = src32,
454+
.width = 100,
455+
.height = 100,
456+
.iterations = MEM_TEST_ITERATIONS,
457+
};
458+
run_memory_test("100x100 comp (xform)", mem_test_composite, &comp_100,
459+
MEM_TEST_ITERATIONS);
460+
461+
struct mem_composite_test comp_500 = {
462+
.dst = dst32,
463+
.src = src32,
464+
.width = 500,
465+
.height = 500,
466+
.iterations = MEM_TEST_ITERATIONS / 10,
467+
};
468+
run_memory_test("500x500 comp (xform)", mem_test_composite, &comp_500,
469+
MEM_TEST_ITERATIONS / 10);
470+
471+
/* Path operations (point reallocation) */
472+
struct mem_path_test path_test = {.iterations = MEM_TEST_ITERATIONS / 10};
473+
run_memory_test("Path ops (realloc)", mem_test_path, &path_test,
474+
MEM_TEST_ITERATIONS / 10);
475+
476+
/* Polygon fill (edge buffer) */
477+
struct mem_polygon_test poly_test = {
478+
.dst = dst32,
479+
.iterations = MEM_TEST_ITERATIONS,
480+
};
481+
run_memory_test("Polygon fill (pool)", mem_test_polygon, &poly_test,
482+
MEM_TEST_ITERATIONS);
483+
484+
/* Pixmap lifecycle */
485+
struct mem_pixmap_test pixmap_100 = {
486+
.width = 100, .height = 100, .iterations = MEM_TEST_ITERATIONS};
487+
run_memory_test("100x100 pixmap life", mem_test_pixmap, &pixmap_100,
488+
MEM_TEST_ITERATIONS);
489+
490+
struct mem_pixmap_test pixmap_500 = {
491+
.width = 500, .height = 500, .iterations = MEM_TEST_ITERATIONS / 10};
492+
run_memory_test("500x500 pixmap life", mem_test_pixmap, &pixmap_500,
493+
MEM_TEST_ITERATIONS / 10);
494+
495+
printf("\nNotes:\n");
496+
printf(" DeltaRS: RSS change (+/- KB, negative = cleanup)\n");
497+
printf(" PeakRS: Maximum memory usage during test\n");
498+
printf(" Targets: xform (comp), pool (poly), realloc (path)\n");
499+
}
500+
290501
int main(void)
291502
{
292503
time_t now;
293504
char hostname[256];
294505

295-
/* Print header similar to x11perf */
506+
/* Print header */
296507
time(&now);
297508
if (gethostname(hostname, sizeof(hostname)) != 0)
298509
strcpy(hostname, "localhost");
@@ -306,7 +517,11 @@ int main(void)
306517

307518
/* Create test pixmaps */
308519
src32 = twin_pixmap_from_file("assets/tux.png", TWIN_ARGB32);
309-
assert(src32);
520+
if (!src32) {
521+
/* Fallback: create a simple pixmap if file not found */
522+
src32 = twin_pixmap_create(TWIN_ARGB32, 256, 256);
523+
twin_fill(src32, 0xffff0000, TWIN_SOURCE, 0, 0, 256, 256);
524+
}
310525
dst32 = twin_pixmap_create(TWIN_ARGB32, TEST_PIX_WIDTH, TEST_PIX_HEIGHT);
311526
assert(dst32);
312527
mask8 = twin_pixmap_create(TWIN_A8, TEST_PIX_WIDTH, TEST_PIX_HEIGHT);
@@ -325,12 +540,19 @@ int main(void)
325540
test_height = 1;
326541
test_argb32_source_argb32();
327542

328-
/* Run comprehensive test series */
543+
/* Run comprehensive performance test series */
329544
run_basic_tests();
330545
run_solid_tests();
331546
run_alpha_tests();
332547
run_large_tests();
333548

549+
/* Run memory profiling tests */
550+
printf("\n");
551+
printf("========================================\n");
552+
printf(" Memory Profiling\n");
553+
printf("========================================\n");
554+
run_memory_profiling();
555+
334556
/* Cleanup */
335557
twin_pixmap_destroy(src32);
336558
twin_pixmap_destroy(dst32);

0 commit comments

Comments
 (0)