Skip to content

Commit 9ee488c

Browse files
authored
Update 2025-11-30-comptime-c-functions.md
1 parent 9b8308e commit 9ee488c

File tree

1 file changed

+76
-21
lines changed

1 file changed

+76
-21
lines changed

_posts/2025-11-30-comptime-c-functions.md

Lines changed: 76 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,21 @@ Compile-time function execution is great, but what if:
99
2. You don't want to use evil C macros, which are debugging nightmares.
1010
3. You want generic data structures that work for all types.
1111

12-
The below data structure showcase programs get optimized away at compile time by Clang and GCC, such that only the `printf()` at the end of `main()` is left:
12+
The data structure showcase programs below are optimized away entirely at compile time by Clang and GCC:
1313
```nasm
14-
main:
15-
push rax
16-
lea rdi, [rip + .Lstr]
17-
call puts@PLT ; printf() got translated to the faster puts()
18-
xor eax, eax
19-
pop rcx
20-
ret
14+
fn_version:
15+
ret
16+
17+
macro_version:
18+
ret
2119
```
2220

2321
Here is how it is achieved in C:
2422
- `static inline` gives every translation unit that sees the definition its own inlinable copy, so calls can be inlined without relying on link-time optimization.
2523
- `__attribute__((always_inline))` *strongly* urges compilers to inline functions.
2624
- `__builtin_unreachable()` is used to teach the optimizer which assumptions it can make about input arguments.
25+
- Passing `-O3` to the compiler enables its most aggressive standard optimization level.
26+
- Passing `-march=native` to the compiler tells it to make optimizations based on your specific CPU.
2727
- Constant buffer addresses + sizes let the optimizer trace through `memcpy()` calls.
2828
- All operations become statically analyzable, reducing to constants.
2929
- `assert()` calls get eliminated when conditions are provably true.
@@ -36,9 +36,7 @@ The only legitimate use-case I can think of for this technique is generating loo
3636

3737
# Generic Stack
3838

39-
Clang and GCC require `-O1`.
40-
41-
Copy of the code on [Compiler Explorer](https://godbolt.org/z/Y86szvfeG):
39+
Copy of the code on [Compiler Explorer](https://godbolt.org/z/h9narbMG8):
4240

4341
```c
4442
#include <assert.h>
@@ -47,6 +45,7 @@ Copy of the code on [Compiler Explorer](https://godbolt.org/z/Y86szvfeG):
4745
#include <stddef.h>
4846
#include <stdint.h>
4947
#include <stdio.h>
48+
#include <stdlib.h>
5049
#include <string.h>
5150

5251
typedef enum ErrorCode {
@@ -75,8 +74,8 @@ static inline ErrorCode stack_push(stack *s, const void *element) {
7574
if (s->size >= s->capacity) {
7675
return STACK_FULL;
7776
}
78-
// This memcpy() is like assigning a value of *any* type using the = operator
79-
memcpy((unsigned char *)s->data + s->size * s->element_size, element, s->element_size);
77+
memcpy((unsigned char *)s->data + s->size * s->element_size,
78+
element, s->element_size);
8079
s->size++;
8180
return SUCCESS;
8281
}
@@ -87,7 +86,9 @@ static inline ErrorCode stack_pop(stack *s, void *out) {
8786
return STACK_EMPTY;
8887
}
8988
s->size--;
90-
memcpy(out, (unsigned char *)s->data + s->size * s->element_size, s->element_size);
89+
memcpy(out,
90+
(unsigned char *)s->data + s->size * s->element_size,
91+
s->element_size);
9192
return SUCCESS;
9293
}
9394

@@ -101,13 +102,17 @@ typedef struct {
101102
double b;
102103
} Pair;
103104

104-
int main(void) {
105-
Pair buffer[100];
105+
void fn_version(size_t n) {
106+
// assert() isn't aggressive enough
107+
if (n < 2) __builtin_unreachable();
108+
109+
Pair *buffer = malloc(n * sizeof(*buffer));
110+
106111
stack s;
107-
stack_init(&s, buffer, sizeof(Pair), 100);
112+
stack_init(&s, buffer, sizeof(Pair), n);
108113

109114
Pair p1 = {.a = 10, .b = 20};
110-
Pair p2 = {.a = 111, .b = sin(222.0)}; // sin() is optimized away!
115+
Pair p2 = {.a = 111, .b = sin(222.0)};
111116

112117
assert(stack_push(&s, &p1) == SUCCESS);
113118
assert(stack_push(&s, &p2) == SUCCESS);
@@ -121,15 +126,65 @@ int main(void) {
121126
assert(out1.a == 10 && out1.b == 20.0);
122127

123128
assert(stack_empty(&s));
129+
}
130+
131+
/*
 * STACK_PUSH(s, value) - append a byte-wise copy of `value` to the stack
 * that `s` points to.
 *
 *   s      pointer expression; its `size`, `capacity`, `data` and
 *          `element_size` members are read and `size` is incremented.
 *          It is expanded several times, so it must be free of side effects.
 *   value  must be an lvalue, because its address is taken;
 *          (s)->element_size bytes are copied from it.
 *
 * When the stack is already full (size >= capacity) the macro executes a
 * bare `return;`, which leaves the *enclosing function* without reporting
 * anything. It is therefore only usable inside functions returning void.
 * The expansion declares the locals `dst`, `src` and `i` inside its own
 * do/while block.
 */
#define STACK_PUSH(s, value) \
132+
do { \
133+
if ((s)->size >= (s)->capacity) { \
134+
return; \
135+
} \
136+
/* Byte-copy using macro, no memcpy() */ \
137+
unsigned char *dst = \
138+
(unsigned char *)(s)->data + (s)->size * (s)->element_size; \
139+
const unsigned char *src = (const unsigned char *)&(value); \
140+
for (size_t i = 0; i < (s)->element_size; ++i) \
141+
dst[i] = src[i]; \
142+
(s)->size++; \
143+
} while (0)
144+
145+
/*
 * STACK_POP(s, out_lvalue) - remove the top element of the stack that `s`
 * points to and copy its bytes into `out_lvalue`.
 *
 *   s           pointer expression; its `size`, `data` and `element_size`
 *               members are read and `size` is decremented. It is expanded
 *               several times, so it must be free of side effects.
 *   out_lvalue  must be an lvalue, because its address is taken;
 *               (s)->element_size bytes are written into it.
 *
 * When the stack is empty (size == 0) the macro executes a bare `return;`,
 * which leaves the *enclosing function* and leaves `out_lvalue` untouched.
 * It is therefore only usable inside functions returning void.
 * `size` is decremented first, so the copy reads the slot at the new `size`
 * index. The expansion declares the locals `dst`, `src` and `i` inside its
 * own do/while block.
 */
#define STACK_POP(s, out_lvalue) \
146+
do { \
147+
if ((s)->size == 0) { \
148+
return; \
149+
} \
150+
(s)->size--; \
151+
unsigned char *dst = (unsigned char *)&(out_lvalue); \
152+
const unsigned char *src = \
153+
(const unsigned char *)(s)->data + (s)->size * (s)->element_size; \
154+
for (size_t i = 0; i < (s)->element_size; ++i) \
155+
dst[i] = src[i]; \
156+
} while (0)
157+
158+
159+
/*
 * macro_version - stack round-trip test written with the STACK_PUSH and
 * STACK_POP macros instead of function calls.
 *
 * Allocates room for `n` Pair objects with malloc(), hands the buffer to
 * stack_init() together with sizeof(Pair) and n, pushes p1 then p2, pops
 * them back in reverse order and asserts the popped values and that the
 * stack ends up empty.
 *
 * Notes:
 *  - n < 2 is declared unreachable, so callers must pass n >= 2.
 *  - The malloc() result is neither checked nor freed in this function.
 *  - STACK_PUSH / STACK_POP contain a bare `return;`, so a full or empty
 *    stack makes this function return silently before the asserts run.
 */
void macro_version(size_t n) {
160+
// assert() isn't aggressive enough; promise the optimizer that n >= 2 instead
161+
if (n < 2) __builtin_unreachable();
162+
163+
Pair *buffer = malloc(n * sizeof(*buffer));
164+
165+
stack s;
166+
stack_init(&s, buffer, sizeof(Pair), n);
167+
168+
Pair p1 = {.a = 10, .b = 20};
169+
Pair p2 = {.a = 111, .b = sin(222.0)};
170+
171+
STACK_PUSH(&s, p1);
172+
STACK_PUSH(&s, p2);
173+
174+
Pair out2;
175+
STACK_POP(&s, out2);
176+
assert(out2.a == 111 && out2.b == sin(222.0));
124177

125-
printf("Stack test passed.\n");
178+
Pair out1;
179+
STACK_POP(&s, out1);
180+
assert(out1.a == 10 && out1.b == 20.0);
181+
182+
assert(stack_empty(&s));
126183
}
127184
```
128185
129186
# Generic Hash Map
130187
131-
Clang requires `-O2`, while GCC requires `-O3`.
132-
133188
Copy of the code on [Compiler Explorer](https://godbolt.org/z/d176e16eb):
134189
135190
```c

0 commit comments

Comments
 (0)