|
| 1 | +/* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | +#ifndef __TASK_LOCAL_DATA_H |
| 3 | +#define __TASK_LOCAL_DATA_H |
| 4 | + |
| 5 | +#include <errno.h> |
| 6 | +#include <fcntl.h> |
| 7 | +#include <sched.h> |
| 8 | +#include <stdatomic.h> |
| 9 | +#include <stddef.h> |
| 10 | +#include <stdlib.h> |
| 11 | +#include <string.h> |
| 12 | +#include <unistd.h> |
| 13 | +#include <sys/syscall.h> |
| 14 | +#include <sys/types.h> |
| 15 | + |
| 16 | +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT |
| 17 | +#include <pthread.h> |
| 18 | +#endif |
| 19 | + |
| 20 | +#include <bpf/bpf.h> |
| 21 | + |
| 22 | +/* |
| 23 | + * OPTIONS |
| 24 | + * |
| 25 | + * Define the option before including the header |
| 26 | + * |
| 27 | + * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically |
| 28 | + * |
| 29 | + * Thread-specific memory for storing TLDs is allocated lazily on the first call to |
| 30 | + * tld_get_data(). Without this option, a thread that calls tld_get_data() must call |
| 31 | + * tld_free() before it exits to prevent a memory leak. With this option, <pthread.h> is |
| 32 | + * included and a pthread key is registered with a destructor that calls tld_free(). |
| 33 | + * |
| 34 | + * |
| 35 | + * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically |
| 36 | + * (default: 64 bytes) |
| 37 | + * |
| 38 | + * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using |
| 39 | + * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be |
| 40 | + * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated |
| 41 | + * for these TLDs. This additional memory is allocated for every thread that calls |
| 42 | + * tld_get_data() even if tld_create_key() is never called, so be mindful of |
| 43 | + * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory |
| 44 | + * will be allocated for TLDs created with it. |
| 45 | + * |
| 46 | + * |
| 47 | + * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) |
| 48 | + * |
| 49 | + * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, |
| 50 | + * TLD_MAX_DATA_CNT. |
| 51 | + * |
| 52 | + * |
| 53 | + * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc() |
| 54 | + * |
| 55 | + * When allocating the memory for storing TLDs, we need to make sure there is a memory |
| 56 | + * region of X bytes within a page. This is due to the limit imposed by UPTR: memory |
| 57 | + * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The |
| 58 | + * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses |
| 59 | + * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage |
| 60 | + * as not all memory allocators can use the exact amount of memory requested to fulfill |
| 61 | + * aligned_alloc(). For example, some may round the size up to the alignment. Enable the |
| 62 | + * option to always use aligned_alloc() if the implementation has low memory overhead. |
| 63 | + */ |
| 64 | + |
/* Page size in bytes; getpagesize() is called at every expansion. */
#define TLD_PAGE_SIZE getpagesize()
/* Mask that clears the in-page offset bits of an address. */
#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))

/* Round x up to the next multiple of y. y must be a power of two. */
#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)

/*
 * Read x through a volatile-qualified lvalue so the access is not elided or
 * merged with a neighbouring one. Spelled with __typeof__, like
 * TLD_ROUND_MASK(), so the header also builds in strict ISO modes
 * (e.g. -std=c11) where plain typeof is not a keyword.
 */
#define TLD_READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

/* Bytes reserved per thread for TLDs created with tld_create_key(). */
#ifndef TLD_DYN_DATA_SIZE
#define TLD_DYN_DATA_SIZE 64
#endif

/* Number of struct tld_metadata slots used in the metadata page. */
#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)

/* Size in bytes of the name field of struct tld_metadata. */
#ifndef TLD_NAME_LEN
#define TLD_NAME_LEN 62
#endif
| 82 | + |
| 83 | +#ifdef __cplusplus |
| 84 | +extern "C" { |
| 85 | +#endif |
| 86 | + |
| 87 | +typedef struct { |
| 88 | + __s16 off; |
| 89 | +} tld_key_t; |
| 90 | + |
| 91 | +struct tld_metadata { |
| 92 | + char name[TLD_NAME_LEN]; |
| 93 | + _Atomic __u16 size; |
| 94 | +}; |
| 95 | + |
| 96 | +struct tld_meta_u { |
| 97 | + _Atomic __u8 cnt; |
| 98 | + __u16 size; |
| 99 | + struct tld_metadata metadata[]; |
| 100 | +}; |
| 101 | + |
| 102 | +struct tld_data_u { |
| 103 | + __u64 start; /* offset of tld_data_u->data in a page */ |
| 104 | + char data[]; |
| 105 | +}; |
| 106 | + |
/* Value passed to bpf_map_update_elem() for the calling thread. */
struct tld_map_value {
	/* page-aligned address of the page holding this thread's tld_data_u */
	void *data;
	/* the process-wide metadata page */
	struct tld_meta_u *meta;
};
| 111 | + |
/*
 * Shared state. The definitions are weak so that every translation unit that
 * includes this header can carry them.
 */

/* Process-wide metadata page; NULL until __tld_init_meta_p() publishes it. */
struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));
/* Calling thread's TLD area; NULL until __tld_init_data_p() succeeds. */
__thread struct tld_data_u *tld_data_p __attribute__((weak));
/* Pointer returned by the allocator for that area; what tld_free() frees. */
__thread void *tld_data_alloc_p __attribute__((weak));
| 115 | + |
#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
/* Key whose destructor releases the exiting thread's TLD memory. */
pthread_key_t tld_pthread_key __attribute__((weak));

static void tld_free(void);

/* pthread key destructor: the key's value is ignored, only tld_free() runs. */
static void __tld_thread_exit_handler(void *unused)
{
	(void)unused;
	tld_free();
}
#endif
| 126 | + |
| 127 | +static int __tld_init_meta_p(void) |
| 128 | +{ |
| 129 | + struct tld_meta_u *meta, *uninit = NULL; |
| 130 | + int err = 0; |
| 131 | + |
| 132 | + meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE); |
| 133 | + if (!meta) { |
| 134 | + err = -ENOMEM; |
| 135 | + goto out; |
| 136 | + } |
| 137 | + |
| 138 | + memset(meta, 0, TLD_PAGE_SIZE); |
| 139 | + meta->size = TLD_DYN_DATA_SIZE; |
| 140 | + |
| 141 | + if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) { |
| 142 | + free(meta); |
| 143 | + goto out; |
| 144 | + } |
| 145 | + |
| 146 | +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT |
| 147 | + pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); |
| 148 | +#endif |
| 149 | +out: |
| 150 | + return err; |
| 151 | +} |
| 152 | + |
| 153 | +static int __tld_init_data_p(int map_fd) |
| 154 | +{ |
| 155 | + bool use_aligned_alloc = false; |
| 156 | + struct tld_map_value map_val; |
| 157 | + struct tld_data_u *data; |
| 158 | + void *data_alloc = NULL; |
| 159 | + int err, tid_fd = -1; |
| 160 | + |
| 161 | + tid_fd = syscall(SYS_pidfd_open, gettid(), O_EXCL); |
| 162 | + if (tid_fd < 0) { |
| 163 | + err = -errno; |
| 164 | + goto out; |
| 165 | + } |
| 166 | + |
| 167 | +#ifdef TLD_DATA_USE_ALIGNED_ALLOC |
| 168 | + use_aligned_alloc = true; |
| 169 | +#endif |
| 170 | + |
| 171 | + /* |
| 172 | + * tld_meta_p->size = TLD_DYN_DATA_SIZE + |
| 173 | + * total size of TLDs defined via TLD_DEFINE_KEY() |
| 174 | + */ |
| 175 | + data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ? |
| 176 | + aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) : |
| 177 | + malloc(tld_meta_p->size * 2); |
| 178 | + if (!data_alloc) { |
| 179 | + err = -ENOMEM; |
| 180 | + goto out; |
| 181 | + } |
| 182 | + |
| 183 | + /* |
| 184 | + * Always pass a page-aligned address to UPTR since the size of tld_map_value::data |
| 185 | + * is a page in BTF. If data_alloc spans across two pages, use the page that contains large |
| 186 | + * enough memory. |
| 187 | + */ |
| 188 | + if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) { |
| 189 | + map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc); |
| 190 | + data = data_alloc; |
| 191 | + data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) + |
| 192 | + offsetof(struct tld_data_u, data); |
| 193 | + } else { |
| 194 | + map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); |
| 195 | + data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); |
| 196 | + data->start = offsetof(struct tld_data_u, data); |
| 197 | + } |
| 198 | + map_val.meta = TLD_READ_ONCE(tld_meta_p); |
| 199 | + |
| 200 | + err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); |
| 201 | + if (err) { |
| 202 | + free(data_alloc); |
| 203 | + goto out; |
| 204 | + } |
| 205 | + |
| 206 | + tld_data_p = data; |
| 207 | + tld_data_alloc_p = data_alloc; |
| 208 | +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT |
| 209 | + pthread_setspecific(tld_pthread_key, (void *)1); |
| 210 | +#endif |
| 211 | +out: |
| 212 | + if (tid_fd >= 0) |
| 213 | + close(tid_fd); |
| 214 | + return err; |
| 215 | +} |
| 216 | + |
| 217 | +static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) |
| 218 | +{ |
| 219 | + int err, i, sz, off = 0; |
| 220 | + __u8 cnt; |
| 221 | + |
| 222 | + if (!TLD_READ_ONCE(tld_meta_p)) { |
| 223 | + err = __tld_init_meta_p(); |
| 224 | + if (err) |
| 225 | + return (tld_key_t){err}; |
| 226 | + } |
| 227 | + |
| 228 | + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { |
| 229 | +retry: |
| 230 | + cnt = atomic_load(&tld_meta_p->cnt); |
| 231 | + if (i < cnt) { |
| 232 | + /* A metadata is not ready until size is updated with a non-zero value */ |
| 233 | + while (!(sz = atomic_load(&tld_meta_p->metadata[i].size))) |
| 234 | + sched_yield(); |
| 235 | + |
| 236 | + if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN)) |
| 237 | + return (tld_key_t){-EEXIST}; |
| 238 | + |
| 239 | + off += TLD_ROUND_UP(sz, 8); |
| 240 | + continue; |
| 241 | + } |
| 242 | + |
| 243 | + /* |
| 244 | + * TLD_DEFINE_KEY() is given memory upto a page while at most |
| 245 | + * TLD_DYN_DATA_SIZE is allocated for tld_create_key() |
| 246 | + */ |
| 247 | + if (dyn_data) { |
| 248 | + if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size) |
| 249 | + return (tld_key_t){-E2BIG}; |
| 250 | + } else { |
| 251 | + if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) |
| 252 | + return (tld_key_t){-E2BIG}; |
| 253 | + tld_meta_p->size += TLD_ROUND_UP(size, 8); |
| 254 | + } |
| 255 | + |
| 256 | + /* |
| 257 | + * Only one tld_create_key() can increase the current cnt by one and |
| 258 | + * takes the latest available slot. Other threads will check again if a new |
| 259 | + * TLD can still be added, and then compete for the new slot after the |
| 260 | + * succeeding thread update the size. |
| 261 | + */ |
| 262 | + if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) |
| 263 | + goto retry; |
| 264 | + |
| 265 | + strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN); |
| 266 | + atomic_store(&tld_meta_p->metadata[i].size, size); |
| 267 | + return (tld_key_t){(__s16)off}; |
| 268 | + } |
| 269 | + |
| 270 | + return (tld_key_t){-ENOSPC}; |
| 271 | +} |
| 272 | + |
/**
 * TLD_DEFINE_KEY() - Define a TLD together with a global key variable for it.
 *
 * @key: Identifier of the tld_key_t variable to define
 * @name: The name of the TLD. Cannot exceed TLD_NAME_LEN
 * @size: The size of the TLD
 *
 * Usable at file scope only: it expands to a variable definition and a
 * function definition.
 *
 * The key is assigned by a constructor function, i.e. before main() starts.
 * Check it later with tld_key_is_err() or tld_key_err_or_zero(), then pass it
 * to tld_get_data() to obtain a pointer to the TLD. bpf programs can also
 * fetch the same key by name.
 *
 * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page.
 * Just enough memory will be allocated for each thread on the first call to
 * tld_get_data().
 */
#define TLD_DEFINE_KEY(key, name, size)					\
tld_key_t key;								\
									\
__attribute__((constructor)) void __tld_define_key_##key(void)		\
{									\
	key = __tld_create_key(name, size, false);			\
}
| 298 | + |
| 299 | +/** |
| 300 | + * tld_create_key() - Create a TLD and return a key associated with the TLD. |
| 301 | + * |
| 302 | + * @name: The name the TLD |
| 303 | + * @size: The size of the TLD |
| 304 | + * |
| 305 | + * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check |
| 306 | + * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to |
| 307 | + * locate the TLD. bpf programs can also fetch the same key by name. |
| 308 | + * |
| 309 | + * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is |
| 310 | + * not known statically or a TLD needs to be created conditionally) |
| 311 | + * |
| 312 | + * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs |
| 313 | + * created dynamically with tld_create_key(). Since only a user page is pinned to the |
| 314 | + * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE - |
| 315 | + * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page. |
| 316 | + */ |
| 317 | +__attribute__((unused)) |
| 318 | +static tld_key_t tld_create_key(const char *name, size_t size) |
| 319 | +{ |
| 320 | + return __tld_create_key(name, size, true); |
| 321 | +} |
| 322 | + |
| 323 | +__attribute__((unused)) |
| 324 | +static inline bool tld_key_is_err(tld_key_t key) |
| 325 | +{ |
| 326 | + return key.off < 0; |
| 327 | +} |
| 328 | + |
| 329 | +__attribute__((unused)) |
| 330 | +static inline int tld_key_err_or_zero(tld_key_t key) |
| 331 | +{ |
| 332 | + return tld_key_is_err(key) ? key.off : 0; |
| 333 | +} |
| 334 | + |
| 335 | +/** |
| 336 | + * tld_get_data() - Get a pointer to the TLD associated with the given key of the |
| 337 | + * calling thread. |
| 338 | + * |
| 339 | + * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map |
| 340 | + * of task local data. |
| 341 | + * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key(). |
| 342 | + * |
| 343 | + * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD |
| 344 | + * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte |
| 345 | + * aligned. |
| 346 | + * |
| 347 | + * Threads that call tld_get_data() must call tld_free() on exit to prevent |
| 348 | + * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined. |
| 349 | + */ |
| 350 | +__attribute__((unused)) |
| 351 | +static void *tld_get_data(int map_fd, tld_key_t key) |
| 352 | +{ |
| 353 | + if (!TLD_READ_ONCE(tld_meta_p)) |
| 354 | + return NULL; |
| 355 | + |
| 356 | + /* tld_data_p is allocated on the first invocation of tld_get_data() */ |
| 357 | + if (!tld_data_p && __tld_init_data_p(map_fd)) |
| 358 | + return NULL; |
| 359 | + |
| 360 | + return tld_data_p->data + key.off; |
| 361 | +} |
| 362 | + |
| 363 | +/** |
| 364 | + * tld_free() - Free task local data memory of the calling thread |
| 365 | + * |
| 366 | + * For the calling thread, all pointers to TLDs acquired before will become invalid. |
| 367 | + * |
| 368 | + * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, |
| 369 | + * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered |
| 370 | + * to free the memory automatically. |
| 371 | + */ |
| 372 | +__attribute__((unused)) |
| 373 | +static void tld_free(void) |
| 374 | +{ |
| 375 | + if (tld_data_alloc_p) { |
| 376 | + free(tld_data_alloc_p); |
| 377 | + tld_data_alloc_p = NULL; |
| 378 | + tld_data_p = NULL; |
| 379 | + } |
| 380 | +} |
| 381 | + |
| 382 | +#ifdef __cplusplus |
| 383 | +} /* extern "C" */ |
| 384 | +#endif |
| 385 | + |
| 386 | +#endif /* __TASK_LOCAL_DATA_H */ |
0 commit comments