Skip to content

Commit b288ac0

Browse files
authored
[libc][stdlib] Implement setenv() with environment management infrastructure
Implement setenv() for LLVM libc, enabling modification of process environment variables in compliance with POSIX specifications.

Implementation Overview:
- Shared infrastructure in environ_internal.{h,cpp} providing thread-safe environment array management with copy-on-write semantics
- Memory ownership tracking to distinguish library-allocated strings from external strings (startup environ)
- Thread-safe operations using environ_mutex for all modifications
- Dynamic array growth with configurable initial capacity

Key Design Decisions:
- Linear array approach (O(n) lookups) suitable for typical environment sizes (30-80 variables) with minimal memory overhead
- Copy-on-write: starts with startup environment, allocates on first modification to avoid unnecessary copying
- Memory ownership model prevents double-free bugs: tracks which strings can be safely freed (setenv allocations)

Function Implementation:
- setenv(name, value, overwrite): Allocates "name=value" string, respects overwrite flag, validates inputs per POSIX (rejects NULL, empty names, names with '='). Returns 0 on success, -1 with errno on error.

Testing:
- Comprehensive integration test suite covering basic functionality, POSIX compliance, error conditions, and edge cases
- 12 test cases passing
1 parent 9f06843 commit b288ac0

File tree

9 files changed

+651
-0
lines changed

9 files changed

+651
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,7 @@ if(LLVM_LIBC_FULL_BUILD)
12481248
libc.src.stdlib.exit
12491249
libc.src.stdlib.getenv
12501250
libc.src.stdlib.quick_exit
1251+
libc.src.stdlib.setenv
12511252

12521253
# signal.h entrypoints
12531254
libc.src.signal.kill

libc/include/stdlib.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,14 @@ functions:
180180
return_type: int
181181
arguments:
182182
- type: void
183+
- name: setenv
184+
standards:
185+
- posix
186+
return_type: int
187+
arguments:
188+
- type: const char *
189+
- type: const char *
190+
- type: int
183191
- name: srand
184192
standards:
185193
- stdc

libc/src/stdlib/CMakeLists.txt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,38 @@ add_entrypoint_object(
6565
libc.config.app_h
6666
)
6767

68+
# Object library built from environ_internal.cpp / environ_internal.h; the
# setenv entrypoint in this file lists it as a dependency (.environ_internal).
add_object_library(
69+
environ_internal
70+
SRCS
71+
environ_internal.cpp
72+
HDRS
73+
environ_internal.h
74+
DEPENDS
75+
libc.config.app_h
76+
libc.hdr.types.size_t
77+
libc.src.__support.CPP.string_view
78+
libc.src.__support.threads.mutex
79+
libc.src.stdlib.free
80+
libc.src.stdlib.malloc
81+
libc.src.string.memcpy
82+
)
83+
84+
# setenv entrypoint, built from setenv.cpp / setenv.h on top of the
# environ_internal object library declared in this same directory.
add_entrypoint_object(
85+
setenv
86+
SRCS
87+
setenv.cpp
88+
HDRS
89+
setenv.h
90+
DEPENDS
91+
.environ_internal
92+
libc.src.__support.CPP.string_view
93+
libc.src.__support.libc_errno
94+
libc.src.__support.threads.mutex
95+
libc.src.stdlib.malloc
96+
libc.src.string.memcpy
97+
libc.src.string.strlen
98+
)
99+
68100
add_entrypoint_object(
69101
strfromf
70102
SRCS
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
//===-- Implementation of internal environment utilities ------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "environ_internal.h"
10+
#include "config/app.h"
11+
#include "src/__support/CPP/string_view.h"
12+
#include "src/__support/macros/config.h"
13+
#include "src/string/memcpy.h"
14+
15+
// We use extern "C" declarations for malloc/free/realloc instead of including
16+
// src/stdlib/malloc.h, src/stdlib/free.h, and src/stdlib/realloc.h. This allows
17+
// the implementation to work with different allocator implementations,
18+
// particularly in integration tests which provide a simple bump allocator. The
19+
// extern "C" linkage ensures we use whatever allocator is linked with the test
20+
// or application.
21+
extern "C" void *malloc(size_t);
22+
extern "C" void free(void *);
23+
extern "C" void *realloc(void *, size_t);
24+
25+
namespace LIBC_NAMESPACE_DECL {
26+
namespace internal {
27+
28+
// Minimum initial capacity for the environment array when first allocated.
29+
// This avoids frequent reallocations for small environments.
30+
constexpr size_t MIN_ENVIRON_CAPACITY = 32;
31+
32+
// Growth factor for environment array capacity when expanding.
33+
// When capacity is exceeded, new_capacity = needed *
34+
// ENVIRON_GROWTH_FACTOR, where `needed` is the requested entry count.
35+
constexpr size_t ENVIRON_GROWTH_FACTOR = 2;
36+
37+
// Global state for environment management
38+
// Lock that ensure_capacity() expects its caller to already hold.
Mutex environ_mutex(false, false, false, false);
39+
// Environment array allocated by ensure_capacity(); nullptr until then.
char **environ_storage = nullptr;
40+
// Per-entry ownership records, allocated alongside environ_storage.
EnvStringOwnership *environ_ownership = nullptr;
41+
// Number of entries environ_storage has room for, not counting the extra
// slot reserved for the terminating nullptr.
size_t environ_capacity = 0;
42+
// Number of entries currently in the environment (set by init_environ()).
size_t environ_size = 0;
43+
// Set to true by ensure_capacity() once environ_storage is in use.
bool environ_is_ours = false;
44+
45+
char **get_environ_array() {
46+
if (environ_is_ours)
47+
return environ_storage;
48+
return reinterpret_cast<char **>(LIBC_NAMESPACE::app.env_ptr);
49+
}
50+
51+
void init_environ() {
52+
// Count entries in the startup environ
53+
char **env_ptr = reinterpret_cast<char **>(LIBC_NAMESPACE::app.env_ptr);
54+
if (!env_ptr)
55+
return;
56+
57+
size_t count = 0;
58+
for (char **env = env_ptr; *env != nullptr; env++)
59+
count++;
60+
61+
environ_size = count;
62+
}
63+
64+
// Looks up `name` among the first environ_size entries of the current
// environment array. An entry matches when it begins with `name` immediately
// followed by '='. Returns the index of the first match, or -1 when there is
// no array or no matching entry.
int find_env_var(cpp::string_view name) {
  char **const entries = get_environ_array();
  if (entries == nullptr)
    return -1;

  const size_t name_len = name.size();
  for (size_t idx = 0; idx != environ_size; ++idx) {
    const cpp::string_view entry(entries[idx]);
    // The entry must be strictly longer than the name so that the character
    // right after the name exists and can be compared against '='.
    const bool is_match = entry.size() > name_len &&
                          entry[name_len] == '=' && entry.starts_with(name);
    if (is_match)
      return static_cast<int>(idx);
  }

  return -1;
}
81+
82+
bool ensure_capacity(size_t needed) {
83+
// IMPORTANT: This function assumes environ_mutex is already held by the
84+
// caller. Do not add locking here as it would cause deadlock.
85+
86+
// If we're still using the startup environ, we need to copy it
87+
if (!environ_is_ours) {
88+
char **old_env = reinterpret_cast<char **>(LIBC_NAMESPACE::app.env_ptr);
89+
90+
// Allocate new array with room to grow
91+
size_t new_capacity = needed < MIN_ENVIRON_CAPACITY
92+
? MIN_ENVIRON_CAPACITY
93+
: needed * ENVIRON_GROWTH_FACTOR;
94+
char **new_storage =
95+
reinterpret_cast<char **>(malloc(sizeof(char *) * (new_capacity + 1)));
96+
if (!new_storage)
97+
return false;
98+
99+
// Allocate ownership tracking array
100+
EnvStringOwnership *new_ownership = reinterpret_cast<EnvStringOwnership *>(
101+
malloc(sizeof(EnvStringOwnership) * (new_capacity + 1)));
102+
if (!new_ownership) {
103+
free(new_storage);
104+
return false;
105+
}
106+
107+
// Copy existing pointers (we don't own the strings yet, so just copy
108+
// pointers)
109+
if (old_env) {
110+
for (size_t i = 0; i < environ_size; i++) {
111+
new_storage[i] = old_env[i];
112+
// Initialize ownership: startup strings are not owned by us
113+
new_ownership[i] = EnvStringOwnership();
114+
}
115+
}
116+
new_storage[environ_size] = nullptr;
117+
118+
environ_storage = new_storage;
119+
environ_ownership = new_ownership;
120+
environ_capacity = new_capacity;
121+
environ_is_ours = true;
122+
123+
// Update app.env_ptr to point to our storage
124+
LIBC_NAMESPACE::app.env_ptr =
125+
reinterpret_cast<uintptr_t *>(environ_storage);
126+
127+
return true;
128+
}
129+
130+
// We already own environ, check if we need to grow it
131+
if (needed <= environ_capacity)
132+
return true;
133+
134+
// Grow capacity by the growth factor
135+
size_t new_capacity = needed * ENVIRON_GROWTH_FACTOR;
136+
137+
// Use realloc to grow the arrays
138+
char **new_storage = reinterpret_cast<char **>(
139+
realloc(environ_storage, sizeof(char *) * (new_capacity + 1)));
140+
if (!new_storage)
141+
return false;
142+
143+
EnvStringOwnership *new_ownership =
144+
reinterpret_cast<EnvStringOwnership *>(realloc(
145+
environ_ownership, sizeof(EnvStringOwnership) * (new_capacity + 1)));
146+
if (!new_ownership) {
147+
// If ownership realloc fails, we still have the old storage in new_storage
148+
// which was successfully reallocated. We need to restore or handle this.
149+
// For safety, we'll keep the successfully reallocated storage.
150+
environ_storage = new_storage;
151+
return false;
152+
}
153+
154+
environ_storage = new_storage;
155+
environ_ownership = new_ownership;
156+
environ_capacity = new_capacity;
157+
158+
// Update app.env_ptr to point to our new storage
159+
LIBC_NAMESPACE::app.env_ptr = reinterpret_cast<uintptr_t *>(environ_storage);
160+
161+
return true;
162+
}
163+
164+
} // namespace internal
165+
} // namespace LIBC_NAMESPACE_DECL

libc/src/stdlib/environ_internal.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//===-- Internal utilities for environment management ----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STDLIB_ENVIRON_INTERNAL_H
10+
#define LLVM_LIBC_SRC_STDLIB_ENVIRON_INTERNAL_H
11+
12+
#include "hdr/types/size_t.h"
13+
#include "src/__support/CPP/string_view.h"
14+
#include "src/__support/macros/attributes.h"
15+
#include "src/__support/macros/config.h"
16+
#include "src/__support/threads/mutex.h"
17+
18+
namespace LIBC_NAMESPACE_DECL {
19+
namespace internal {
20+
21+
// Ownership information for environment strings.
22+
// We need to track ownership because environment strings come from three
23+
// sources:
24+
// 1. Startup environment (from program loader) - we don't own these
25+
// 2. putenv() calls where caller provides the string - we don't own these
26+
// 3. setenv() calls where we allocate the string - we DO own these
27+
// Only strings we allocated can be freed when replaced or removed.
28+
struct EnvStringOwnership {
29+
bool allocated_by_us; // True if we malloc'd this string (must free).
30+
// False for startup environ or putenv strings (don't
31+
// free).
32+
33+
// Default: not owned by us (startup or putenv - don't free).
34+
LIBC_INLINE EnvStringOwnership() : allocated_by_us(false) {}
35+
36+
// Returns true if this string can be safely freed.
37+
LIBC_INLINE bool can_free() const { return allocated_by_us; }
38+
};
39+
40+
// Global mutex protecting all environ modifications
41+
extern Mutex environ_mutex;
42+
43+
// Our allocated environ array (nullptr if using startup environ)
44+
extern char **environ_storage;
45+
46+
// Parallel array tracking ownership of each environ string
47+
// Same size/capacity as environ_storage
48+
extern EnvStringOwnership *environ_ownership;
49+
50+
// Allocated capacity of environ_storage
51+
extern size_t environ_capacity;
52+
53+
// Current number of variables in environ
54+
extern size_t environ_size;
55+
56+
// True if we allocated environ_storage (and are responsible for freeing it)
57+
extern bool environ_is_ours;
58+
59+
// Search for a variable by name in the current environ array.
60+
// Returns the index if found, or -1 if not found.
61+
// This function assumes the mutex is already held.
62+
int find_env_var(cpp::string_view name);
63+
64+
// Ensure environ has capacity for at least `needed` entries (plus null
65+
// terminator). May allocate or reallocate environ_storage. Returns true on
66+
// success, false on allocation failure. This function assumes the mutex is
67+
// already held.
68+
bool ensure_capacity(size_t needed);
69+
70+
// Get a pointer to the current environ array.
71+
// This may be app.env_ptr (startup environ) or environ_storage (our copy).
72+
char **get_environ_array();
73+
74+
// Initialize environ management from the startup environment.
75+
// This must be called before any setenv/unsetenv operations.
76+
// It takes no lock itself, so call it with environ_mutex held; calling it
// again simply recounts the entries.
77+
void init_environ();
78+
79+
} // namespace internal
80+
} // namespace LIBC_NAMESPACE_DECL
81+
82+
#endif // LLVM_LIBC_SRC_STDLIB_ENVIRON_INTERNAL_H

0 commit comments

Comments
 (0)