|
| 1 | +/* Internal header for all the files in hpy/debug/src. The public API is in |
| 2 | + include/hpy_debug.h |
| 3 | +*/ |
| 4 | +#ifndef HPY_DEBUG_INTERNAL_H |
| 5 | +#define HPY_DEBUG_INTERNAL_H |
| 6 | + |
| 7 | +#include <assert.h> |
| 8 | +#include "hpy.h" |
| 9 | +#include "hpy_debug.h" |
| 10 | + |
/* Value expected in HPyDebugInfo.magic_number; get_info() compares against it
   as a sanity check. */
#define HPY_DEBUG_MAGIC 0xDEB00FF
| 12 | + |
| 13 | +/* The Debug context is a wrapper around an underlying context, which we will |
| 14 | + call Universal. Inside the debug mode we manipulate handles which belong |
| 15 | + to both contexts, so to make things easier we create two typedefs to make |
| 16 | + it clear what kind of handle we expect: UHPy and DHPy: |
| 17 | +
|
| 18 | + * UHPy are opaque from our point of view. |
| 19 | +
|
| 20 | + * DHPy are actually DebugHandle* in disguise. DebugHandles are wrappers |
| 21 | + around a UHPy, with a bunch of extra info. |
| 22 | +
|
| 23 | + To cast between DHPy and DebugHandle*, use as_DebugHandle and as_DHPy: |
| 24 | + these are just no-op casts. |
| 25 | +
|
| 26 | + Each DHPy wraps a corresponding UHPy: DHPys are created by calling |
| 27 | + DHPy_open, and they must be eventually closed by DHPy_close. Note that if |
| 28 | + you call DHPy_open twice on the same UHPy, you get two different DHPy. |
| 29 | +
|
| 30 | + To unwrap a DHPy and get the underlying UHPy, call DHPy_unwrap. If you call |
| 31 | + DHPy_unwrap multiple times on the same DHPy, you always get the same UHPy. |
| 32 | +
|
| 33 | + WARNING: both UHPy and DHPy are aliases of HPy, so we need to take care of |
| 34 | + not mixing them, because the compiler cannot help. |
| 35 | +
|
| 36 | + Each DebugHandle has a "generation", which is just an int to be able to get |
| 37 | + only the handles which were created after a certain point. |
| 38 | +
|
| 39 | + DHPys/DebugHandles are memory-managed by using a free list: |
| 40 | +
|
| 41 | + - info->open_handles is a list of all DHPys which are currently open |
| 42 | +
|
| 43 | + - DHPy_close() moves a DHPy from info->open_handles to info->closed_handles |
| 44 | +
|
| 45 | + - if closed_handles is too big, the oldest DHPy is freed by DHPy_free() |
| 46 | +
|
| 47 | + - to allocate memory for a new DHPy, DHPy_open() does the following: |
| 48 | +
|
| 49 | + * if closed_handles is full, it reuses the memory of the oldest DHPy |
| 50 | + in the queue |
| 51 | +
|
| 52 | + * else, it malloc()s memory for a new DHPy |
| 53 | +
|
| 54 | +
|
| 55 | + Each DebugHandle can have some "raw" data associated with it. It is a |
| 56 | + generic pointer to any data. The validity, or life-time, of such pointer |
| 57 | + is supposed to be the same as that of the handle, and the debug mode |
| 58 | + enforces it. Additionally, the data can be also marked as write protected. |
| 59 | +
|
| 60 | + Example is the `const char*` handed out by `HPyUnicode_AsUTF8AndSize`. It |
| 61 | + must not be written by the user (users may discard the const modifier), and |
| 62 | + the pointer is considered invalid once the handle is closed, so it must not |
| 63 | + be accessed even for reading. Most Python implementations will choose to |
| 64 | + hand out a pointer to the actual internal data, which happens to stay valid |
| 65 | + and accessible, and this may lead users to the wrong conclusion that they |
| 66 | + can use the pointer after the handle is closed. |
| 67 | +
|
| 68 | + The memory protection mechanism is abstracted by several functions that |
| 69 | + may have different implementations depending on the compile-time |
| 70 | + configuration. Those are: |
| 71 | +
|
| 72 | + * `raw_data_copy`: makes a copy of some data, optionally the copy can be |
| 73 | + made read-only. |
| 74 | + * `raw_data_protect`: protects the result of `raw_data_copy` from reading |
| 75 | + * `raw_data_free`: if `raw_data_protect` retained any actual memory or other |
| 76 | + resources, this indicates that those can be freed |
| 77 | +
|
| 78 | + Any HPy context function that wishes to attach raw data to a handle should |
| 79 | + make a copy of the actual data by using `raw_data_copy`. This copy should be |
| 80 | + then set as the value of the associated_data field. Once the handle is |
| 81 | + closed, the raw data pointer is passed to raw_data_protect and once the handle |
| 82 | + is reused the raw data pointer is passed to raw_data_free. |
| 83 | +
|
| 84 | + This means that if the implementation of `raw_data_protect` retains some |
| 85 | + resources, we are leaking them. To mitigate this a bit, we have a limit on the |
| 86 | + overall size of data that can be leaked and once it is reached, we use |
| 87 | + raw_data_free immediately once the associated handle is closed. |
| 88 | +
|
| 89 | + Note that, for example, the mmap based implementation of `raw_data_copy` |
| 90 | + never allocates less than a page, so it actually takes more memory than |
| 91 | + what is the size of the raw data. This is, however, mostly covered by the |
| 92 | + limit on closed handles. For the default configuration we have: |
| 93 | +
|
| 94 | + DEFAULT_CLOSED_HANDLES_QUEUE_MAX_SIZE = 1024 |
| 95 | + DEFAULT_PROTECTED_RAW_DATA_MAX_SIZE = 1024 * 1024 * 10 |
| 96 | +
|
| 97 | + the total leaked raw data size limit of 10MB is larger than if we created |
| 98 | + and leaked 1024 handles with only a small amount of raw data attached to them (4MB |
| 99 | + for 1024 pages of 4KB). This ratio may be different for larger pages or for |
| 100 | + different configuration of the limits. For the sake of keeping the |
| 101 | + implementation reasonably simple and portable, we choose to ignore this |
| 102 | + for the time being. |
| 103 | +*/ |
| 104 | + |
| 105 | +typedef HPy UHPy; |
| 106 | +typedef HPy DHPy; |
| 107 | + |
| 108 | +/* Under CPython: |
| 109 | + - UHPy always end with 1 (see hpy.universal's _py2h and _h2py) |
| 110 | + - DHPy are pointers, so they always end with 0 |
| 111 | +
|
| 112 | + DHPy_sanity_check is a minimal check to ensure that we are not treating a |
| 113 | + UHPy as a DHPy. Note that DHPy_sanity_check works fine also on HPy_NULL. |
| 114 | +
|
| 115 | + NOTE: UHPy_sanity_check works ONLY with CPython's hpy.universal, because |
| 116 | + UHPys are computed in such a way that the last bit is always 1. On other |
| 117 | + implementations this assumption might not hold. By default, |
| 118 | + UHPy_sanity_check does nothing, unless you #define |
| 119 | + HPY_DEBUG_ENABLE_UHPY_SANITY_CHECK, which for CPython is done by setup.py |
| 120 | +*/ |
| 121 | +static inline void DHPy_sanity_check(DHPy dh) { |
| 122 | + assert( (dh._i & 1) == 0 ); |
| 123 | +} |
| 124 | + |
| 125 | +static inline void UHPy_sanity_check(UHPy uh) { |
| 126 | +#ifdef HPY_DEBUG_ENABLE_UHPY_SANITY_CHECK |
| 127 | + if (!HPy_IsNull(uh)) |
| 128 | + assert( (uh._i & 1) == 1 ); |
| 129 | +#endif |
| 130 | +} |
| 131 | + |
| 132 | +// NOTE: having a "generation" field is the easiest way to know when a handle |
| 133 | +// was created, but we waste 8 bytes per handle. Since all handles of the same |
| 134 | +// generation are stored sequentially in the open_handles list, a possible |
| 135 | +// alternative implementation is to put special placeholders inside the list |
| 136 | +// to mark the creation of a new generation |
| 137 | +typedef struct DebugHandle { |
| 138 | + UHPy uh; |
| 139 | + long generation; |
| 140 | + bool is_closed; |
| 141 | + // pointer to and size of any raw data associated with |
| 142 | + // the lifetime of the handle: |
| 143 | + void *associated_data; |
| 144 | + // allocation_stacktrace information if available |
| 145 | + char *allocation_stacktrace; |
| 146 | + HPy_ssize_t associated_data_size; |
| 147 | + struct DebugHandle *prev; |
| 148 | + struct DebugHandle *next; |
| 149 | +} DebugHandle; |
| 150 | + |
| 151 | +static inline DebugHandle * as_DebugHandle(DHPy dh) { |
| 152 | + DHPy_sanity_check(dh); |
| 153 | + return (DebugHandle *)dh._i; |
| 154 | +} |
| 155 | + |
| 156 | +static inline DHPy as_DHPy(DebugHandle *handle) { |
| 157 | + return (DHPy){(HPy_ssize_t)handle}; |
| 158 | +} |
| 159 | + |
| 160 | +DHPy DHPy_open(HPyContext *dctx, UHPy uh); |
| 161 | +void DHPy_close(HPyContext *dctx, DHPy dh); |
| 162 | +void DHPy_close_and_check(HPyContext *dctx, DHPy dh); |
| 163 | +void DHPy_free(HPyContext *dctx, DHPy dh); |
| 164 | +void DHPy_invalid_handle(HPyContext *dctx, DHPy dh); |
| 165 | + |
| 166 | +static inline UHPy DHPy_unwrap(HPyContext *dctx, DHPy dh) |
| 167 | +{ |
| 168 | + if (HPy_IsNull(dh)) |
| 169 | + return HPy_NULL; |
| 170 | + DebugHandle *handle = as_DebugHandle(dh); |
| 171 | + if (handle->is_closed) |
| 172 | + DHPy_invalid_handle(dctx, dh); |
| 173 | + return handle->uh; |
| 174 | +} |
| 175 | + |
| 176 | +/* === DHQueue === */ |
| 177 | + |
| 178 | +typedef struct { |
| 179 | + DebugHandle *head; |
| 180 | + DebugHandle *tail; |
| 181 | + HPy_ssize_t size; |
| 182 | +} DHQueue; |
| 183 | + |
| 184 | +void DHQueue_init(DHQueue *q); |
| 185 | +void DHQueue_append(DHQueue *q, DebugHandle *h); |
| 186 | +DebugHandle *DHQueue_popfront(DHQueue *q); |
| 187 | +void DHQueue_remove(DHQueue *q, DebugHandle *h); |
| 188 | +void DHQueue_sanity_check(DHQueue *q); |
| 189 | + |
| 190 | +/* === HPyDebugInfo === */ |
| 191 | + |
| 192 | +static const HPy_ssize_t DEFAULT_CLOSED_HANDLES_QUEUE_MAX_SIZE = 1024; |
| 193 | +static const HPy_ssize_t DEFAULT_PROTECTED_RAW_DATA_MAX_SIZE = 1024 * 1024 * 10; |
| 194 | + |
| 195 | +typedef struct { |
| 196 | + long magic_number; // used just for sanity checks |
| 197 | + HPyContext *uctx; |
| 198 | + long current_generation; |
| 199 | + |
| 200 | + // the following should be an HPyField, but it's complicate: |
| 201 | + // HPyFields should be used only on memory which is known by the GC, which |
| 202 | + // happens automatically if you use e.g. HPy_New, but currently |
| 203 | + // HPy_DebugInfo is malloced(). We need either: |
| 204 | + // 1. a generic HPy_GcMalloc() OR |
| 205 | + // 2. HPy_{Un}TrackMemory(), so that we can add manually allocated |
| 206 | + // memory as a GC root |
| 207 | + UHPy uh_on_invalid_handle; |
| 208 | + HPy_ssize_t closed_handles_queue_max_size; // configurable by the user |
| 209 | + HPy_ssize_t protected_raw_data_max_size; |
| 210 | + HPy_ssize_t protected_raw_data_size; |
| 211 | + // Limit for the stack traces captured for allocated handles |
| 212 | + // Value 0 implies that stack traces should not be captured |
| 213 | + HPy_ssize_t handle_alloc_stacktrace_limit; |
| 214 | + DHQueue open_handles; |
| 215 | + DHQueue closed_handles; |
| 216 | +} HPyDebugInfo; |
| 217 | + |
| 218 | +static inline HPyDebugInfo *get_info(HPyContext *dctx) |
| 219 | +{ |
| 220 | + HPyDebugInfo *info = (HPyDebugInfo*)dctx->_private; |
| 221 | + assert(info->magic_number == HPY_DEBUG_MAGIC); // sanity check |
| 222 | + return info; |
| 223 | +} |
| 224 | + |
| 225 | + |
| 226 | +void *raw_data_copy(const void* data, HPy_ssize_t size, bool write_protect); |
| 227 | +void raw_data_protect(void* data, HPy_ssize_t size); |
| 228 | +/* Return value: 0 indicates success, any different value indicates an error */ |
| 229 | +int raw_data_free(void *data, HPy_ssize_t size); |
| 230 | + |
| 231 | +void create_stacktrace(char **target, HPy_ssize_t max_frames_count); |
| 232 | + |
| 233 | +#endif /* HPY_DEBUG_INTERNAL_H */ |
0 commit comments