Skip to content

Commit b520f14

Browse files
authored
Merge pull request #2221 from Shopify/pure-C-constant-pool
Reimplement the RBS Constant Pool in pure C
2 parents 1b66e98 + 87714ba commit b520f14

File tree

5 files changed

+455
-66
lines changed

5 files changed

+455
-66
lines changed

ext/rbs_extension/location.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,13 @@ static VALUE location_end_pos(VALUE self) {
168168
return INT2FIX(loc->rg.end);
169169
}
170170

171-
static rbs_constant_id_t rbs_find_constant_id_from_ruby_symbol(VALUE symbol) {
171+
static rbs_constant_id_t rbs_constant_pool_insert_ruby_symbol(VALUE symbol) {
172172
VALUE name = rb_sym2str(symbol);
173173

174-
return rbs_constant_pool_find(RBS_GLOBAL_CONSTANT_POOL, (const uint8_t *) RSTRING_PTR(name), RSTRING_LEN(name));
174+
// Constants inserted here will never be freed, but that's acceptable because:
175+
// 1. Most symbols passed into here will be the ones already inserted into the constant pool by `parser.c`.
176+
// 2. Methods like `add_required_child` and `add_optional_child` will usually only get called with a few different symbols.
177+
return rbs_constant_pool_insert_constant(RBS_GLOBAL_CONSTANT_POOL, (const uint8_t *) RSTRING_PTR(name), RSTRING_LEN(name));
175178
}
176179

177180
static VALUE location_add_required_child(VALUE self, VALUE name, VALUE start, VALUE end) {
@@ -181,7 +184,7 @@ static VALUE location_add_required_child(VALUE self, VALUE name, VALUE start, VA
181184
rg.start = rbs_loc_position(FIX2INT(start));
182185
rg.end = rbs_loc_position(FIX2INT(end));
183186

184-
rbs_loc_add_required_child(loc, rbs_find_constant_id_from_ruby_symbol(name), rg);
187+
rbs_loc_add_required_child(loc, rbs_constant_pool_insert_ruby_symbol(name), rg);
185188

186189
return Qnil;
187190
}
@@ -193,15 +196,15 @@ static VALUE location_add_optional_child(VALUE self, VALUE name, VALUE start, VA
193196
rg.start = rbs_loc_position(FIX2INT(start));
194197
rg.end = rbs_loc_position(FIX2INT(end));
195198

196-
rbs_loc_add_optional_child(loc, rbs_find_constant_id_from_ruby_symbol(name), rg);
199+
rbs_loc_add_optional_child(loc, rbs_constant_pool_insert_ruby_symbol(name), rg);
197200

198201
return Qnil;
199202
}
200203

201204
static VALUE location_add_optional_no_child(VALUE self, VALUE name) {
202205
rbs_loc *loc = rbs_check_location(self);
203206

204-
rbs_loc_add_optional_child(loc, rbs_find_constant_id_from_ruby_symbol(name), NULL_RANGE);
207+
rbs_loc_add_optional_child(loc, rbs_constant_pool_insert_ruby_symbol(name), NULL_RANGE);
205208

206209
return Qnil;
207210
}
@@ -224,10 +227,16 @@ static VALUE rbs_new_location_from_loc_range(VALUE buffer, rbs_loc_range rg) {
224227
return obj;
225228
}
226229

230+
static rbs_constant_id_t rbs_constant_pool_find_ruby_symbol(VALUE symbol) {
231+
VALUE name = rb_sym2str(symbol);
232+
233+
return rbs_constant_pool_find(RBS_GLOBAL_CONSTANT_POOL, (const uint8_t *) RSTRING_PTR(name), RSTRING_LEN(name));
234+
}
235+
227236
static VALUE location_aref(VALUE self, VALUE name) {
228237
rbs_loc *loc = rbs_check_location(self);
229238

230-
rbs_constant_id_t id = rbs_find_constant_id_from_ruby_symbol(name);
239+
rbs_constant_id_t id = rbs_constant_pool_find_ruby_symbol(name);
231240

232241
if (loc->children != NULL && id != RBS_CONSTANT_ID_UNSET) {
233242
for (unsigned short i = 0; i < loc->children->len; i++) {
@@ -248,8 +257,7 @@ static VALUE location_aref(VALUE self, VALUE name) {
248257
}
249258

250259
static VALUE rbs_constant_to_ruby_symbol(rbs_constant_t *constant) {
251-
// Casts back the Ruby Symbol that was inserted by `rbs_constant_pool_insert_constant()`.
252-
return (VALUE) constant;
260+
return ID2SYM(rb_intern2((const char *) constant->start, constant->length));
253261
}
254262

255263
static VALUE location_optional_keys(VALUE self) {

ext/rbs_extension/main.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ Init_rbs_extension(void)
1818
rbs__init_location();
1919
rbs__init_parser();
2020

21-
rbs_constant_pool_init(RBS_GLOBAL_CONSTANT_POOL, 0);
21+
// Calculated based on the number of unique strings used with the `INTERN` macro in `parser.c`.
22+
//
23+
// ```bash
24+
// grep -o 'INTERN("\([^"]*\)")' ext/rbs_extension/parser.c \
25+
// | sed 's/INTERN("\(.*\)")/\1/' \
26+
// | sort -u \
27+
// | wc -l
28+
// ```
29+
const size_t num_uniquely_interned_strings = 26;
30+
rbs_constant_pool_init(RBS_GLOBAL_CONSTANT_POOL, num_uniquely_interned_strings);
31+
2232
ruby_vm_at_exit(Deinit_rbs_extension);
2333
}

ext/rbs_extension/parserstate.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,25 @@ parserstate *alloc_parser(VALUE buffer, lexstate *lexer, int start_pos, int end_
337337
.constant_pool = {},
338338
};
339339

340-
rbs_constant_pool_init(&parser->constant_pool, 0);
340+
// The parser's constant pool is mainly used for storing the names of type variables, which usually aren't many.
341+
// Below are some statistics gathered from the current test suite. We can see that 56% of parsers never add to their
342+
// constant pool at all. The initial capacity needs to be a power of 2. Picking 2 means that we won't need to realloc
343+
// in 85% of cases.
344+
//
345+
// TODO: recalculate these statistics based on a real world codebase, rather than the test suite.
346+
//
347+
// | Size | Count | Cumulative | % Coverage |
348+
// |------|-------|------------|------------|
349+
// | 0 | 7,862 | 7,862 | 56% |
350+
// | 1 | 3,196 | 11,058 | 79% |
351+
// | 2 | 778 | 12,719 | 85% |
352+
// | 3 | 883 | 11,941 | 91% |
353+
// | 4 | 478 | 13,197 | 95% |
354+
// | 5 | 316 | 13,513 | 97% |
355+
// | 6 | 288 | 13,801 | 99% |
356+
// | 7 | 144 | 13,945 | 100% |
357+
const size_t initial_pool_capacity = 2;
358+
rbs_constant_pool_init(&parser->constant_pool, initial_pool_capacity);
341359

342360
parser_advance(parser);
343361
parser_advance(parser);

include/rbs/util/rbs_constant_pool.h

Lines changed: 123 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,124 @@
2626
/**
2727
* A constant id is a unique identifier for a constant in the constant pool.
2828
*/
29-
typedef uintptr_t rbs_constant_id_t;
29+
typedef uint32_t rbs_constant_id_t;
30+
31+
/**
32+
* A list of constant IDs. Usually used to represent a set of locals.
33+
*/
34+
typedef struct {
35+
/** The number of constant ids in the list. */
36+
size_t size;
37+
38+
/** The number of constant ids that have been allocated in the list. */
39+
size_t capacity;
40+
41+
/** The constant ids in the list. */
42+
rbs_constant_id_t *ids;
43+
} rbs_constant_id_list_t;
44+
45+
/**
46+
* Initialize a list of constant ids.
47+
*
48+
* @param list The list to initialize.
49+
*/
50+
void rbs_constant_id_list_init(rbs_constant_id_list_t *list);
51+
52+
/**
53+
* Initialize a list of constant ids with a given capacity.
54+
*
55+
* @param list The list to initialize.
56+
* @param capacity The initial capacity of the list.
57+
*/
58+
void rbs_constant_id_list_init_capacity(rbs_constant_id_list_t *list, size_t capacity);
59+
60+
/**
61+
* Append a constant id to a list of constant ids. Returns false if any
62+
* potential reallocations fail.
63+
*
64+
* @param list The list to append to.
65+
* @param id The id to append.
66+
* @return Whether the append succeeded.
67+
*/
68+
bool rbs_constant_id_list_append(rbs_constant_id_list_t *list, rbs_constant_id_t id);
69+
70+
/**
71+
* Insert a constant id into a list of constant ids at the specified index.
72+
*
73+
* @param list The list to insert into.
74+
* @param index The index at which to insert.
75+
* @param id The id to insert.
76+
*/
77+
void rbs_constant_id_list_insert(rbs_constant_id_list_t *list, size_t index, rbs_constant_id_t id);
78+
79+
/**
80+
* Checks if the current constant id list includes the given constant id.
81+
*
82+
* @param list The list to check.
83+
* @param id The id to check for.
84+
* @return Whether the list includes the given id.
85+
*/
86+
bool rbs_constant_id_list_includes(rbs_constant_id_list_t *list, rbs_constant_id_t id);
87+
88+
/**
89+
* Free the memory associated with a list of constant ids.
90+
*
91+
* @param list The list to free.
92+
*/
93+
void rbs_constant_id_list_free(rbs_constant_id_list_t *list);
94+
95+
/**
96+
* The type of bucket in the constant pool hash map. This determines how the
97+
* bucket should be freed.
98+
*/
99+
typedef unsigned int rbs_constant_pool_bucket_type_t;
100+
101+
/** By default, each constant is a slice of the source. */
102+
static const rbs_constant_pool_bucket_type_t RBS_CONSTANT_POOL_BUCKET_DEFAULT = 0;
103+
104+
/** An owned constant is one for which memory has been allocated. */
105+
static const rbs_constant_pool_bucket_type_t RBS_CONSTANT_POOL_BUCKET_OWNED = 1;
106+
107+
/** A constant constant is known at compile time. */
108+
static const rbs_constant_pool_bucket_type_t RBS_CONSTANT_POOL_BUCKET_CONSTANT = 2;
109+
110+
/** A bucket in the hash map. */
111+
typedef struct {
112+
/** The incremental ID used for indexing back into the pool. */
113+
unsigned int id: 30;
114+
115+
/** The type of the bucket, which determines how to free it. */
116+
rbs_constant_pool_bucket_type_t type: 2;
117+
118+
/** The hash of the bucket. */
119+
uint32_t hash;
120+
} rbs_constant_pool_bucket_t;
30121

31122
/** A constant in the pool which effectively stores a string. */
32-
typedef uintptr_t rbs_constant_t;
123+
typedef struct {
124+
/** A pointer to the start of the string. */
125+
const uint8_t *start;
126+
127+
/** The length of the string. */
128+
size_t length;
129+
} rbs_constant_t;
33130

34131
/** The overall constant pool, which stores constants found while parsing. */
35132
typedef struct {
36-
void *dummy; // Workaround for structs not being allowed to be empty.
133+
/** The buckets in the hash map. */
134+
rbs_constant_pool_bucket_t *buckets;
135+
136+
/** The constants that are stored in the buckets. */
137+
rbs_constant_t *constants;
138+
139+
/** The number of buckets in the hash map. */
140+
uint32_t size;
141+
142+
/** The number of buckets that have been allocated in the hash map. */
143+
uint32_t capacity;
37144
} rbs_constant_pool_t;
38145

39-
// A temporary stand-in for the constant pool until start using a real implementation.
40-
// For now, it just defers to Ruby's ID interning mechanism (`rb_intern3`).
146+
// A global constant pool for storing permenant keywords, such as the names of location children in `parser.c`.
41147
extern rbs_constant_pool_t *RBS_GLOBAL_CONSTANT_POOL;
42148

43149
/**
@@ -80,6 +186,18 @@ rbs_constant_id_t rbs_constant_pool_find(const rbs_constant_pool_t *pool, const
80186
*/
81187
rbs_constant_id_t rbs_constant_pool_insert_shared(rbs_constant_pool_t *pool, const uint8_t *start, size_t length);
82188

189+
/**
190+
* Insert a constant into a constant pool from memory that is now owned by the
191+
* constant pool. Returns the id of the constant, or 0 if any potential calls to
192+
* resize fail.
193+
*
194+
* @param pool The pool to insert the constant into.
195+
* @param start A pointer to the start of the constant.
196+
* @param length The length of the constant.
197+
* @return The id of the constant.
198+
*/
199+
rbs_constant_id_t rbs_constant_pool_insert_owned(rbs_constant_pool_t *pool, uint8_t *start, size_t length);
200+
83201
/**
84202
* Insert a constant into a constant pool from memory that is constant. Returns
85203
* the id of the constant, or 0 if any potential calls to resize fail.

0 commit comments

Comments
 (0)