Skip to content

Commit 05d26bf

Browse files
committed
Use slices instead of locations
In the C API, we want to use slices instead of locations in the AST. In this case a "slice" is effectively the same thing as the location, except it is represented using a 32-bit offset and a 32-bit length. This will cut the space used by all of the locations in the AST in half. Note that from the Ruby/Java/JavaScript side, this is effectively an invisible change. This only impacts the C/Rust side.
1 parent c774ec2 commit 05d26bf

File tree

34 files changed

+1574
-1718
lines changed

34 files changed

+1574
-1718
lines changed

config.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -653,10 +653,6 @@ tokens:
653653
comment: "a separator between words in a list"
654654
- name: __END__
655655
comment: "marker for the point in the file at which the parser should stop"
656-
- name: MISSING
657-
comment: "a token that was expected but not found"
658-
- name: NOT_PROVIDED
659-
comment: "a token that was not present but it is okay"
660656
flags:
661657
- name: ArgumentsNodeFlags
662658
values:

docs/configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ The available values for `type` are:
6161
* `constant` - A field that is an integer that represents an index in the constant pool. This is a `pm_constant_id_t` in C.
6262
* `constant[]` - A field that is an array of constants. This is a `pm_constant_id_list_t` in C.
6363
* `location` - A field that is a location. This is a `pm_location_t` in C.
64-
* `location?` - A field that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
64+
* `location?` - A field that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `length` field will be `0`.
6565
* `uint8` - A field that is an 8-bit unsigned integer. This is a `uint8_t` in C.
6666
* `uint32` - A field that is a 32-bit unsigned integer. This is a `uint32_t` in C.
6767

ext/prism/extension.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -455,23 +455,23 @@ rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool free
455455
* Create a new Location instance from the given parser and bounds.
456456
*/
457457
static inline VALUE
458-
parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
459-
VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) };
458+
parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) {
459+
VALUE argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
460460
return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze);
461461
}
462462

463463
/**
464464
* Create a new Location instance from the given parser and location.
465465
*/
466-
#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
467-
parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start))
466+
#define PARSER_LOCATION(source, freeze, location) \
467+
parser_location(source, freeze, location.start, location.length)
468468

469469
/**
470470
* Build a new Comment instance from the given parser and comment.
471471
*/
472472
static inline VALUE
473-
parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
474-
VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) };
473+
parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) {
474+
VALUE argv[] = { PARSER_LOCATION(source, freeze, comment->location) };
475475
VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
476476
return rb_class_new_instance_freeze(1, argv, type, freeze);
477477
}
@@ -488,7 +488,7 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
488488
comment != NULL;
489489
comment = (const pm_comment_t *) comment->node.next
490490
) {
491-
VALUE value = parser_comment(parser, source, freeze, comment);
491+
VALUE value = parser_comment(source, freeze, comment);
492492
rb_ary_push(comments, value);
493493
}
494494

@@ -500,9 +500,9 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
500500
* Build a new MagicComment instance from the given parser and magic comment.
501501
*/
502502
static inline VALUE
503-
parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
504-
VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
505-
VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
503+
parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
504+
VALUE key_loc = parser_location(source, freeze, magic_comment->key.start, magic_comment->key.length);
505+
VALUE value_loc = parser_location(source, freeze, magic_comment->value.start, magic_comment->value.length);
506506
VALUE argv[] = { key_loc, value_loc };
507507
return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze);
508508
}
@@ -519,7 +519,7 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
519519
magic_comment != NULL;
520520
magic_comment = (const pm_magic_comment_t *) magic_comment->node.next
521521
) {
522-
VALUE value = parser_magic_comment(parser, source, freeze, magic_comment);
522+
VALUE value = parser_magic_comment(source, freeze, magic_comment);
523523
rb_ary_push(magic_comments, value);
524524
}
525525

@@ -533,10 +533,10 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
533533
*/
534534
static VALUE
535535
parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
536-
if (parser->data_loc.end == NULL) {
536+
if (parser->data_loc.length == 0) {
537537
return Qnil;
538538
} else {
539-
return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
539+
return parser_location(source, freeze, parser->data_loc.start, parser->data_loc.length);
540540
}
541541
}
542542

@@ -554,7 +554,7 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo
554554
) {
555555
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
556556
VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding));
557-
VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location);
557+
VALUE location = PARSER_LOCATION(source, freeze, error->location);
558558

559559
VALUE level = Qnil;
560560
switch (error->level) {
@@ -594,7 +594,7 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source,
594594
) {
595595
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
596596
VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding));
597-
VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location);
597+
VALUE location = PARSER_LOCATION(source, freeze, warning->location);
598598

599599
VALUE level = Qnil;
600600
switch (warning->level) {

include/prism.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void
143143
/**
144144
* Serialize the given list of comments to the given buffer.
145145
*
146-
* @param parser The parser to serialize.
147146
* @param list The list of comments to serialize.
148147
* @param buffer The buffer to serialize to.
149148
*/
150-
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
149+
void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer);
151150

152151
/**
153152
* Serialize the name of the encoding to the buffer.

include/prism/defines.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,4 +257,37 @@
257257
#define PRISM_FALLTHROUGH
258258
#endif
259259

260+
/**
261+
* We need to align nodes in the AST to a pointer boundary so that it can be
262+
* safely cast to different node types. Use PRISM_ALIGNAS/PRISM_ALIGNOF to
263+
* specify alignment in a compiler-agnostic way.
264+
*/
265+
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */
266+
#include <stdalign.h>
267+
268+
/** Specify alignment for a type or variable. */
269+
#define PRISM_ALIGNAS(size) alignas(size)
270+
271+
/** Get the alignment requirement of a type. */
272+
#define PRISM_ALIGNOF(type) alignof(type)
273+
#elif defined(__GNUC__) || defined(__clang__)
274+
/** Specify alignment for a type or variable. */
275+
#define PRISM_ALIGNAS(size) __attribute__((aligned(size)))
276+
277+
/** Get the alignment requirement of a type. */
278+
#define PRISM_ALIGNOF(type) __alignof__(type)
279+
#elif defined(_MSC_VER)
280+
/** Specify alignment for a type or variable. */
281+
#define PRISM_ALIGNAS(size) __declspec(align(size))
282+
283+
/** Get the alignment requirement of a type. */
284+
#define PRISM_ALIGNOF(type) __alignof(type)
285+
#else
286+
/** Void because this platform does not support specifying alignment. */
287+
#define PRISM_ALIGNAS(size)
288+
289+
/** Fallback to sizeof as alignment requirement of a type. */
290+
#define PRISM_ALIGNOF(type) sizeof(type)
291+
#endif
292+
260293
#endif

include/prism/parser.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -479,17 +479,11 @@ typedef struct {
479479
/** The embedded base node. */
480480
pm_list_node_t node;
481481

482-
/** A pointer to the start of the key in the source. */
483-
const uint8_t *key_start;
482+
/** The key of the magic comment. */
483+
pm_location_t key;
484484

485-
/** A pointer to the start of the value in the source. */
486-
const uint8_t *value_start;
487-
488-
/** The length of the key in the source. */
489-
uint32_t key_length;
490-
491-
/** The length of the value in the source. */
492-
uint32_t value_length;
485+
/** The value of the magic comment. */
486+
pm_location_t value;
493487
} pm_magic_comment_t;
494488

495489
/**

include/prism/static_literals.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,14 @@ typedef struct {
9292
* Add a node to the set of static literals.
9393
*
9494
* @param newline_list The list of newline offsets to use to calculate lines.
95+
* @param start The start of the source being parsed.
9596
* @param start_line The line number that the parser starts on.
9697
* @param literals The set of static literals to add the node to.
9798
* @param node The node to add to the set.
9899
* @param replace Whether to replace the previous node if one already exists.
99100
* @return A pointer to the node that is being overwritten, if there is one.
100101
*/
101-
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
102+
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
102103

103104
/**
104105
* Free the internal memory associated with the given static literals set.
@@ -112,10 +113,11 @@ void pm_static_literals_free(pm_static_literals_t *literals);
112113
*
113114
* @param buffer The buffer to write the string to.
114115
* @param newline_list The list of newline offsets to use to calculate lines.
116+
* @param start The start of the source being parsed.
115117
* @param start_line The line number that the parser starts on.
116118
* @param encoding_name The name of the encoding of the source being parsed.
117119
* @param node The node to create a string representation of.
118120
*/
119-
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
121+
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node);
120122

121123
#endif

include/prism/util/pm_char.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
3131
* @param string The string to search.
3232
* @param length The maximum number of characters to search.
3333
* @param newline_list The list of newlines to populate.
34+
* @param start_offset The offset at which the string occurs in the source, for
35+
* the purpose of tracking newlines.
3436
* @return The number of characters at the start of the string that are
3537
* whitespace.
3638
*/
37-
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
39+
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list, uint32_t start_offset);
3840

3941
/**
4042
* Returns the number of characters at the start of the string that are inline

include/prism/util/pm_newline_list.h

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,14 @@
2626
* sorted/inserted in ascending order.
2727
*/
2828
typedef struct {
29-
/** A pointer to the start of the source string. */
30-
const uint8_t *start;
31-
3229
/** The number of offsets in the list. */
3330
size_t size;
3431

3532
/** The capacity of the list that has been allocated. */
3633
size_t capacity;
3734

3835
/** The list of offsets. */
39-
size_t *offsets;
36+
uint32_t *offsets;
4037
} pm_newline_list_t;
4138

4239
/**
@@ -55,53 +52,51 @@ typedef struct {
5552
* allocation of the offsets succeeds, otherwise returns false.
5653
*
5754
* @param list The list to initialize.
58-
* @param start A pointer to the start of the source string.
5955
* @param capacity The initial capacity of the list.
6056
* @return True if the allocation of the offsets succeeds, otherwise false.
6157
*/
62-
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
58+
bool pm_newline_list_init(pm_newline_list_t *list, size_t capacity);
6359

6460
/**
6561
* Clear out the newlines that have been appended to the list.
6662
*
6763
* @param list The list to clear.
6864
*/
69-
void
70-
pm_newline_list_clear(pm_newline_list_t *list);
65+
void pm_newline_list_clear(pm_newline_list_t *list);
7166

7267
/**
7368
* Append a new offset to the newline list. Returns true if the reallocation of
7469
* the offsets succeeds (if one was necessary), otherwise returns false.
7570
*
7671
* @param list The list to append to.
77-
* @param cursor A pointer to the offset to append.
72+
* @param cursor The offset to append.
7873
* @return True if the reallocation of the offsets succeeds (if one was
7974
* necessary), otherwise false.
8075
*/
81-
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
76+
bool pm_newline_list_append(pm_newline_list_t *list, uint32_t cursor);
8277

8378
/**
8479
* Returns the line of the given offset. If the offset is not in the list, the
8580
* line of the closest offset less than the given offset is returned.
8681
*
8782
* @param list The list to search.
88-
* @param cursor A pointer to the offset to search for.
83+
* @param cursor The offset to search for.
8984
* @param start_line The line to start counting from.
9085
* @return The line of the given offset.
9186
*/
92-
int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
87+
int32_t pm_newline_list_line(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line);
9388

9489
/**
9590
* Returns the line and column of the given offset. If the offset is not in the
9691
* list, the line and column of the closest offset less than the given offset
9792
* are returned.
9893
*
9994
* @param list The list to search.
100-
* @param cursor A pointer to the offset to search for.
95+
* @param cursor The offset to search for.
10196
* @param start_line The line to start counting from.
10297
* @return The line and column of the given offset.
10398
*/
104-
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
99+
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line);
105100

106101
/**
107102
* Free the internal memory allocated for the newline list.

lib/prism/translation/parser/compiler.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1767,7 +1767,7 @@ def visit_symbol_node(node)
17671767
end
17681768
else
17691769
parts =
1770-
if node.value == ""
1770+
if node.value_loc.nil?
17711771
[]
17721772
elsif node.value.include?("\n")
17731773
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)

0 commit comments

Comments
 (0)