diff --git a/docs/ruby_api.md b/docs/ruby_api.md index 394d322916..6cfde4c1f3 100644 --- a/docs/ruby_api.md +++ b/docs/ruby_api.md @@ -23,7 +23,7 @@ The full API is documented below. * `Prism.parse_stream(io)` - parse the syntax tree corresponding to the source that is read out of the given IO object using the `#gets` method and return it within a parse result * `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens * `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens -* `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree +* `Prism.load(source, serialized, freeze = false)` - load the serialized syntax tree using the source as a reference into a syntax tree * `Prism.parse_comments(source)` - parse the comments corresponding to the given source string and return them * `Prism.parse_file_comments(source)` - parse the comments corresponding to the given source file and return them * `Prism.parse_success?(source)` - parse the syntax tree corresponding to the given source string and return true if it was parsed without errors diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 26415c2b6d..e8f678d341 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -30,6 +30,7 @@ VALUE rb_cPrismDebugEncoding; ID rb_id_option_command_line; ID rb_id_option_encoding; ID rb_id_option_filepath; +ID rb_id_option_freeze; ID rb_id_option_frozen_string_literal; ID rb_id_option_line; ID rb_id_option_main_script; @@ -180,6 +181,8 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value)); } else if (key_id == rb_id_option_partial_script) { if (!NIL_P(value)) pm_options_partial_script_set(options, RTEST(value)); + } else if (key_id == rb_id_option_freeze) { + if (!NIL_P(value)) pm_options_freeze_set(options, RTEST(value)); } else { rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key); } @@ -344,6 +347,7 @@ dump(int argc, VALUE *argv, VALUE self) { #endif VALUE value = dump_input(&input, &options); + if (options.freeze) rb_obj_freeze(value); #ifdef PRISM_BUILD_DEBUG xfree(dup); @@ -383,56 +387,90 @@ dump_file(int argc, VALUE *argv, VALUE self) { /* Extracting values for the parse result */ /******************************************************************************/ +/** + * The same as rb_class_new_instance, but accepts an additional boolean to + * indicate whether or not the resulting class instance should be frozen. + */ +static inline VALUE +rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool freeze) { + VALUE value = rb_class_new_instance(argc, argv, klass); + if (freeze) rb_obj_freeze(value); + return value; +} + +/** + * Create a new Location instance from the given parser and bounds. + */ +static inline VALUE +parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) { + VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) }; + return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze); +} + +/** + * Create a new Location instance from the given parser and location. + */ +#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \ + parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start)) + +/** + * Build a new Comment instance from the given parser and comment. + */ +static inline VALUE +parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) { + VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) }; + VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment; + return rb_class_new_instance_freeze(1, argv, type, freeze); +} + /** * Extract the comments out of the parser into an array. */ static VALUE -parser_comments(pm_parser_t *parser, VALUE source) { +parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { VALUE comments = rb_ary_new_capa(parser->comment_list.size); - for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) { - VALUE location_argv[] = { - source, - LONG2FIX(comment->location.start - parser->start), - LONG2FIX(comment->location.end - comment->location.start) - }; - - VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment; - VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) }; - rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type)); + for ( + const pm_comment_t *comment = (const pm_comment_t *) parser->comment_list.head; + comment != NULL; + comment = (const pm_comment_t *) comment->node.next + ) { + VALUE value = parser_comment(parser, source, freeze, comment); + rb_ary_push(comments, value); } + if (freeze) rb_obj_freeze(comments); return comments; } +/** + * Build a new MagicComment instance from the given parser and magic comment. + */ +static inline VALUE +parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) { + VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length); + VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length); + VALUE argv[] = { key_loc, value_loc }; + return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze); +} + /** * Extract the magic comments out of the parser into an array. */ static VALUE -parser_magic_comments(pm_parser_t *parser, VALUE source) { +parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size); - for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) { - VALUE key_loc_argv[] = { - source, - LONG2FIX(magic_comment->key_start - parser->start), - LONG2FIX(magic_comment->key_length) - }; - - VALUE value_loc_argv[] = { - source, - LONG2FIX(magic_comment->value_start - parser->start), - LONG2FIX(magic_comment->value_length) - }; - - VALUE magic_comment_argv[] = { - rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation), - rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation) - }; - - rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment)); + for ( + const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) parser->magic_comment_list.head; + magic_comment != NULL; + magic_comment = (const pm_magic_comment_t *) magic_comment->node.next + ) { + VALUE value = parser_magic_comment(parser, source, freeze, magic_comment); + rb_ary_push(magic_comments, value); } + if (freeze) rb_obj_freeze(magic_comments); return magic_comments; } @@ -441,17 +479,11 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) { * exists. */ static VALUE -parser_data_loc(const pm_parser_t *parser, VALUE source) { +parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) { if (parser->data_loc.end == NULL) { return Qnil; } else { - VALUE argv[] = { - source, - LONG2FIX(parser->data_loc.start - parser->start), - LONG2FIX(parser->data_loc.end - parser->data_loc.start) - }; - - return rb_class_new_instance(3, argv, rb_cPrismLocation); + return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc); } } @@ -459,16 +491,17 @@ parser_data_loc(const pm_parser_t *parser, VALUE source) { * Extract the errors out of the parser into an array. */ static VALUE -parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { +parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { VALUE errors = rb_ary_new_capa(parser->error_list.size); - pm_diagnostic_t *error; - for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { - VALUE location_argv[] = { - source, - LONG2FIX(error->location.start - parser->start), - LONG2FIX(error->location.end - error->location.start) - }; + for ( + const pm_diagnostic_t *error = (const pm_diagnostic_t *) parser->error_list.head; + error != NULL; + error = (const pm_diagnostic_t *) error->node.next + ) { + VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))); + VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding)); + VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location); VALUE level = Qnil; switch (error->level) { @@ -485,16 +518,12 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level); } - VALUE error_argv[] = { - ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))), - rb_enc_str_new_cstr(error->message, encoding), - rb_class_new_instance(3, location_argv, rb_cPrismLocation), - level - }; - - rb_ary_push(errors, rb_class_new_instance(4, error_argv, rb_cPrismParseError)); + VALUE argv[] = { type, message, location, level }; + VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, freeze); + rb_ary_push(errors, value); } + if (freeze) rb_obj_freeze(errors); return errors; } @@ -502,16 +531,17 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { * Extract the warnings out of the parser into an array. */ static VALUE -parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { +parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { VALUE warnings = rb_ary_new_capa(parser->warning_list.size); - pm_diagnostic_t *warning; - for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) { - VALUE location_argv[] = { - source, - LONG2FIX(warning->location.start - parser->start), - LONG2FIX(warning->location.end - warning->location.start) - }; + for ( + const pm_diagnostic_t *warning = (const pm_diagnostic_t *) parser->warning_list.head; + warning != NULL; + warning = (const pm_diagnostic_t *) warning->node.next + ) { + VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))); + VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding)); + VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location); VALUE level = Qnil; switch (warning->level) { @@ -525,16 +555,12 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level); } - VALUE warning_argv[] = { - ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))), - rb_enc_str_new_cstr(warning->message, encoding), - rb_class_new_instance(3, location_argv, rb_cPrismLocation), - level - }; - - rb_ary_push(warnings, rb_class_new_instance(4, warning_argv, rb_cPrismParseWarning)); + VALUE argv[] = { type, message, location, level }; + VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, freeze); + rb_ary_push(warnings, value); } + if (freeze) rb_obj_freeze(warnings); return warnings; } @@ -542,18 +568,18 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { * Create a new parse result from the given parser, value, encoding, and source. */ static VALUE -parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) { +parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source, bool freeze) { VALUE result_argv[] = { value, - parser_comments(parser, source), - parser_magic_comments(parser, source), - parser_data_loc(parser, source), - parser_errors(parser, encoding, source), - parser_warnings(parser, encoding, source), + parser_comments(parser, source, freeze), + parser_magic_comments(parser, source, freeze), + parser_data_loc(parser, source, freeze), + parser_errors(parser, encoding, source, freeze), + parser_warnings(parser, encoding, source, freeze), source }; - return rb_class_new_instance(7, result_argv, class); + return rb_class_new_instance_freeze(7, result_argv, class, freeze); } /******************************************************************************/ @@ -569,6 +595,7 @@ typedef struct { VALUE source; VALUE tokens; rb_encoding *encoding; + bool freeze; } parse_lex_data_t; /** @@ -580,10 +607,13 @@ static void parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) { parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data; - VALUE yields = rb_assoc_new( - pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source), - INT2FIX(parser->lex_state) - ); + VALUE value = pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source, parse_lex_data->freeze); + VALUE yields = rb_assoc_new(value, INT2FIX(parser->lex_state)); + + if (parse_lex_data->freeze) { + rb_obj_freeze(value); + rb_obj_freeze(yields); + } rb_ary_push(parse_lex_data->tokens, yields); } @@ -603,14 +633,37 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) { // one or two tokens, since the encoding can only change at the top of the // file. VALUE tokens = parse_lex_data->tokens; + VALUE next_tokens = rb_ary_new(); + for (long index = 0; index < RARRAY_LEN(tokens); index++) { VALUE yields = rb_ary_entry(tokens, index); VALUE token = rb_ary_entry(yields, 0); VALUE value = rb_ivar_get(token, rb_intern("@value")); - rb_enc_associate(value, parse_lex_data->encoding); - ENC_CODERANGE_CLEAR(value); + VALUE next_value = rb_str_dup(value); + + rb_enc_associate(next_value, parse_lex_data->encoding); + if (parse_lex_data->freeze) rb_obj_freeze(next_value); + + VALUE next_token_argv[] = { + parse_lex_data->source, + rb_ivar_get(token, rb_intern("@type")), + next_value, + rb_ivar_get(token, rb_intern("@location")) + }; + + VALUE next_token = rb_class_new_instance(4, next_token_argv, rb_cPrismToken); + VALUE next_yields = rb_assoc_new(next_token, rb_ary_entry(yields, 1)); + + if (parse_lex_data->freeze) { + rb_obj_freeze(next_token); + rb_obj_freeze(next_yields); + } + + rb_ary_push(next_tokens, next_yields); } + + rb_ary_replace(parse_lex_data->tokens, next_tokens); } /** @@ -630,7 +683,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod parse_lex_data_t parse_lex_data = { .source = source, .tokens = rb_ary_new(), - .encoding = rb_utf8_encoding() + .encoding = rb_utf8_encoding(), + .freeze = options->freeze, }; parse_lex_data_t *data = &parse_lex_data; @@ -653,14 +707,22 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index])); } + if (options->freeze) { + rb_obj_freeze(source_string); + rb_obj_freeze(offsets); + rb_obj_freeze(source); + rb_obj_freeze(parse_lex_data.tokens); + } + VALUE result; if (return_nodes) { VALUE value = rb_ary_new_capa(2); - rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source)); + rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, options->freeze)); rb_ary_push(value, parse_lex_data.tokens); - result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source); + if (options->freeze) rb_obj_freeze(value); + result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, options->freeze); } else { - result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source); + result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze); } pm_node_destroy(&parser, node); @@ -726,9 +788,13 @@ parse_input(pm_string_t *input, const pm_options_t *options) { pm_node_t *node = pm_parse(&parser); rb_encoding *encoding = rb_enc_find(parser.encoding->name); - VALUE source = pm_source_new(&parser, encoding); - VALUE value = pm_ast_new(&parser, node, encoding, source); - VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source) ; + VALUE source = pm_source_new(&parser, encoding, options->freeze); + VALUE value = pm_ast_new(&parser, node, encoding, source, options->freeze); + VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options->freeze); + + if (options->freeze) { + rb_obj_freeze(source); + } pm_node_destroy(&parser, node); pm_parser_free(&parser); @@ -750,6 +816,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) { * encoding or nil. * * `filepath` - the filepath of the source being parsed. This should be a * string or nil. + * * `freeze` - whether or not to deeply freeze the AST. This should be a + * boolean or nil. * * `frozen_string_literal` - whether or not the frozen string literal pragma * has been set. This should be a boolean or nil. * * `line` - the line number that the parse starts on. This should be an @@ -769,12 +837,12 @@ parse_input(pm_string_t *input, const pm_options_t *options) { * parsed. This should be an array of arrays of symbols or nil. Scopes are * ordered from the outermost scope to the innermost one. * * `version` - the version of Ruby syntax that prism should used to parse Ruby - * code. By default prism assumes you want to parse with the latest version - * of Ruby syntax (which you can trigger with `nil` or `"latest"`). You - * may also restrict the syntax to a specific version of Ruby, e.g., with `"3.3.0"`. - * To parse with the same syntax version that the current Ruby is running - * use `version: RUBY_VERSION`. Raises ArgumentError if the version is not - * currently supported by Prism. + * code. By default prism assumes you want to parse with the latest + * version of Ruby syntax (which you can trigger with `nil` or + * `"latest"`). You may also restrict the syntax to a specific version of + * Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that + * the current Ruby is running use `version: RUBY_VERSION`. Raises + * ArgumentError if the version is not currently supported by Prism. */ static VALUE parse(int argc, VALUE *argv, VALUE self) { @@ -922,9 +990,9 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options); rb_encoding *encoding = rb_enc_find(parser.encoding->name); - VALUE source = pm_source_new(&parser, encoding); - VALUE value = pm_ast_new(&parser, node, encoding, source); - VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source); + VALUE source = pm_source_new(&parser, encoding, options.freeze); + VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze); + VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze); pm_node_destroy(&parser, node); pm_buffer_free(&buffer); @@ -944,8 +1012,8 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { pm_node_t *node = pm_parse(&parser); rb_encoding *encoding = rb_enc_find(parser.encoding->name); - VALUE source = pm_source_new(&parser, encoding); - VALUE comments = parser_comments(&parser, source); + VALUE source = pm_source_new(&parser, encoding, options->freeze); + VALUE comments = parser_comments(&parser, source, options->freeze); pm_node_destroy(&parser, node); pm_parser_free(&parser); @@ -1240,6 +1308,7 @@ Init_prism(void) { rb_id_option_command_line = rb_intern_const("command_line"); rb_id_option_encoding = rb_intern_const("encoding"); rb_id_option_filepath = rb_intern_const("filepath"); + rb_id_option_freeze = rb_intern_const("freeze"); rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal"); rb_id_option_line = rb_intern_const("line"); rb_id_option_main_script = rb_intern_const("main_script"); diff --git a/ext/prism/extension.h b/ext/prism/extension.h index 6fe3965d9a..9b0f554831 100644 --- a/ext/prism/extension.h +++ b/ext/prism/extension.h @@ -7,9 +7,9 @@ #include #include "prism.h" -VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding); -VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source); -VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source); +VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze); +VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze); +VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze); VALUE pm_integer_new(const pm_integer_t *integer); void Init_prism_api_node(void); diff --git a/include/prism/options.h b/include/prism/options.h index c40b4d828a..45eb81caa8 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -160,6 +160,13 @@ typedef struct pm_options { * inside another script. */ bool partial_script; + + /** + * Whether or not the parser should freeze the nodes that it creates. This + * makes it possible to have a deeply frozen AST that is safe to share + * between concurrency primitives. + */ + bool freeze; } pm_options_t; /** @@ -285,6 +292,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, b */ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script); +/** + * Set the freeze option on the given options struct. + * + * @param options The options struct to set the freeze value on. + * @param freeze The freeze value to set. + */ +PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze); + /** * Allocate and zero out the scopes array on the given options struct. * @@ -355,6 +370,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); * | `1` | encoding locked | * | `1` | main script | * | `1` | partial script | + * | `1` | freeze | * | `4` | the number of scopes | * | ... | the scopes | * diff --git a/java/org/prism/ParsingOptions.java b/java/org/prism/ParsingOptions.java index 379d0d2202..0fc9d03e47 100644 --- a/java/org/prism/ParsingOptions.java +++ b/java/org/prism/ParsingOptions.java @@ -82,6 +82,9 @@ public static byte[] serialize(byte[] filepath, int line, byte[] encoding, boole // partialScript output.write(partialScript ? 1 : 0); + // freeze + output.write(0); + // scopes // number of scopes diff --git a/javascript/src/parsePrism.js b/javascript/src/parsePrism.js index 8a613767ed..1d0233e9c9 100644 --- a/javascript/src/parsePrism.js +++ b/javascript/src/parsePrism.js @@ -122,6 +122,9 @@ function dumpOptions(options) { template.push("C"); values.push(dumpBooleanOption(options.partial_script)); + template.push("C"); + values.push(0); + template.push("L"); if (options.scopes) { const scopes = options.scopes; diff --git a/lib/prism.rb b/lib/prism.rb index 94f4c8ca5f..6cae171f5e 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -59,11 +59,11 @@ def self.lex_ripper(source) end # :call-seq: - # Prism::load(source, serialized) -> ParseResult + # Prism::load(source, serialized, freeze) -> ParseResult # # Load the serialized AST using the source as a reference into a tree. - def self.load(source, serialized) - Serialize.load(source, serialized) + def self.load(source, serialized, freeze = false) + Serialize.load_parse(source, serialized, freeze) end end diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 9ab1557838..eda61b3ead 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -15,7 +15,8 @@ module LibRubyParser # :nodoc: # must align with the build shared library from make/rake. libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}" - if File.exist? libprism_in_build + + if File.exist?(libprism_in_build) INCLUDE_DIR = File.expand_path("../../include", __dir__) ffi_lib libprism_in_build else @@ -279,7 +280,7 @@ def parse_stream(stream, **options) # access to the IO object already through the closure of the lambda, we # can pass a null pointer here and not worry. LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options)) - Prism.load(source, buffer.read) + Prism.load(source, buffer.read, options.fetch(:freeze, false)) end end @@ -354,50 +355,37 @@ def profile_file(filepath, **options) def dump_common(string, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) - buffer.read + + dumped = buffer.read + dumped.freeze if options.fetch(:freeze, false) + + dumped end end def lex_common(string, code, options) # :nodoc: - serialized = LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - buffer.read + Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) end - - Serialize.load_tokens(Source.for(code), serialized) end def parse_common(string, code, options) # :nodoc: serialized = dump_common(string, options) - Prism.load(code, serialized) + Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) end def parse_comments_common(string, code, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) - - source = Source.for(code) - loader = Serialize::Loader.new(source, buffer.read) - - loader.load_header - loader.load_encoding - loader.load_start_line - loader.load_comments + Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) end end def parse_lex_common(string, code, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - - source = Source.for(code) - loader = Serialize::Loader.new(source, buffer.read) - - tokens = loader.load_tokens - node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes - tokens.each { |token,| token.value.force_encoding(loader.encoding) } - - ParseLexResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source) + Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) end end @@ -482,6 +470,9 @@ def dump_options(options) template << "C" values << (options.fetch(:partial_script, false) ? 1 : 0) + template << "C" + values << (options.fetch(:freeze, false) ? 1 : 0) + template << "L" if (scopes = options[:scopes]) values << scopes.length diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 7aee20c9de..e76ea7e17e 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -48,6 +48,16 @@ def initialize(source, start_line = 1, offsets = []) @offsets = offsets # set after parsing is done end + # Replace the value of start_line with the given value. + def replace_start_line(start_line) + @start_line = start_line + end + + # Replace the value of offsets with the given value. + def replace_offsets(offsets) + @offsets.replace(offsets) + end + # Returns the encoding of the source code, which is set by parameters to the # parser or by the encoding magic comment. def encoding @@ -132,6 +142,13 @@ def code_units_column(byte_offset, encoding) code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding) end + # Freeze this object and the objects it contains. + def deep_freeze + source.freeze + offsets.freeze + freeze + end + private # Binary search through the offsets to find the line number for the given @@ -854,5 +871,12 @@ def inspect location super end + + # Freeze this object and the objects it contains. + def deep_freeze + value.freeze + location.freeze + freeze + end end end diff --git a/rbi/prism.rbi b/rbi/prism.rbi index d7838a8279..1ba5d1fc3d 100644 --- a/rbi/prism.rbi +++ b/rbi/prism.rbi @@ -1,17 +1,17 @@ # typed: strict module Prism - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(String) } - def self.dump(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(String) } + def self.dump(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(String) } - def self.dump_file(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(String) } + def self.dump_file(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::LexResult) } - def self.lex(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::LexResult) } + def self.lex(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::LexResult) } - def self.lex_file(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::LexResult) } + def self.lex_file(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end sig { params(source: String, options: T::Hash[Symbol, T.untyped]).returns(Prism::LexCompat::Result) } def self.lex_compat(source, **options); end @@ -19,45 +19,45 @@ module Prism sig { params(source: String).returns(T::Array[T.untyped]) } def self.lex_ripper(source); end - sig { params(source: String, serialized: String).returns(Prism::ParseResult) } - def self.load(source, serialized); end + sig { params(source: String, serialized: String, freeze: T.nilable(T::Boolean)).returns(Prism::ParseResult) } + def self.load(source, serialized, freeze = false); end - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseResult) } - def self.parse(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseResult) } + def self.parse(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseResult) } - def self.parse_file(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseResult) } + def self.parse_file(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).void } - def self.profile(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).void } + def self.profile(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).void } - def self.profile_file(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).void } + def self.profile_file(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(stream: T.any(IO, StringIO), command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseResult) } - def self.parse_stream(stream, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(stream: T.any(IO, StringIO), command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseResult) } + def self.parse_stream(stream, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(String, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Array[Prism::Comment]) } - def self.parse_comments(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(String, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Array[Prism::Comment]) } + def self.parse_comments(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Array[Prism::Comment]) } - def self.parse_file_comments(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Array[Prism::Comment]) } + def self.parse_file_comments(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseLexResult) } - def self.parse_lex(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseLexResult) } + def self.parse_lex(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseLexResult) } - def self.parse_lex_file(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(Prism::ParseLexResult) } + def self.parse_lex_file(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } - def self.parse_success?(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } + def self.parse_success?(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } - def self.parse_failure?(source, command_line: nil, encoding: nil, filepath: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(source: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), filepath: T.nilable(String), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } + def self.parse_failure?(source, command_line: nil, encoding: nil, filepath: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } - def self.parse_file_success?(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } + def self.parse_file_success?(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end - sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } - def self.parse_file_failure?(filepath, command_line: nil, encoding: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end + sig { params(filepath: String, command_line: T.nilable(String), encoding: T.nilable(T.any(FalseClass, Encoding)), freeze: T.nilable(T::Boolean), frozen_string_literal: T.nilable(T::Boolean), line: T.nilable(Integer), main_script: T.nilable(T::Boolean), partial_script: T.nilable(T::Boolean), scopes: T.nilable(T::Array[T::Array[Symbol]]), version: T.nilable(String)).returns(T::Boolean) } + def self.parse_file_failure?(filepath, command_line: nil, encoding: nil, freeze: nil, frozen_string_literal: nil, line: nil, main_script: nil, partial_script: nil, scopes: nil, version: nil); end end diff --git a/rbi/prism/parse_result.rbi b/rbi/prism/parse_result.rbi index ef1f051e76..8d52ed3daf 100644 --- a/rbi/prism/parse_result.rbi +++ b/rbi/prism/parse_result.rbi @@ -13,6 +13,12 @@ class Prism::Source sig { params(source: String, start_line: Integer, offsets: T::Array[Integer]).void } def initialize(source, start_line = 1, offsets = []); end + sig { params(start_line: Integer).void } + def replace_start_line(start_line); end + + sig { params(offsets: T::Array[Integer]).void } + def replace_offsets(offsets); end + sig { returns(Encoding) } def encoding; end diff --git a/sig/prism/parse_result.rbs b/sig/prism/parse_result.rbs index 7a4d1a1c4e..164421114f 100644 --- a/sig/prism/parse_result.rbs +++ b/sig/prism/parse_result.rbs @@ -9,6 +9,8 @@ module Prism attr_reader offsets: Array[Integer] def initialize: (String source, ?Integer start_line, ?Array[Integer] offsets) -> void + def replace_start_line: (Integer start_line) -> void + def replace_offsets: (Array[Integer] offsets) -> void def encoding: () -> Encoding def lines: () -> Array[String] def slice: (Integer byte_offset, Integer length) -> String diff --git a/sig/prism/serialize.rbs b/sig/prism/serialize.rbs index bf2a4f5b21..71a7c5c1c9 100644 --- a/sig/prism/serialize.rbs +++ b/sig/prism/serialize.rbs @@ -1,6 +1,8 @@ module Prism module Serialize - def self.load: (String, String) -> ParseResult - def self.load_tokens: (Source, String) -> LexResult + def self.load_parse: (String, String, bool) -> ParseResult + def self.load_lex: (String, String, bool) -> LexResult + def self.load_parse_comments: (String, String, bool) -> Array[comment] + def self.load_parse_lex: (String, String, bool) -> ParseLexResult end end diff --git a/src/options.c b/src/options.c index 31ceeb200e..b5be140820 100644 --- a/src/options.c +++ b/src/options.c @@ -139,6 +139,14 @@ pm_options_partial_script_set(pm_options_t *options, bool partial_script) { options->partial_script = partial_script; } +/** + * Set the freeze option on the given options struct. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_freeze_set(pm_options_t *options, bool freeze) { + options->freeze = freeze; +} + // For some reason, GCC analyzer thinks we're leaking allocated scopes and // locals here, even though we definitely aren't. This is a false positive. // Ideally we wouldn't need to suppress this. @@ -274,6 +282,7 @@ pm_options_read(pm_options_t *options, const char *data) { options->encoding_locked = ((uint8_t) *data++) > 0; options->main_script = ((uint8_t) *data++) > 0; options->partial_script = ((uint8_t) *data++) > 0; + options->freeze = ((uint8_t) *data++) > 0; uint32_t scopes_count = pm_options_read_u32(data); data += 4; diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index 777ebfa8fc..23af8886a7 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -12,24 +12,34 @@ static VALUE rb_cPrism<%= node.name %>; <%- end -%> static VALUE -pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start))); - return ULL2NUM(value); +pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) { + if (freeze) { + VALUE location_argv[] = { + source, + LONG2FIX(start - parser->start), + LONG2FIX(end - start) + }; + + return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation)); + } else { + uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start))); + return ULL2NUM(value); + } } VALUE -pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source) { +pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) { ID type = rb_intern(pm_token_type_name(token->type)); - VALUE location = pm_location_new(parser, token->start, token->end); + VALUE location = pm_location_new(parser, token->start, token->end, source, freeze); - VALUE argv[] = { - source, - ID2SYM(type), - rb_enc_str_new((const char *) token->start, token->end - token->start, encoding), - location - }; + VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding); + if (freeze) rb_obj_freeze(slice); - return rb_class_new_instance(4, argv, rb_cPrismToken); + VALUE argv[] = { source, ID2SYM(type), slice, location }; + VALUE value = rb_class_new_instance(4, argv, rb_cPrismToken); + if (freeze) rb_obj_freeze(value); + + return value; } static VALUE @@ -68,7 +78,7 @@ pm_integer_new(const pm_integer_t *integer) { // Create a Prism::Source object from the given parser, after pm_parse() was called. VALUE -pm_source_new(const pm_parser_t *parser, rb_encoding *encoding) { +pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) { VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding); VALUE offsets = rb_ary_new_capa(parser->newline_list.size); @@ -76,7 +86,15 @@ pm_source_new(const pm_parser_t *parser, rb_encoding *encoding) { rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index])); } - return rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets); + if (freeze) { + rb_obj_freeze(source_string); + rb_obj_freeze(offsets); + } + + VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets); + if (freeze) rb_obj_freeze(source); + + return source; } typedef struct pm_node_stack_node { @@ -106,7 +124,7 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) { } VALUE -pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source) { +pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) { VALUE constants = rb_ary_new_capa(parser->constant_pool.size); for (uint32_t index = 0; index < parser->constant_pool.size; index++) { @@ -182,7 +200,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi argv[1] = ULONG2NUM(node->node_id); // location - argv[2] = pm_location_new(parser, node->location.start, node->location.end); + argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze); // flags argv[3] = ULONG2NUM(node->flags); @@ -199,6 +217,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi for (size_t index = 0; index < cast-><%= field.name %>.size; index++) { rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack)); } + if (freeze) rb_obj_freeze(argv[<%= index %>]); <%- when Prism::Template::StringField -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" argv[<%= index %>] = pm_string_new(&cast-><%= field.name %>, encoding); @@ -215,12 +234,13 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi assert(cast-><%= field.name %>.ids[index] != 0); rb_ary_push(argv[<%= index %>], RARRAY_AREF(constants, cast-><%= field.name %>.ids[index] - 1)); } + if (freeze) rb_obj_freeze(argv[<%= index %>]); <%- when Prism::Template::LocationField -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" - argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end); + argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze); <%- when Prism::Template::OptionalLocationField -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" - argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end); + argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze); <%- when Prism::Template::UInt8Field -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>); @@ -238,7 +258,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi <%- end -%> <%- end -%> - rb_ary_push(value_stack, rb_class_new_instance(<%= node.fields.length + 4 %>, argv, rb_cPrism<%= node.name %>)); + VALUE value = rb_class_new_instance(<%= node.fields.length + 4 %>, argv, rb_cPrism<%= node.name %>); + if (freeze) rb_obj_freeze(value); + + rb_ary_push(value_stack, value); break; } <%- end -%> diff --git a/templates/lib/prism/serialize.rb.erb b/templates/lib/prism/serialize.rb.erb index d40aa881e7..52821e0f7d 100644 --- a/templates/lib/prism/serialize.rb.erb +++ b/templates/lib/prism/serialize.rb.erb @@ -16,15 +16,41 @@ module Prism # strings. PATCH_VERSION = 0 - # Deserialize the AST represented by the given string into a parse result. - def self.load(input, serialized) + # Deserialize the dumped output from a request to parse or parse_file. + # + # The formatting of the source of this method is purposeful to illustrate + # the structure of the serialized data. + def self.load_parse(input, serialized, freeze) input = input.dup source = Source.for(input) - loader = Loader.new(source, serialized) - result = loader.load_result - input.force_encoding(loader.encoding) + loader.load_header + encoding = loader.load_encoding + start_line = loader.load_varsint + offsets = loader.load_line_offsets(freeze) + + source.replace_start_line(start_line) + source.replace_offsets(offsets) + + comments = loader.load_comments(freeze) + magic_comments = loader.load_magic_comments(freeze) + data_loc = loader.load_optional_location_object(freeze) + errors = loader.load_errors(encoding, freeze) + warnings = loader.load_warnings(encoding, freeze) + cpool_base = loader.load_uint32 + cpool_size = loader.load_varuint + + constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size) + + node = loader.load_node(constant_pool, encoding, freeze) + loader.load_constant_pool(constant_pool) + raise unless loader.eof? + + result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, source) + result.freeze if freeze + + input.force_encoding(encoding) # This is an extremely niche use-case where the file was marked as binary # but it contained UTF-8-encoded characters. In that case we will actually @@ -35,96 +61,267 @@ module Prism input.force_encoding(Encoding::BINARY) unless input.valid_encoding? end + if freeze + input.freeze + source.deep_freeze + end + result end - # Deserialize the tokens represented by the given string into a parse - # result. - def self.load_tokens(source, serialized) - Loader.new(source, serialized).load_tokens_result + # Deserialize the dumped output from a request to lex or lex_file. + # + # The formatting of the source of this method is purposeful to illustrate + # the structure of the serialized data. + def self.load_lex(input, serialized, freeze) + source = Source.for(input) + loader = Loader.new(source, serialized) + + tokens = loader.load_tokens + encoding = loader.load_encoding + start_line = loader.load_varsint + offsets = loader.load_line_offsets(freeze) + + source.replace_start_line(start_line) + source.replace_offsets(offsets) + + comments = loader.load_comments(freeze) + magic_comments = loader.load_magic_comments(freeze) + data_loc = loader.load_optional_location_object(freeze) + errors = loader.load_errors(encoding, freeze) + warnings = loader.load_warnings(encoding, freeze) + raise unless loader.eof? + + result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, source) + + tokens.each do |token| + token[0].value.force_encoding(encoding) + + if freeze + token[0].deep_freeze + token.freeze + end + end + + if freeze + source.deep_freeze + tokens.freeze + result.freeze + end + + result end - class Loader # :nodoc: - if RUBY_ENGINE == "truffleruby" - # StringIO is synchronized and that adds a high overhead on TruffleRuby. - class FastStringIO # :nodoc: - attr_accessor :pos - - def initialize(string) - @string = string - @pos = 0 - end + # Deserialize the dumped output from a request to parse_comments or + # parse_file_comments. + # + # The formatting of the source of this method is purposeful to illustrate + # the structure of the serialized data. + def self.load_parse_comments(input, serialized, freeze) + source = Source.for(input) + loader = Loader.new(source, serialized) - def getbyte - byte = @string.getbyte(@pos) - @pos += 1 - byte - end + loader.load_header + loader.load_encoding + start_line = loader.load_varsint - def read(n) - slice = @string.byteslice(@pos, n) - @pos += n - slice - end + source.replace_start_line(start_line) + + result = loader.load_comments(freeze) + raise unless loader.eof? + + source.deep_freeze if freeze + result + end + + # Deserialize the dumped output from a request to parse_lex or + # parse_lex_file. + # + # The formatting of the source of this method is purposeful to illustrate + # the structure of the serialized data. + def self.load_parse_lex(input, serialized, freeze) + source = Source.for(input) + loader = Loader.new(source, serialized) + + tokens = loader.load_tokens + loader.load_header + encoding = loader.load_encoding + start_line = loader.load_varsint + offsets = loader.load_line_offsets(freeze) + + source.replace_start_line(start_line) + source.replace_offsets(offsets) + + comments = loader.load_comments(freeze) + magic_comments = loader.load_magic_comments(freeze) + data_loc = loader.load_optional_location_object(freeze) + errors = loader.load_errors(encoding, freeze) + warnings = loader.load_warnings(encoding, freeze) + cpool_base = loader.load_uint32 + cpool_size = loader.load_varuint + + constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size) + + node = loader.load_node(constant_pool, encoding, freeze) + loader.load_constant_pool(constant_pool) + raise unless loader.eof? + + value = [node, tokens] + result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source) + + tokens.each do |token| + token[0].value.force_encoding(encoding) + + if freeze + token[0].deep_freeze + token.freeze + end + end + + if freeze + source.deep_freeze + tokens.freeze + value.freeze + result.freeze + end + + result + end - def eof? - @pos >= @string.bytesize + class ConstantPool # :nodoc: + attr_reader :size + + def initialize(input, serialized, base, size) + @input = input + @serialized = serialized + @base = base + @size = size + @pool = Array.new(size, nil) + end + + def get(index, encoding) + @pool[index] ||= + begin + offset = @base + index * 8 + start = @serialized.unpack1("L", offset: offset) + length = @serialized.unpack1("L", offset: offset + 4) + + if start.nobits?(1 << 31) + @input.byteslice(start, length).force_encoding(encoding).to_sym + else + @serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(encoding).to_sym + end end + end + end + + if RUBY_ENGINE == "truffleruby" + # StringIO is synchronized and that adds a high overhead on TruffleRuby. + class FastStringIO # :nodoc: + attr_accessor :pos + + def initialize(string) + @string = string + @pos = 0 + end + + def getbyte + byte = @string.getbyte(@pos) + @pos += 1 + byte + end + + def read(n) + slice = @string.byteslice(@pos, n) + @pos += n + slice + end + + def eof? + @pos >= @string.bytesize end - else - FastStringIO = ::StringIO end - private_constant :FastStringIO + else + FastStringIO = ::StringIO # :nodoc: + end - attr_reader :encoding, :input, :serialized, :io - attr_reader :constant_pool_offset, :constant_pool, :source - attr_reader :start_line + class Loader # :nodoc: + attr_reader :input, :io, :source def initialize(source, serialized) - @encoding = Encoding::UTF_8 - @input = source.source.dup raise unless serialized.encoding == Encoding::BINARY - @serialized = serialized @io = FastStringIO.new(serialized) + @source = source + define_load_node_lambdas if RUBY_ENGINE != "ruby" + end - @constant_pool_offset = nil - @constant_pool = nil + def eof? + io.getbyte + io.eof? + end - @source = source - define_load_node_lambdas unless RUBY_ENGINE == "ruby" + def load_constant_pool(constant_pool) + trailer = 0 + + constant_pool.size.times do |index| + start, length = io.read(8).unpack("L2") + trailer += length if start.anybits?(1 << 31) + end + + io.read(trailer) end def load_header raise "Invalid serialization" if io.read(5) != "PRISM" raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION] - only_semantic_fields = io.getbyte - unless only_semantic_fields == 0 - raise "Invalid serialization (location fields must be included but are not)" - end + raise "Invalid serialization (location fields must be included but are not)" if io.getbyte != 0 end def load_encoding - @encoding = Encoding.find(io.read(load_varuint)) - @input = input.force_encoding(@encoding).freeze - @encoding + encoding = Encoding.find(io.read(load_varuint)) + @input = input.force_encoding(encoding).freeze + encoding end - def load_start_line - source.instance_variable_set :@start_line, load_varsint + def load_line_offsets(freeze) + offsets = Array.new(load_varuint) { load_varuint } + offsets.freeze if freeze + offsets end - def load_line_offsets - source.instance_variable_set :@offsets, Array.new(load_varuint) { load_varuint } + def load_comments(freeze) + comments = + Array.new(load_varuint) do + comment = + case load_varuint + when 0 then InlineComment.new(load_location_object(freeze)) + when 1 then EmbDocComment.new(load_location_object(freeze)) + end + + comment.freeze if freeze + comment + end + + comments.freeze if freeze + comments end - def load_comments - Array.new(load_varuint) do - case load_varuint - when 0 then InlineComment.new(load_location_object) - when 1 then EmbDocComment.new(load_location_object) + def load_magic_comments(freeze) + magic_comments = + Array.new(load_varuint) do + magic_comment = + MagicComment.new( + load_location_object(freeze), + load_location_object(freeze) + ) + + magic_comment.freeze if freeze + magic_comment end - end + + magic_comments.freeze if freeze + magic_comments end DIAGNOSTIC_TYPES = [ @@ -138,60 +335,88 @@ module Prism private_constant :DIAGNOSTIC_TYPES - def load_metadata - comments = load_comments - magic_comments = Array.new(load_varuint) { MagicComment.new(load_location_object, load_location_object) } - data_loc = load_optional_location_object - errors = Array.new(load_varuint) { ParseError.new(DIAGNOSTIC_TYPES.fetch(load_varuint), load_embedded_string, load_location_object, load_error_level) } - warnings = Array.new(load_varuint) { ParseWarning.new(DIAGNOSTIC_TYPES.fetch(load_varuint), load_embedded_string, load_location_object, load_warning_level) } - [comments, magic_comments, data_loc, errors, warnings] - end + def load_error_level + level = io.getbyte - def load_tokens - tokens = [] - while type = TOKEN_TYPES.fetch(load_varuint) - start = load_varuint - length = load_varuint - lex_state = load_varuint - location = Location.new(@source, start, length) - tokens << [Token.new(source, type, location.slice, location), lex_state] + case level + when 0 + :syntax + when 1 + :argument + when 2 + :load + else + raise "Unknown level: #{level}" end - - tokens end - def load_tokens_result - tokens = load_tokens - encoding = load_encoding - load_start_line - load_line_offsets - comments, magic_comments, data_loc, errors, warnings = load_metadata - tokens.each { |token,| token.value.force_encoding(encoding) } + def load_errors(encoding, freeze) + errors = + Array.new(load_varuint) do + error = + ParseError.new( + DIAGNOSTIC_TYPES.fetch(load_varuint), + load_embedded_string(encoding), + load_location_object(freeze), + load_error_level + ) + + error.freeze if freeze + error + end - raise "Expected to consume all bytes while deserializing" unless @io.eof? - LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source) + errors.freeze if freeze + errors end - def load_nodes - load_header - load_encoding - load_start_line - load_line_offsets + def load_warning_level + level = io.getbyte - comments, magic_comments, data_loc, errors, warnings = load_metadata + case level + when 0 + :default + when 1 + :verbose + else + raise "Unknown level: #{level}" + end + end - @constant_pool_offset = load_uint32 - @constant_pool = Array.new(load_varuint, nil) + def load_warnings(encoding, freeze) + warnings = + Array.new(load_varuint) do + warning = + ParseWarning.new( + DIAGNOSTIC_TYPES.fetch(load_varuint), + load_embedded_string(encoding), + load_location_object(freeze), + load_warning_level + ) + + warning.freeze if freeze + warning + end - [load_node, comments, magic_comments, data_loc, errors, warnings] + warnings.freeze if freeze + warnings end - def load_result - node, comments, magic_comments, data_loc, errors, warnings = load_nodes - ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source) - end + def load_tokens + tokens = [] - private + while (type = TOKEN_TYPES.fetch(load_varuint)) + start = load_varuint + length = load_varuint + lex_state = load_varuint + + location = Location.new(@source, start, length) + token = Token.new(@source, type, location.slice, location) + + tokens << [token, lex_state] + end + + tokens + end # variable-length integer using https://en.wikipedia.org/wiki/LEB128 # This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints @@ -233,110 +458,63 @@ module Prism io.read(4).unpack1("L") end - def load_optional_node + def load_optional_node(constant_pool, encoding, freeze) if io.getbyte != 0 io.pos -= 1 - load_node + load_node(constant_pool, encoding, freeze) end end - def load_embedded_string + def load_embedded_string(encoding) io.read(load_varuint).force_encoding(encoding).freeze end - def load_string - type = io.getbyte - case type + def load_string(encoding) + case (type = io.getbyte) when 1 input.byteslice(load_varuint, load_varuint).force_encoding(encoding).freeze when 2 - load_embedded_string + load_embedded_string(encoding) else raise "Unknown serialized string type: #{type}" end end - def load_location - (load_varuint << 32) | load_varuint - end - - def load_location_object - Location.new(source, load_varuint, load_varuint) + def load_location_object(freeze) + location = Location.new(source, load_varuint, load_varuint) + location.freeze if freeze + location end - def load_optional_location - load_location if io.getbyte != 0 + def load_location(freeze) + return load_location_object(freeze) if freeze + (load_varuint << 32) | load_varuint end - def load_optional_location_object - load_location_object if io.getbyte != 0 + def load_optional_location(freeze) + load_location(freeze) if io.getbyte != 0 end - def load_constant(index) - constant = constant_pool[index] - - unless constant - offset = constant_pool_offset + index * 8 - start = @serialized.unpack1("L", offset: offset) - length = @serialized.unpack1("L", offset: offset + 4) - - constant = - if start.nobits?(1 << 31) - input.byteslice(start, length).force_encoding(@encoding).to_sym - else - @serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(@encoding).to_sym - end - - constant_pool[index] = constant - end - - constant + def load_optional_location_object(freeze) + load_location_object(freeze) if io.getbyte != 0 end - def load_required_constant - load_constant(load_varuint - 1) - end - - def load_optional_constant + def load_constant(constant_pool, encoding) index = load_varuint - load_constant(index - 1) if index != 0 + constant_pool.get(index - 1, encoding) end - def load_error_level - level = io.getbyte - - case level - when 0 - :syntax - when 1 - :argument - when 2 - :load - else - raise "Unknown level: #{level}" - end - end - - def load_warning_level - level = io.getbyte - - case level - when 0 - :default - when 1 - :verbose - else - raise "Unknown level: #{level}" - end + def load_optional_constant(constant_pool, encoding) + index = load_varuint + constant_pool.get(index - 1, encoding) if index != 0 end if RUBY_ENGINE == "ruby" - def load_node + def load_node(constant_pool, encoding, freeze) type = io.getbyte node_id = load_varuint - location = load_location - - case type + location = load_location(freeze) + value = case type <%- nodes.each_with_index do |node, index| -%> when <%= index + 1 %> then <%- if node.needs_serialized_length? -%> @@ -344,15 +522,15 @@ module Prism <%- end -%> <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field| case field - when Prism::Template::NodeField then "load_node" - when Prism::Template::OptionalNodeField then "load_optional_node" - when Prism::Template::StringField then "load_string" - when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node }" - when Prism::Template::ConstantField then "load_required_constant" - when Prism::Template::OptionalConstantField then "load_optional_constant" - when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_required_constant }" - when Prism::Template::LocationField then "load_location" - when Prism::Template::OptionalLocationField then "load_optional_location" + when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)" + when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)" + when Prism::Template::StringField then "load_string(encoding)" + when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }.tap { |nodes| nodes.freeze if freeze }" + when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)" + when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)" + when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze }" + when Prism::Template::LocationField then "load_location(freeze)" + when Prism::Template::OptionalLocationField then "load_optional_location(freeze)" when Prism::Template::UInt8Field then "io.getbyte" when Prism::Template::UInt32Field then "load_varuint" when Prism::Template::IntegerField then "load_integer" @@ -362,34 +540,36 @@ module Prism }].join(", ") -%>) <%- end -%> end + + value.freeze if freeze + value end else - def load_node - type = io.getbyte - @load_node_lambdas[type].call + def load_node(constant_pool, encoding, freeze) + @load_node_lambdas[io.getbyte].call(constant_pool, encoding, freeze) end def define_load_node_lambdas @load_node_lambdas = [ nil, <%- nodes.each do |node| -%> - -> { + -> (constant_pool, encoding, freeze) { node_id = load_varuint - location = load_location + location = load_location(freeze) <%- if node.needs_serialized_length? -%> load_uint32 <%- end -%> - <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field| + value = <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field| case field - when Prism::Template::NodeField then "load_node" - when Prism::Template::OptionalNodeField then "load_optional_node" - when Prism::Template::StringField then "load_string" - when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node }" - when Prism::Template::ConstantField then "load_required_constant" - when Prism::Template::OptionalConstantField then "load_optional_constant" - when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_required_constant }" - when Prism::Template::LocationField then "load_location" - when Prism::Template::OptionalLocationField then "load_optional_location" + when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)" + when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)" + when Prism::Template::StringField then "load_string(encoding)" + when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }" + when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)" + when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)" + when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }" + when Prism::Template::LocationField then "load_location(freeze)" + when Prism::Template::OptionalLocationField then "load_optional_location(freeze)" when Prism::Template::UInt8Field then "io.getbyte" when Prism::Template::UInt32Field then "load_varuint" when Prism::Template::IntegerField then "load_integer" @@ -397,6 +577,8 @@ module Prism else raise end }].join(", ") -%>) + value.freeze if freeze + value }, <%- end -%> ] @@ -411,5 +593,10 @@ module Prism <%= token.name.to_sym.inspect %>, <%- end -%> ] + + private_constant :MAJOR_VERSION, :MINOR_VERSION, :PATCH_VERSION + private_constant :ConstantPool, :FastStringIO, :Loader, :TOKEN_TYPES end + + private_constant :Serialize end diff --git a/templates/sig/prism.rbs.erb b/templates/sig/prism.rbs.erb index 7c03218ab5..96cadec9dd 100644 --- a/templates/sig/prism.rbs.erb +++ b/templates/sig/prism.rbs.erb @@ -21,6 +21,7 @@ module Prism String source, ?encoding: Encoding | false, ?filepath: String, + ?freeze: bool, ?frozen_string_literal: bool, ?line: Integer, ?main_script: bool, @@ -32,7 +33,8 @@ module Prism def self.load: ( String source, - String serialized + String serialized, + ?bool freeze ) -> ParseResult def self.lex_ripper: ( @@ -56,6 +58,7 @@ module Prism def self.<%= method %>: ( String filepath, ?encoding: Encoding | false, + ?freeze: bool, ?frozen_string_literal: bool, ?line: Integer, ?main_script: bool, @@ -73,6 +76,7 @@ module Prism _Stream stream, ?encoding: Encoding | false, ?filepath: String, + ?freeze: bool, ?frozen_string_literal: bool, ?line: Integer, ?main_script: bool, diff --git a/test/prism/api/freeze_test.rb b/test/prism/api/freeze_test.rb new file mode 100644 index 0000000000..5533a00331 --- /dev/null +++ b/test/prism/api/freeze_test.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class FreezeTest < TestCase + def test_parse + assert_frozen(Prism.parse("1 + 2; %i{foo} + %i{bar}", freeze: true)) + end + + def test_lex + assert_frozen(Prism.lex("1 + 2; %i{foo} + %i{bar}", freeze: true)) + end + + def test_parse_lex + assert_frozen(Prism.parse_lex("1 + 2; %i{foo} + %i{bar}", freeze: true)) + assert_frozen(Prism.parse_lex("# encoding: euc-jp\n%i{foo}", freeze: true)) + end + + def test_parse_comments + assert_frozen(Prism.parse_comments("# comment", freeze: true)) + end + + def test_parse_stream + assert_frozen(Prism.parse_stream(StringIO.new("1 + 2; %i{foo} + %i{bar}"), freeze: true)) + end + + if !ENV["PRISM_BUILD_MINIMAL"] + def test_dump + assert_frozen(Prism.dump("1 + 2; %i{foo} + %i{bar}", freeze: true)) + end + end + + private + + def assert_frozen_each(value) + assert_predicate value, :frozen? + + value.instance_variables.each do |name| + case (child = value.instance_variable_get(name)) + when Array + child.each { |item| assert_frozen_each(item) } + when Hash + child.each { |key, item| assert_frozen_each(key); assert_frozen_each(item) } + else + assert_frozen_each(child) + end + end + end + + if defined?(Ractor.shareable?) + def assert_frozen(value) + assert_frozen_each(value) + assert Ractor.shareable?(value), -> { binding.irb } + end + else + alias assert_frozen assert_frozen_each + end + end +end