Skip to content

Commit cfd4672

Browse files
authored
Merge pull request #3602 from tenderlove/eof-callback
Add an `feof` callback to Prism stream parsing
2 parents b35e37b + 802a123 commit cfd4672

File tree

4 files changed

+38
-11
lines changed

4 files changed

+38
-11
lines changed

ext/prism/extension.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,14 @@ profile_file(int argc, VALUE *argv, VALUE self) {
994994
return Qnil;
995995
}
996996

997+
static int
998+
parse_stream_eof(void *stream) {
999+
if (rb_funcall((VALUE) stream, rb_intern("eof?"), 0)) {
1000+
return 1;
1001+
}
1002+
return 0;
1003+
}
1004+
9971005
/**
9981006
* An implementation of fgets that is suitable for use with Ruby IO objects.
9991007
*/
@@ -1034,7 +1042,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
10341042
pm_parser_t parser;
10351043
pm_buffer_t buffer;
10361044

1037-
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
1045+
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options);
10381046
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
10391047

10401048
VALUE source = pm_source_new(&parser, encoding, options.freeze);

include/prism.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,25 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
8787
*/
8888
typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
8989

90+
/**
91+
* This function is used in pm_parse_stream to check whether a stream has reached EOF.
92+
* It closely mirrors the signature of feof so that feof can be used as the
93+
* default implementation.
94+
*/
95+
typedef int (pm_parse_stream_feof_t)(void *stream);
96+
9097
/**
9198
* Parse a stream of Ruby source and return the tree.
9299
*
93100
* @param parser The parser to use.
94101
* @param buffer The buffer to use.
95102
* @param stream The stream to parse.
96103
* @param stream_fgets The function to use to read from the stream.
104+
* @param stream_feof The function to use to determine if the stream has hit eof.
97105
* @param options The optional options to use when parsing.
98106
* @return The AST representing the source.
99107
*/
100-
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options);
108+
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options);
101109

102110
// We optionally support serializing to a binary string. For systems that don't
103111
// want or need this functionality, it can be turned off with the
@@ -111,9 +119,10 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buff
111119
* @param buffer The buffer to serialize to.
112120
* @param stream The stream to parse.
113121
* @param stream_fgets The function to use to read from the stream.
122+
* @param stream_feof The function to use to tell if the stream has hit eof.
114123
* @param data The optional data to pass to the parser.
115124
*/
116-
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data);
125+
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data);
117126

118127
/**
119128
* Serialize the given list of comments to the given buffer.

lib/prism/ffi.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def self.load_exported_functions_from(header, *functions, callbacks)
8686
end
8787

8888
callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
89+
callback :pm_parse_stream_feof_t, [:pointer], :int
8990
enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
9091
enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
9192

@@ -101,7 +102,7 @@ def self.load_exported_functions_from(header, *functions, callbacks)
101102
"pm_string_query_local",
102103
"pm_string_query_constant",
103104
"pm_string_query_method_name",
104-
[:pm_parse_stream_fgets_t]
105+
[:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t]
105106
)
106107

107108
load_exported_functions_from(
@@ -281,12 +282,14 @@ def parse_stream(stream, **options)
281282
end
282283
}
283284

285+
eof_callback = -> (_) { stream.eof? }
286+
284287
# In the pm_serialize_parse_stream function it accepts a pointer to the
285288
# IO object as a void* and then passes it through to the callback as the
286289
# third argument, but it never touches it itself. As such, since we have
287290
# access to the IO object already through the closure of the lambda, we
288291
# can pass a null pointer here and not worry.
289-
LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
292+
LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, eof_callback, dump_options(options))
290293
Prism.load(source, buffer.read, options.fetch(:freeze, false))
291294
end
292295
end

src/prism.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22842,7 +22842,7 @@ pm_parse(pm_parser_t *parser) {
2284222842
* otherwise return true.
2284322843
*/
2284422844
static bool
22845-
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
22845+
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
2284622846
#define LINE_SIZE 4096
2284722847
char line[LINE_SIZE];
2284822848

@@ -22878,6 +22878,12 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t
2287822878
if (strncmp(line, "__END__\r\n", 9) == 0) return false;
2287922879
break;
2288022880
}
22881+
22882+
// All data should be read via gets. If the string returned by gets
22883+
// _doesn't_ end with a newline, then we ask the stream whether it has hit EOF.
22884+
if (stream_feof(stream)) {
22885+
break;
22886+
}
2288122887
}
2288222888

2288322889
return true;
@@ -22913,16 +22919,17 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
2291322919
* can stream stdin in to Ruby so we need to support a streaming API.
2291422920
*/
2291522921
PRISM_EXPORTED_FUNCTION pm_node_t *
22916-
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
22922+
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
2291722923
pm_buffer_init(buffer);
2291822924

22919-
bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22925+
bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22926+
2292022927
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
2292122928
pm_node_t *node = pm_parse(parser);
2292222929

2292322930
while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
2292422931
pm_node_destroy(parser, node);
22925-
eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22932+
eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
2292622933

2292722934
pm_parser_free(parser);
2292822935
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
@@ -23014,13 +23021,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
2301423021
* given stream into the given buffer.
2301523022
*/
2301623023
PRISM_EXPORTED_FUNCTION void
23017-
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
23024+
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
2301823025
pm_parser_t parser;
2301923026
pm_options_t options = { 0 };
2302023027
pm_options_read(&options, data);
2302123028

2302223029
pm_buffer_t parser_buffer;
23023-
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
23030+
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
2302423031
pm_serialize_header(buffer);
2302523032
pm_serialize_content(&parser, node, buffer);
2302623033
pm_buffer_append_byte(buffer, '\0');

0 commit comments

Comments
 (0)