Skip to content

Commit 30245c6

Browse files
authored
Merge pull request ruby#2863 from ruby/byte-offset
Specify input range by byte offsets
2 parents 3a9c363 + 1ceb1dc commit 30245c6

File tree

7 files changed

+87
-24
lines changed

7 files changed

+87
-24
lines changed

include/rbs/lexer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,8 @@ typedef struct {
136136
* */
137137
typedef struct {
138138
rbs_string_t string;
139-
int start_pos; /* The character position that defines the start of the input */
140-
int end_pos; /* The character position that defines the end of the input */
139+
int start_pos; /* The byte position that defines the start of the input */
140+
int end_pos; /* The byte position that defines the end of the input */
141141
rbs_position_t current; /* The current position: just before the current_character */
142142
rbs_position_t start; /* The start position of the current token */
143143

lib/rbs/parser_aux.rb

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@
55

66
module RBS
77
class Parser
8-
def self.parse_type(source, range: 0..., variables: [], require_eof: false, void_allowed: true, self_allowed: true, classish_allowed: true)
8+
def self.parse_type(source, range: nil, byte_range: 0..., variables: [], require_eof: false, void_allowed: true, self_allowed: true, classish_allowed: true)
99
buf = buffer(source)
10-
_parse_type(buf, range.begin || 0, range.end || buf.last_position, variables, require_eof, void_allowed, self_allowed, classish_allowed)
10+
byte_range = byte_range(range, buf.content) if range
11+
_parse_type(buf, byte_range.begin || 0, byte_range.end || buf.content.bytesize, variables, require_eof, void_allowed, self_allowed, classish_allowed)
1112
end
1213

13-
def self.parse_method_type(source, range: 0..., variables: [], require_eof: false)
14+
def self.parse_method_type(source, range: nil, byte_range: 0..., variables: [], require_eof: false)
1415
buf = buffer(source)
15-
_parse_method_type(buf, range.begin || 0, range.end || buf.last_position, variables, require_eof)
16+
byte_range = byte_range(range, buf.content) if range
17+
_parse_method_type(buf, byte_range.begin || 0, byte_range.end || buf.content.bytesize, variables, require_eof)
1618
end
1719

1820
def self.parse_signature(source)
@@ -25,7 +27,8 @@ def self.parse_signature(source)
2527
else
2628
0
2729
end
28-
dirs, decls = _parse_signature(buf, start_pos, buf.last_position)
30+
content = buf.content
31+
dirs, decls = _parse_signature(buf, start_pos, content.bytesize)
2932

3033
if resolved
3134
dirs = dirs.dup if dirs.frozen?
@@ -37,7 +40,7 @@ def self.parse_signature(source)
3740

3841
def self.parse_type_params(source, module_type_params: true)
3942
buf = buffer(source)
40-
_parse_type_params(buf, 0, buf.last_position, module_type_params)
43+
_parse_type_params(buf, 0, buf.content.bytesize, module_type_params)
4144
end
4245

4346
def self.magic_comment(buf)
@@ -66,7 +69,7 @@ def self.magic_comment(buf)
6669

6770
def self.lex(source)
6871
buf = buffer(source)
69-
list = _lex(buf, buf.last_position)
72+
list = _lex(buf, buf.content.bytesize)
7073
value = list.map do |type, location|
7174
Token.new(type: type, location: location)
7275
end
@@ -125,5 +128,15 @@ def self.parse_inline_trailing_annotation(source, range, variables: [])
125128
buf = buffer(source)
126129
_parse_inline_trailing_annotation(buf, range.begin || 0, range.end || buf.last_position, variables)
127130
end
131+
132+
def self.byte_range(char_range, content)
133+
start_offset = char_range.begin
134+
end_offset = char_range.end
135+
136+
start_prefix = content[0, start_offset] or raise if start_offset
137+
end_prefix = content[0, end_offset] or raise if end_offset
138+
139+
start_prefix&.bytesize...end_prefix&.bytesize
140+
end
128141
end
129142
end

sig/parser.rbs

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ module RBS
2020

2121
# Parse a method type and return it
2222
#
23-
# When `range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
23+
# When `byte_range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
2424
#
2525
# ```ruby
26-
# RBS::Parser.parse_method_type("() -> void") # => `() -> void`
27-
# RBS::Parser.parse_method_type("() -> void", range: 0...) # => `() -> void`
28-
# RBS::Parser.parse_method_type("() -> void () -> String", range: 11...) # => `() -> String`
29-
# RBS::Parser.parse_method_type("() -> void () -> String", range: 23...) # => nil
26+
# RBS::Parser.parse_method_type("() -> void") # => `() -> void`
27+
# RBS::Parser.parse_method_type("() -> void", byte_range: 0...) # => `() -> void`
28+
# RBS::Parser.parse_method_type("() -> void () -> String", byte_range: 11...) # => `() -> String`
29+
# RBS::Parser.parse_method_type("() -> void () -> String", byte_range: 23...) # => nil
3030
# ```
3131
#
3232
# When `require_eof` is `true`, an error is raised if more tokens are left in the input.
@@ -39,17 +39,18 @@ module RBS
3939
# RBS::Parser.parse_method_type("", require_eof: true) # => nil
4040
# ```
4141
#
42-
def self.parse_method_type: (Buffer | String, ?range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool) -> MethodType?
42+
def self.parse_method_type: (Buffer | String, ?byte_range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool) -> MethodType?
43+
| %a{deprecated: Use `byte_range:` keyword instead of `range:`} (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool) -> MethodType?
4344

4445
# Parse a type and return it
4546
#
46-
# When `range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
47+
# When `byte_range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
4748
#
4849
# ```ruby
49-
# RBS::Parser.parse_type("String") # => `String`
50-
# RBS::Parser.parse_type("String", range: 0...) # => `String`
51-
# RBS::Parser.parse_type("String Integer", pos: 7...) # => `Integer`
52-
# RBS::Parser.parse_type("String Integer", pos: 14...) # => nil
50+
# RBS::Parser.parse_type("String") # => `String`
51+
# RBS::Parser.parse_type("String", byte_range: 0...) # => `String`
52+
# RBS::Parser.parse_type("String Integer", byte_range: 7...) # => `Integer`
53+
# RBS::Parser.parse_type("String Integer", byte_range: 14...) # => nil
5354
# ```
5455
#
5556
# When `require_eof` is `true`, an error is raised if more tokens are left in the input.
@@ -76,7 +77,8 @@ module RBS
7677
# RBS::Parser.parse_type("self", self_allowed: false) # => Raises an syntax error
7778
# ```
7879
#
79-
def self.parse_type: (Buffer | String, ?range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool, ?void_allowed: bool, ?self_allowed: bool, ?classish_allowed: bool) -> Types::t?
80+
def self.parse_type: (Buffer | String, ?byte_range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool, ?void_allowed: bool, ?self_allowed: bool, ?classish_allowed: bool) -> Types::t?
81+
| %a{deprecated: Use `byte_range:` keyword instead of `range:`} (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool, ?void_allowed: bool, ?self_allowed: bool, ?classish_allowed: bool) -> Types::t?
8082

8183
# Parse whole RBS file and return an array of declarations
8284
#
@@ -130,6 +132,8 @@ module RBS
130132

131133
def self.buffer: (String | Buffer source) -> Buffer
132134

135+
def self.byte_range: (Range[Integer?] char_range, String content) -> Range[Integer?]
136+
133137
def self._parse_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, bool require_eof, bool void_allowed, bool self_allowed, bool classish_allowed) -> Types::t?
134138

135139
def self._parse_method_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, bool require_eof) -> MethodType?

src/lexstate.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ unsigned int rbs_peek(rbs_lexer_t *lexer) {
118118
}
119119

120120
bool rbs_next_char(rbs_lexer_t *lexer, unsigned int *codepoint, size_t *byte_len) {
121-
if (RBS_UNLIKELY(lexer->current.char_pos == lexer->end_pos)) {
121+
if (RBS_UNLIKELY(lexer->current.byte_pos == lexer->end_pos)) {
122122
return false;
123123
}
124124

src/parser.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3475,7 +3475,9 @@ rbs_lexer_t *rbs_lexer_new(rbs_allocator_t *allocator, rbs_string_t string, cons
34753475
}
34763476

34773477
if (start_pos > 0) {
3478-
rbs_skipn(lexer, start_pos);
3478+
while (lexer->current.byte_pos < start_pos) {
3479+
rbs_skip(lexer);
3480+
}
34793481
}
34803482

34813483
lexer->start = lexer->current;

test/rbs/parser_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -812,7 +812,7 @@ def test_buffer_location
812812

813813
def test_negative_range
814814
assert_raises ArgumentError do
815-
RBS::Parser.parse_type("a", range: -2...-1)
815+
RBS::Parser.parse_type("a", byte_range: -2...-1)
816816
end
817817
end
818818

test/rbs/type_parsing_test.rb

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,4 +980,48 @@ def test_parse__string_unicode_escape__non_unicode
980980
assert_equal "[\\u30eb]", type.literal
981981
end
982982
end
983+
984+
def test_parse__byte_range
985+
input = '["🐕", "🐈"]'
986+
987+
Parser.parse_type(input).yield_self do |type|
988+
assert_instance_of Types::Tuple, type
989+
end
990+
991+
Parser.parse_type(input, byte_range: '["🐕", '.bytesize...).yield_self do |type|
992+
assert_instance_of Types::Literal, type
993+
assert_equal "🐈", type.literal
994+
end
995+
996+
Parser.parse_type(input, byte_range: '["🐕", '.bytesize...'["🐕", "🐈"'.bytesize, require_eof: true).yield_self do |type|
997+
assert_instance_of Types::Literal, type
998+
assert_equal "🐈", type.literal
999+
end
1000+
1001+
Parser.parse_type(input, byte_range: '["🐕", '.bytesize..'["🐕", "🐈"'.bytesize, require_eof: true).yield_self do |type|
1002+
assert_instance_of Types::Literal, type
1003+
assert_equal "🐈", type.literal
1004+
end
1005+
end
1006+
1007+
def test_parse__range_works
1008+
input = '["🐕", "🐈"]'
1009+
1010+
Parser.parse_type(input, range: 6...9, require_eof: true).yield_self do |type|
1011+
assert_instance_of Types::Literal, type
1012+
assert_equal "🐈", type.literal
1013+
end
1014+
end
1015+
1016+
def test_parse__byte_range_incorrect
1017+
# We want a better error handling ergonomics, but currently simply raises a syntax error.
1018+
1019+
input = '"🐕🐈"'
1020+
1021+
exn = assert_raises RBS::ParsingError do
1022+
Parser.parse_type(input, byte_range: 2...)
1023+
end
1024+
1025+
assert_equal "a.rbs:1:2...1:3: Syntax error: unexpected token for simple type, token=`🐈` (ErrorToken)", exn.message
1026+
end
9831027
end

0 commit comments

Comments
Β (0)