Skip to content

Commit ec071c8

Browse files
[DOC] Tweaks for String#byterindex (ruby#13485)
1 parent 077dbb8 commit ec071c8

File tree

2 files changed

+68
-42
lines changed

2 files changed

+68
-42
lines changed

string.c

Lines changed: 67 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4954,7 +4954,7 @@ str_ensure_byte_pos(VALUE str, long pos)
49544954
*
49554955
* s = 'foo' # => "foo"
49564956
* s.size # => 3 # Three 1-byte characters.
4957-
s.bytesize # => 3 # Three bytes.
4957+
* s.bytesize # => 3 # Three bytes.
49584958
* s.byteindex('f') # => 0
49594959
* s.byteindex('o') # => 1
49604960
* s.byteindex('oo') # => 1
@@ -5260,65 +5260,90 @@ rb_str_byterindex(VALUE str, VALUE sub, long pos)
52605260
return str_rindex(str, sub, s, enc);
52615261
}
52625262

5263-
52645263
/*
52655264
* call-seq:
5266-
* byterindex(substring, offset = self.bytesize) -> integer or nil
5267-
* byterindex(regexp, offset = self.bytesize) -> integer or nil
5265+
* byterindex(object, offset = self.bytesize) -> integer or nil
52685266
*
5269-
* Returns the Integer byte-based index of the _last_ occurrence of the given +substring+,
5270-
* or +nil+ if none found:
5267+
* Returns the 0-based integer index of a substring of +self+
5268+
* that is the _last_ match for the given +object+ (a string or Regexp) and +offset+,
5269+
* or +nil+ if there is no such substring;
5270+
* the returned index is the count of _bytes_ (not characters).
52715271
*
5272-
* 'foo'.byterindex('f') # => 0
5273-
* 'foo'.byterindex('o') # => 2
5274-
* 'foo'.byterindex('oo') # => 1
5275-
* 'foo'.byterindex('ooo') # => nil
5272+
* When +object+ is a string,
5273+
* returns the index of the _last_ found substring equal to +object+:
52765274
*
5277-
* Returns the Integer byte-based index of the _last_ match for the given Regexp +regexp+,
5278-
* or +nil+ if none found:
5275+
* s = 'foo' # => "foo"
5276+
* s.size # => 3 # Three 1-byte characters.
5277+
* s.bytesize # => 3 # Three bytes.
5278+
* s.byterindex('f') # => 0
5279+
* s.byterindex('o') # => 2
5280+
* s.byterindex('oo') # => 1
5281+
* s.byterindex('ooo') # => nil
52795282
*
5280-
* 'foo'.byterindex(/f/) # => 0
5281-
* 'foo'.byterindex(/o/) # => 2
5282-
* 'foo'.byterindex(/oo/) # => 1
5283-
* 'foo'.byterindex(/ooo/) # => nil
5283+
* When +object+ is a Regexp,
5284+
* returns the index of the last found substring matching +object+;
5285+
* updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]:
52845286
*
5285-
* The _last_ match means starting at the possible last position, not
5286-
* the last of longest matches.
5287+
* s = 'foo'
5288+
* s.byterindex(/f/) # => 0
5289+
* $~ # => #<MatchData "f">
5290+
* s.byterindex(/o/) # => 2
5291+
* s.byterindex(/oo/) # => 1
5292+
* s.byterindex(/ooo/) # => nil
5293+
* $~ # => nil
52875294
*
5288-
* 'foo'.byterindex(/o+/) # => 2
5289-
* $~ #=> #<MatchData "o">
5295+
* The last match means starting at the possible last position,
5296+
* not the last of the longest matches:
52905297
*
5291-
* To get the last longest match, needs to combine with negative
5292-
* lookbehind.
5298+
* s = 'foo'
5299+
* s.byterindex(/o+/) # => 2
5300+
* $~ # => #<MatchData "o">
52935301
*
5294-
* 'foo'.byterindex(/(?<!o)o+/) # => 1
5295-
* $~ #=> #<MatchData "oo">
5302+
* To get the last longest match, use a negative lookbehind:
52965303
*
5297-
* Or String#byteindex with negative lookforward.
5304+
* s = 'foo'
5305+
* s.byterindex(/(?<!o)o+/) # => 1
5306+
* $~ # => #<MatchData "oo">
52985307
*
5299-
* 'foo'.byteindex(/o+(?!.*o)/) # => 1
5300-
* $~ #=> #<MatchData "oo">
5308+
* Or use method #byteindex with negative lookahead:
53015309
*
5302-
* Integer argument +offset+, if given and non-negative, specifies the maximum starting byte-based position in the
5303-
* string to _end_ the search:
5310+
* s = 'foo'
5311+
* s.byteindex(/o+(?!.*o)/) # => 1
5312+
* $~ # => #<MatchData "oo">
53045313
*
5305-
* 'foo'.byterindex('o', 0) # => nil
5306-
* 'foo'.byterindex('o', 1) # => 1
5307-
* 'foo'.byterindex('o', 2) # => 2
5308-
* 'foo'.byterindex('o', 3) # => 2
5314+
* \Integer argument +offset+, if given, specifies the 0-based index
5315+
* of the byte where searching is to end.
53095316
*
5310-
* If +offset+ is a negative Integer, the maximum starting position in the
5311-
* string to _end_ the search is the sum of the string's length and +offset+:
5317+
* When +offset+ is non-negative,
5318+
* searching ends at byte position +offset+:
53125319
*
5313-
* 'foo'.byterindex('o', -1) # => 2
5314-
* 'foo'.byterindex('o', -2) # => 1
5315-
* 'foo'.byterindex('o', -3) # => nil
5316-
* 'foo'.byterindex('o', -4) # => nil
5320+
* s = 'foo'
5321+
* s.byterindex('o', 0) # => nil
5322+
* s.byterindex('o', 1) # => 1
5323+
* s.byterindex('o', 2) # => 2
5324+
* s.byterindex('o', 3) # => 2
5325+
*
5326+
* When +offset+ is negative, counts backward from the end of +self+:
5327+
*
5328+
* s = 'foo'
5329+
* s.byterindex('o', -1) # => 2
5330+
* s.byterindex('o', -2) # => 1
5331+
* s.byterindex('o', -3) # => nil
5332+
*
5333+
* Raises IndexError if the byte at +offset+ is not the first byte of a character:
53175334
*
5318-
* If +offset+ does not land on character (codepoint) boundary, +IndexError+ is
5319-
* raised.
5335+
* s = "\uFFFF\uFFFF" # => "\uFFFF\uFFFF"
5336+
* s.size # => 2 # Two 3-byte characters.
5337+
* s.bytesize # => 6 # Six bytes.
5338+
* s.byterindex("\uFFFF") # => 3
5339+
* s.byterindex("\uFFFF", 1) # Raises IndexError
5340+
* s.byterindex("\uFFFF", 2) # Raises IndexError
5341+
* s.byterindex("\uFFFF", 3) # => 3
5342+
* s.byterindex("\uFFFF", 4) # Raises IndexError
5343+
* s.byterindex("\uFFFF", 5) # Raises IndexError
5344+
* s.byterindex("\uFFFF", 6) # => 3
53205345
*
5321-
* Related: String#byteindex.
5346+
* Related: see {Querying}[rdoc-ref:String@Querying].
53225347
*/
53235348

53245349
static VALUE

string.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@
343343
# - #=~: Returns the index of the first substring that matches a given
344344
# Regexp or other object; returns +nil+ if no match is found.
345345
# - #byteindex: Returns the byte index of the first occurrence of a given substring.
346+
# - #byterindex: Returns the byte index of the last occurrence of a given substring.
346347
# - #index: Returns the index of the _first_ occurrence of a given substring;
347348
# returns +nil+ if none found.
348349
# - #rindex: Returns the index of the _last_ occurrence of a given substring;

0 commit comments

Comments
 (0)