diff --git a/benchmark/file_join.yml b/benchmark/file_join.yml
new file mode 100644
index 00000000000000..845257cf1e4a31
--- /dev/null
+++ b/benchmark/file_join.yml
@@ -0,0 +1,7 @@
+prelude: |
+ # frozen_string_literal: true
+benchmark:
+ two_strings: File.join(__FILE__, "path")
+ many_strings: File.join(__FILE__, "path", "a", "b", "c", "d")
+ array: File.join([__FILE__, "path", "a", "b", "c", "d"])
+ mixed: File.join(__FILE__, "path", "a", "b", ["c", "d"])
diff --git a/depend b/depend
index cfafc77703b9fa..7372902666b97b 100644
--- a/depend
+++ b/depend
@@ -799,6 +799,7 @@ box.$(OBJEXT): {$(VPATH)}constant.h
box.$(OBJEXT): {$(VPATH)}darray.h
box.$(OBJEXT): {$(VPATH)}debug_counter.h
box.$(OBJEXT): {$(VPATH)}defines.h
+box.$(OBJEXT): {$(VPATH)}encindex.h
box.$(OBJEXT): {$(VPATH)}encoding.h
box.$(OBJEXT): {$(VPATH)}eval_intern.h
box.$(OBJEXT): {$(VPATH)}id.h
@@ -1250,6 +1251,7 @@ class.$(OBJEXT): {$(VPATH)}config.h
class.$(OBJEXT): {$(VPATH)}constant.h
class.$(OBJEXT): {$(VPATH)}debug_counter.h
class.$(OBJEXT): {$(VPATH)}defines.h
+class.$(OBJEXT): {$(VPATH)}encindex.h
class.$(OBJEXT): {$(VPATH)}encoding.h
class.$(OBJEXT): {$(VPATH)}id.h
class.$(OBJEXT): {$(VPATH)}id_table.h
@@ -1449,6 +1451,7 @@ compar.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
compar.$(OBJEXT): {$(VPATH)}compar.c
compar.$(OBJEXT): {$(VPATH)}config.h
compar.$(OBJEXT): {$(VPATH)}defines.h
+compar.$(OBJEXT): {$(VPATH)}encindex.h
compar.$(OBJEXT): {$(VPATH)}encoding.h
compar.$(OBJEXT): {$(VPATH)}id.h
compar.$(OBJEXT): {$(VPATH)}intern.h
@@ -1921,6 +1924,7 @@ complex.$(OBJEXT): {$(VPATH)}config.h
complex.$(OBJEXT): {$(VPATH)}constant.h
complex.$(OBJEXT): {$(VPATH)}debug_counter.h
complex.$(OBJEXT): {$(VPATH)}defines.h
+complex.$(OBJEXT): {$(VPATH)}encindex.h
complex.$(OBJEXT): {$(VPATH)}encoding.h
complex.$(OBJEXT): {$(VPATH)}id.h
complex.$(OBJEXT): {$(VPATH)}id_table.h
@@ -2126,6 +2130,7 @@ concurrent_set.$(OBJEXT): {$(VPATH)}concurrent_set.c
concurrent_set.$(OBJEXT): {$(VPATH)}config.h
concurrent_set.$(OBJEXT): {$(VPATH)}debug_counter.h
concurrent_set.$(OBJEXT): {$(VPATH)}defines.h
+concurrent_set.$(OBJEXT): {$(VPATH)}encindex.h
concurrent_set.$(OBJEXT): {$(VPATH)}encoding.h
concurrent_set.$(OBJEXT): {$(VPATH)}id.h
concurrent_set.$(OBJEXT): {$(VPATH)}id_table.h
@@ -2364,6 +2369,7 @@ cont.$(OBJEXT): {$(VPATH)}constant.h
cont.$(OBJEXT): {$(VPATH)}cont.c
cont.$(OBJEXT): {$(VPATH)}debug_counter.h
cont.$(OBJEXT): {$(VPATH)}defines.h
+cont.$(OBJEXT): {$(VPATH)}encindex.h
cont.$(OBJEXT): {$(VPATH)}encoding.h
cont.$(OBJEXT): {$(VPATH)}eval_intern.h
cont.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
@@ -4906,6 +4912,7 @@ enumerator.$(OBJEXT): {$(VPATH)}config.h
enumerator.$(OBJEXT): {$(VPATH)}constant.h
enumerator.$(OBJEXT): {$(VPATH)}debug_counter.h
enumerator.$(OBJEXT): {$(VPATH)}defines.h
+enumerator.$(OBJEXT): {$(VPATH)}encindex.h
enumerator.$(OBJEXT): {$(VPATH)}encoding.h
enumerator.$(OBJEXT): {$(VPATH)}enumerator.c
enumerator.$(OBJEXT): {$(VPATH)}id.h
@@ -5126,6 +5133,7 @@ error.$(OBJEXT): {$(VPATH)}config.h
error.$(OBJEXT): {$(VPATH)}constant.h
error.$(OBJEXT): {$(VPATH)}debug_counter.h
error.$(OBJEXT): {$(VPATH)}defines.h
+error.$(OBJEXT): {$(VPATH)}encindex.h
error.$(OBJEXT): {$(VPATH)}encoding.h
error.$(OBJEXT): {$(VPATH)}error.c
error.$(OBJEXT): {$(VPATH)}id.h
@@ -5373,6 +5381,7 @@ eval.$(OBJEXT): {$(VPATH)}config.h
eval.$(OBJEXT): {$(VPATH)}constant.h
eval.$(OBJEXT): {$(VPATH)}debug_counter.h
eval.$(OBJEXT): {$(VPATH)}defines.h
+eval.$(OBJEXT): {$(VPATH)}encindex.h
eval.$(OBJEXT): {$(VPATH)}encoding.h
eval.$(OBJEXT): {$(VPATH)}eval.c
eval.$(OBJEXT): {$(VPATH)}eval_error.c
@@ -5584,6 +5593,7 @@ file.$(OBJEXT): $(top_srcdir)/internal/array.h
file.$(OBJEXT): $(top_srcdir)/internal/class.h
file.$(OBJEXT): $(top_srcdir)/internal/compilers.h
file.$(OBJEXT): $(top_srcdir)/internal/dir.h
+file.$(OBJEXT): $(top_srcdir)/internal/encoding.h
file.$(OBJEXT): $(top_srcdir)/internal/error.h
file.$(OBJEXT): $(top_srcdir)/internal/file.h
file.$(OBJEXT): $(top_srcdir)/internal/gc.h
@@ -5865,6 +5875,7 @@ gc.$(OBJEXT): {$(VPATH)}darray.h
gc.$(OBJEXT): {$(VPATH)}debug.h
gc.$(OBJEXT): {$(VPATH)}debug_counter.h
gc.$(OBJEXT): {$(VPATH)}defines.h
+gc.$(OBJEXT): {$(VPATH)}encindex.h
gc.$(OBJEXT): {$(VPATH)}encoding.h
gc.$(OBJEXT): {$(VPATH)}eval_intern.h
gc.$(OBJEXT): {$(VPATH)}gc.c
@@ -6373,6 +6384,7 @@ hash.$(OBJEXT): {$(VPATH)}config.h
hash.$(OBJEXT): {$(VPATH)}constant.h
hash.$(OBJEXT): {$(VPATH)}debug_counter.h
hash.$(OBJEXT): {$(VPATH)}defines.h
+hash.$(OBJEXT): {$(VPATH)}encindex.h
hash.$(OBJEXT): {$(VPATH)}encoding.h
hash.$(OBJEXT): {$(VPATH)}hash.c
hash.$(OBJEXT): {$(VPATH)}hash.rbinc
@@ -7203,6 +7215,7 @@ io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
io_buffer.$(OBJEXT): {$(VPATH)}config.h
io_buffer.$(OBJEXT): {$(VPATH)}defines.h
+io_buffer.$(OBJEXT): {$(VPATH)}encindex.h
io_buffer.$(OBJEXT): {$(VPATH)}encoding.h
io_buffer.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
io_buffer.$(OBJEXT): {$(VPATH)}id.h
@@ -7454,6 +7467,7 @@ iseq.$(OBJEXT): {$(VPATH)}config.h
iseq.$(OBJEXT): {$(VPATH)}constant.h
iseq.$(OBJEXT): {$(VPATH)}debug_counter.h
iseq.$(OBJEXT): {$(VPATH)}defines.h
+iseq.$(OBJEXT): {$(VPATH)}encindex.h
iseq.$(OBJEXT): {$(VPATH)}encoding.h
iseq.$(OBJEXT): {$(VPATH)}eval_intern.h
iseq.$(OBJEXT): {$(VPATH)}id.h
@@ -7702,6 +7716,7 @@ jit.$(OBJEXT): {$(VPATH)}config.h
jit.$(OBJEXT): {$(VPATH)}constant.h
jit.$(OBJEXT): {$(VPATH)}debug_counter.h
jit.$(OBJEXT): {$(VPATH)}defines.h
+jit.$(OBJEXT): {$(VPATH)}encindex.h
jit.$(OBJEXT): {$(VPATH)}encoding.h
jit.$(OBJEXT): {$(VPATH)}id.h
jit.$(OBJEXT): {$(VPATH)}id_table.h
@@ -7956,6 +7971,7 @@ load.$(OBJEXT): {$(VPATH)}constant.h
load.$(OBJEXT): {$(VPATH)}darray.h
load.$(OBJEXT): {$(VPATH)}defines.h
load.$(OBJEXT): {$(VPATH)}dln.h
+load.$(OBJEXT): {$(VPATH)}encindex.h
load.$(OBJEXT): {$(VPATH)}encoding.h
load.$(OBJEXT): {$(VPATH)}eval_intern.h
load.$(OBJEXT): {$(VPATH)}id.h
@@ -9979,6 +9995,7 @@ numeric.$(OBJEXT): {$(VPATH)}builtin.h
numeric.$(OBJEXT): {$(VPATH)}config.h
numeric.$(OBJEXT): {$(VPATH)}constant.h
numeric.$(OBJEXT): {$(VPATH)}defines.h
+numeric.$(OBJEXT): {$(VPATH)}encindex.h
numeric.$(OBJEXT): {$(VPATH)}encoding.h
numeric.$(OBJEXT): {$(VPATH)}id.h
numeric.$(OBJEXT): {$(VPATH)}id_table.h
@@ -10200,6 +10217,7 @@ object.$(OBJEXT): {$(VPATH)}config.h
object.$(OBJEXT): {$(VPATH)}constant.h
object.$(OBJEXT): {$(VPATH)}debug_counter.h
object.$(OBJEXT): {$(VPATH)}defines.h
+object.$(OBJEXT): {$(VPATH)}encindex.h
object.$(OBJEXT): {$(VPATH)}encoding.h
object.$(OBJEXT): {$(VPATH)}id.h
object.$(OBJEXT): {$(VPATH)}id_table.h
@@ -10418,6 +10436,7 @@ pack.$(OBJEXT): {$(VPATH)}builtin.h
pack.$(OBJEXT): {$(VPATH)}config.h
pack.$(OBJEXT): {$(VPATH)}constant.h
pack.$(OBJEXT): {$(VPATH)}defines.h
+pack.$(OBJEXT): {$(VPATH)}encindex.h
pack.$(OBJEXT): {$(VPATH)}encoding.h
pack.$(OBJEXT): {$(VPATH)}id.h
pack.$(OBJEXT): {$(VPATH)}id_table.h
@@ -10644,6 +10663,7 @@ parse.$(OBJEXT): {$(VPATH)}config.h
parse.$(OBJEXT): {$(VPATH)}constant.h
parse.$(OBJEXT): {$(VPATH)}defines.h
parse.$(OBJEXT): {$(VPATH)}defs/keywords
+parse.$(OBJEXT): {$(VPATH)}encindex.h
parse.$(OBJEXT): {$(VPATH)}encoding.h
parse.$(OBJEXT): {$(VPATH)}id.h
parse.$(OBJEXT): {$(VPATH)}id_table.h
@@ -12125,6 +12145,7 @@ proc.$(OBJEXT): {$(VPATH)}config.h
proc.$(OBJEXT): {$(VPATH)}constant.h
proc.$(OBJEXT): {$(VPATH)}debug_counter.h
proc.$(OBJEXT): {$(VPATH)}defines.h
+proc.$(OBJEXT): {$(VPATH)}encindex.h
proc.$(OBJEXT): {$(VPATH)}encoding.h
proc.$(OBJEXT): {$(VPATH)}eval_intern.h
proc.$(OBJEXT): {$(VPATH)}id.h
@@ -12356,6 +12377,7 @@ process.$(OBJEXT): {$(VPATH)}constant.h
process.$(OBJEXT): {$(VPATH)}debug_counter.h
process.$(OBJEXT): {$(VPATH)}defines.h
process.$(OBJEXT): {$(VPATH)}dln.h
+process.$(OBJEXT): {$(VPATH)}encindex.h
process.$(OBJEXT): {$(VPATH)}encoding.h
process.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
process.$(OBJEXT): {$(VPATH)}hrtime.h
@@ -12585,6 +12607,7 @@ ractor.$(OBJEXT): {$(VPATH)}config.h
ractor.$(OBJEXT): {$(VPATH)}constant.h
ractor.$(OBJEXT): {$(VPATH)}debug_counter.h
ractor.$(OBJEXT): {$(VPATH)}defines.h
+ractor.$(OBJEXT): {$(VPATH)}encindex.h
ractor.$(OBJEXT): {$(VPATH)}encoding.h
ractor.$(OBJEXT): {$(VPATH)}eval_intern.h
ractor.$(OBJEXT): {$(VPATH)}id.h
@@ -13018,6 +13041,7 @@ range.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
range.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
range.$(OBJEXT): {$(VPATH)}config.h
range.$(OBJEXT): {$(VPATH)}defines.h
+range.$(OBJEXT): {$(VPATH)}encindex.h
range.$(OBJEXT): {$(VPATH)}encoding.h
range.$(OBJEXT): {$(VPATH)}id.h
range.$(OBJEXT): {$(VPATH)}id_table.h
@@ -14688,6 +14712,7 @@ ruby.$(OBJEXT): {$(VPATH)}constant.h
ruby.$(OBJEXT): {$(VPATH)}debug_counter.h
ruby.$(OBJEXT): {$(VPATH)}defines.h
ruby.$(OBJEXT): {$(VPATH)}dln.h
+ruby.$(OBJEXT): {$(VPATH)}encindex.h
ruby.$(OBJEXT): {$(VPATH)}encoding.h
ruby.$(OBJEXT): {$(VPATH)}eval_intern.h
ruby.$(OBJEXT): {$(VPATH)}id.h
@@ -14896,6 +14921,7 @@ ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
ruby_parser.$(OBJEXT): {$(VPATH)}config.h
ruby_parser.$(OBJEXT): {$(VPATH)}defines.h
+ruby_parser.$(OBJEXT): {$(VPATH)}encindex.h
ruby_parser.$(OBJEXT): {$(VPATH)}encoding.h
ruby_parser.$(OBJEXT): {$(VPATH)}intern.h
ruby_parser.$(OBJEXT): {$(VPATH)}internal.h
@@ -15306,6 +15332,7 @@ set.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
set.$(OBJEXT): {$(VPATH)}config.h
set.$(OBJEXT): {$(VPATH)}constant.h
set.$(OBJEXT): {$(VPATH)}defines.h
+set.$(OBJEXT): {$(VPATH)}encindex.h
set.$(OBJEXT): {$(VPATH)}encoding.h
set.$(OBJEXT): {$(VPATH)}id.h
set.$(OBJEXT): {$(VPATH)}id_table.h
@@ -15678,6 +15705,7 @@ shape.$(OBJEXT): {$(VPATH)}config.h
shape.$(OBJEXT): {$(VPATH)}constant.h
shape.$(OBJEXT): {$(VPATH)}debug_counter.h
shape.$(OBJEXT): {$(VPATH)}defines.h
+shape.$(OBJEXT): {$(VPATH)}encindex.h
shape.$(OBJEXT): {$(VPATH)}encoding.h
shape.$(OBJEXT): {$(VPATH)}id.h
shape.$(OBJEXT): {$(VPATH)}id_table.h
@@ -15892,6 +15920,7 @@ signal.$(OBJEXT): {$(VPATH)}config.h
signal.$(OBJEXT): {$(VPATH)}constant.h
signal.$(OBJEXT): {$(VPATH)}debug_counter.h
signal.$(OBJEXT): {$(VPATH)}defines.h
+signal.$(OBJEXT): {$(VPATH)}encindex.h
signal.$(OBJEXT): {$(VPATH)}encoding.h
signal.$(OBJEXT): {$(VPATH)}eval_intern.h
signal.$(OBJEXT): {$(VPATH)}id.h
@@ -16101,6 +16130,7 @@ sprintf.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
sprintf.$(OBJEXT): {$(VPATH)}config.h
sprintf.$(OBJEXT): {$(VPATH)}constant.h
sprintf.$(OBJEXT): {$(VPATH)}defines.h
+sprintf.$(OBJEXT): {$(VPATH)}encindex.h
sprintf.$(OBJEXT): {$(VPATH)}encoding.h
sprintf.$(OBJEXT): {$(VPATH)}id.h
sprintf.$(OBJEXT): {$(VPATH)}id_table.h
@@ -16457,6 +16487,7 @@ strftime.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
strftime.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
strftime.$(OBJEXT): {$(VPATH)}config.h
strftime.$(OBJEXT): {$(VPATH)}defines.h
+strftime.$(OBJEXT): {$(VPATH)}encindex.h
strftime.$(OBJEXT): {$(VPATH)}encoding.h
strftime.$(OBJEXT): {$(VPATH)}intern.h
strftime.$(OBJEXT): {$(VPATH)}internal.h
@@ -16925,6 +16956,7 @@ struct.$(OBJEXT): {$(VPATH)}config.h
struct.$(OBJEXT): {$(VPATH)}constant.h
struct.$(OBJEXT): {$(VPATH)}debug_counter.h
struct.$(OBJEXT): {$(VPATH)}defines.h
+struct.$(OBJEXT): {$(VPATH)}encindex.h
struct.$(OBJEXT): {$(VPATH)}encoding.h
struct.$(OBJEXT): {$(VPATH)}id.h
struct.$(OBJEXT): {$(VPATH)}id_table.h
@@ -17141,6 +17173,7 @@ symbol.$(OBJEXT): {$(VPATH)}constant.h
symbol.$(OBJEXT): {$(VPATH)}darray.h
symbol.$(OBJEXT): {$(VPATH)}debug_counter.h
symbol.$(OBJEXT): {$(VPATH)}defines.h
+symbol.$(OBJEXT): {$(VPATH)}encindex.h
symbol.$(OBJEXT): {$(VPATH)}encoding.h
symbol.$(OBJEXT): {$(VPATH)}id.c
symbol.$(OBJEXT): {$(VPATH)}id.h
@@ -17398,6 +17431,7 @@ thread.$(OBJEXT): {$(VPATH)}constant.h
thread.$(OBJEXT): {$(VPATH)}debug.h
thread.$(OBJEXT): {$(VPATH)}debug_counter.h
thread.$(OBJEXT): {$(VPATH)}defines.h
+thread.$(OBJEXT): {$(VPATH)}encindex.h
thread.$(OBJEXT): {$(VPATH)}encoding.h
thread.$(OBJEXT): {$(VPATH)}eval_intern.h
thread.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
@@ -17628,6 +17662,7 @@ time.$(OBJEXT): {$(VPATH)}builtin.h
time.$(OBJEXT): {$(VPATH)}config.h
time.$(OBJEXT): {$(VPATH)}constant.h
time.$(OBJEXT): {$(VPATH)}defines.h
+time.$(OBJEXT): {$(VPATH)}encindex.h
time.$(OBJEXT): {$(VPATH)}encoding.h
time.$(OBJEXT): {$(VPATH)}id.h
time.$(OBJEXT): {$(VPATH)}id_table.h
@@ -17830,6 +17865,7 @@ transcode.$(OBJEXT): {$(VPATH)}config.h
transcode.$(OBJEXT): {$(VPATH)}constant.h
transcode.$(OBJEXT): {$(VPATH)}debug_counter.h
transcode.$(OBJEXT): {$(VPATH)}defines.h
+transcode.$(OBJEXT): {$(VPATH)}encindex.h
transcode.$(OBJEXT): {$(VPATH)}encoding.h
transcode.$(OBJEXT): {$(VPATH)}id.h
transcode.$(OBJEXT): {$(VPATH)}id_table.h
@@ -18211,6 +18247,7 @@ variable.$(OBJEXT): {$(VPATH)}config.h
variable.$(OBJEXT): {$(VPATH)}constant.h
variable.$(OBJEXT): {$(VPATH)}debug_counter.h
variable.$(OBJEXT): {$(VPATH)}defines.h
+variable.$(OBJEXT): {$(VPATH)}encindex.h
variable.$(OBJEXT): {$(VPATH)}encoding.h
variable.$(OBJEXT): {$(VPATH)}id.h
variable.$(OBJEXT): {$(VPATH)}id_table.h
@@ -18687,6 +18724,7 @@ vm.$(OBJEXT): {$(VPATH)}constant.h
vm.$(OBJEXT): {$(VPATH)}debug_counter.h
vm.$(OBJEXT): {$(VPATH)}defines.h
vm.$(OBJEXT): {$(VPATH)}defs/opt_operand.def
+vm.$(OBJEXT): {$(VPATH)}encindex.h
vm.$(OBJEXT): {$(VPATH)}encoding.h
vm.$(OBJEXT): {$(VPATH)}eval_intern.h
vm.$(OBJEXT): {$(VPATH)}id.h
@@ -18951,6 +18989,7 @@ vm_backtrace.$(OBJEXT): {$(VPATH)}constant.h
vm_backtrace.$(OBJEXT): {$(VPATH)}debug.h
vm_backtrace.$(OBJEXT): {$(VPATH)}debug_counter.h
vm_backtrace.$(OBJEXT): {$(VPATH)}defines.h
+vm_backtrace.$(OBJEXT): {$(VPATH)}encindex.h
vm_backtrace.$(OBJEXT): {$(VPATH)}encoding.h
vm_backtrace.$(OBJEXT): {$(VPATH)}eval_intern.h
vm_backtrace.$(OBJEXT): {$(VPATH)}id.h
@@ -20087,6 +20126,7 @@ yjit.$(OBJEXT): {$(VPATH)}constant.h
yjit.$(OBJEXT): {$(VPATH)}debug.h
yjit.$(OBJEXT): {$(VPATH)}debug_counter.h
yjit.$(OBJEXT): {$(VPATH)}defines.h
+yjit.$(OBJEXT): {$(VPATH)}encindex.h
yjit.$(OBJEXT): {$(VPATH)}encoding.h
yjit.$(OBJEXT): {$(VPATH)}id.h
yjit.$(OBJEXT): {$(VPATH)}id_table.h
@@ -20342,6 +20382,7 @@ zjit.$(OBJEXT): {$(VPATH)}constant.h
zjit.$(OBJEXT): {$(VPATH)}debug.h
zjit.$(OBJEXT): {$(VPATH)}debug_counter.h
zjit.$(OBJEXT): {$(VPATH)}defines.h
+zjit.$(OBJEXT): {$(VPATH)}encindex.h
zjit.$(OBJEXT): {$(VPATH)}encoding.h
zjit.$(OBJEXT): {$(VPATH)}id.h
zjit.$(OBJEXT): {$(VPATH)}id_table.h
diff --git a/doc/contributing/documentation_guide.md b/doc/contributing/documentation_guide.md
index 29a1c72b02f0de..4b1e2ac9adaf2e 100644
--- a/doc/contributing/documentation_guide.md
+++ b/doc/contributing/documentation_guide.md
@@ -265,16 +265,16 @@ and _never_ when referring to the class itself.
When writing an explicit link, follow these guidelines.
-#### +rdoc-ref+ Scheme
+#### `rdoc-ref` Scheme
-Use the +rdoc-ref+ scheme for:
+Use the `rdoc-ref` scheme for:
- A link in core documentation to other core documentation.
- A link in core documentation to documentation in a standard library package.
- A link in a standard library package to other documentation in that same
standard library package.
-See section "+rdoc-ref+ Scheme" in [links].
+See section "`rdoc-ref` Scheme" in [links].
#### URL-Based Link
@@ -297,7 +297,7 @@ The name of a variable (as specified in its call-seq) should be marked up as
[monofont].
Also, use monofont text for the name of a transient variable
-(i.e., one defined and used only in the discussion, such as +n+).
+(i.e., one defined and used only in the discussion, such as `n`).
### HTML Tags
@@ -491,10 +491,10 @@ Return types:
- If the method can return multiple different types,
separate the types with "or" and, if necessary, commas.
-- If the method can return multiple types, use +object+.
-- If the method returns the receiver, use +self+.
+- If the method can return multiple types, use `object`.
+- If the method returns the receiver, use `self`.
- If the method returns an object of the same class,
- prefix `new_` if and only if the object is not +self+;
+ prefix `new_` if and only if the object is not `self`;
example: `new_array`.
Aliases:
diff --git a/doc/language/exceptions.md b/doc/language/exceptions.md
index 2c47455911376f..5f8f0ece69d2e6 100644
--- a/doc/language/exceptions.md
+++ b/doc/language/exceptions.md
@@ -504,18 +504,18 @@ These methods return backtrace information:
By default, Ruby sets the backtrace of the exception to the location where it
was raised.
-The developer might adjust this by either providing +backtrace+ argument
+The developer might adjust this by either providing `backtrace` argument
to Kernel#raise, or using Exception#set_backtrace.
Note that:
-- by default, both +backtrace+ and +backtrace_locations+ represent the same backtrace;
+- by default, both `backtrace` and `backtrace_locations` represent the same backtrace;
- if the developer sets the backtrace by one of the above methods to an array of
Thread::Backtrace::Location, they still represent the same backtrace;
- if the developer sets the backtrace to a string or an array of strings:
- - by Kernel#raise: +backtrace_locations+ become +nil+;
- - by Exception#set_backtrace: +backtrace_locations+ preserve the original
+ - by Kernel#raise: `backtrace_locations` become `nil`;
+ - by Exception#set_backtrace: `backtrace_locations` preserve the original
value;
-- if the developer sets the backtrace to +nil+ by Exception#set_backtrace,
- +backtrace_locations+ preserve the original value; but if the exception is then
- reraised, both +backtrace+ and +backtrace_locations+ become the location of reraise.
+- if the developer sets the backtrace to `nil` by Exception#set_backtrace,
+ `backtrace_locations` preserve the original value; but if the exception is then
+ reraised, both `backtrace` and `backtrace_locations` become the location of reraise.
diff --git a/doc/language/options.md b/doc/language/options.md
index cca87f42dedf0f..3421d73f552b40 100644
--- a/doc/language/options.md
+++ b/doc/language/options.md
@@ -640,7 +640,7 @@ Option `--encoding` is an alias for
Option `--external-encoding`
sets the default external encoding for the invoked Ruby program;
-for values of +encoding+,
+for values of `encoding`,
see {Encoding: Names and Aliases}[rdoc-ref:encodings.rdoc@Names+and+Aliases].
```console
@@ -662,7 +662,7 @@ For a shorter help message, use option `-h`.
Option `--internal-encoding`
sets the default internal encoding for the invoked Ruby program;
-for values of +encoding+,
+for values of `encoding`,
see {Encoding: Names and Aliases}[rdoc-ref:encodings.rdoc@Names+and+Aliases].
```console
diff --git a/ext/-test-/stack/depend b/ext/-test-/stack/depend
index 31571c882e6eb7..77e93bb201db09 100644
--- a/ext/-test-/stack/depend
+++ b/ext/-test-/stack/depend
@@ -172,6 +172,7 @@ stack.o: $(hdrdir)/ruby/oniguruma.h
stack.o: $(hdrdir)/ruby/ruby.h
stack.o: $(hdrdir)/ruby/st.h
stack.o: $(hdrdir)/ruby/subst.h
+stack.o: $(top_srcdir)/encindex.h
stack.o: $(top_srcdir)/internal/compilers.h
stack.o: $(top_srcdir)/internal/string.h
stack.o: stack.c
diff --git a/ext/-test-/string/depend b/ext/-test-/string/depend
index de6e775accf395..478ae3b82b7500 100644
--- a/ext/-test-/string/depend
+++ b/ext/-test-/string/depend
@@ -172,6 +172,7 @@ capacity.o: $(hdrdir)/ruby/oniguruma.h
capacity.o: $(hdrdir)/ruby/ruby.h
capacity.o: $(hdrdir)/ruby/st.h
capacity.o: $(hdrdir)/ruby/subst.h
+capacity.o: $(top_srcdir)/encindex.h
capacity.o: $(top_srcdir)/internal/compilers.h
capacity.o: $(top_srcdir)/internal/string.h
capacity.o: capacity.c
@@ -679,6 +680,7 @@ cstr.o: $(hdrdir)/ruby/oniguruma.h
cstr.o: $(hdrdir)/ruby/ruby.h
cstr.o: $(hdrdir)/ruby/st.h
cstr.o: $(hdrdir)/ruby/subst.h
+cstr.o: $(top_srcdir)/encindex.h
cstr.o: $(top_srcdir)/internal.h
cstr.o: $(top_srcdir)/internal/compilers.h
cstr.o: $(top_srcdir)/internal/string.h
@@ -1535,6 +1537,7 @@ fstring.o: $(hdrdir)/ruby/oniguruma.h
fstring.o: $(hdrdir)/ruby/ruby.h
fstring.o: $(hdrdir)/ruby/st.h
fstring.o: $(hdrdir)/ruby/subst.h
+fstring.o: $(top_srcdir)/encindex.h
fstring.o: $(top_srcdir)/internal/compilers.h
fstring.o: $(top_srcdir)/internal/string.h
fstring.o: fstring.c
diff --git a/ext/objspace/depend b/ext/objspace/depend
index 04b26eb6c26567..d9dfc0c42b4740 100644
--- a/ext/objspace/depend
+++ b/ext/objspace/depend
@@ -602,6 +602,7 @@ objspace_dump.o: $(top_srcdir)/ccan/list/list.h
objspace_dump.o: $(top_srcdir)/ccan/str/str.h
objspace_dump.o: $(top_srcdir)/constant.h
objspace_dump.o: $(top_srcdir)/debug_counter.h
+objspace_dump.o: $(top_srcdir)/encindex.h
objspace_dump.o: $(top_srcdir)/id_table.h
objspace_dump.o: $(top_srcdir)/internal.h
objspace_dump.o: $(top_srcdir)/internal/array.h
diff --git a/ext/ripper/depend b/ext/ripper/depend
index bd2de759065ba2..944da25ee94f38 100644
--- a/ext/ripper/depend
+++ b/ext/ripper/depend
@@ -578,6 +578,7 @@ ripper.o: $(top_srcdir)/ccan/container_of/container_of.h
ripper.o: $(top_srcdir)/ccan/list/list.h
ripper.o: $(top_srcdir)/ccan/str/str.h
ripper.o: $(top_srcdir)/constant.h
+ripper.o: $(top_srcdir)/encindex.h
ripper.o: $(top_srcdir)/id_table.h
ripper.o: $(top_srcdir)/internal.h
ripper.o: $(top_srcdir)/internal/array.h
diff --git a/ext/socket/depend b/ext/socket/depend
index 3573dc45e2fa9f..77f6239a3da52b 100644
--- a/ext/socket/depend
+++ b/ext/socket/depend
@@ -193,6 +193,7 @@ ancdata.o: $(top_srcdir)/ccan/check_type/check_type.h
ancdata.o: $(top_srcdir)/ccan/container_of/container_of.h
ancdata.o: $(top_srcdir)/ccan/list/list.h
ancdata.o: $(top_srcdir)/ccan/str/str.h
+ancdata.o: $(top_srcdir)/encindex.h
ancdata.o: $(top_srcdir)/id_table.h
ancdata.o: $(top_srcdir)/internal.h
ancdata.o: $(top_srcdir)/internal/array.h
@@ -408,6 +409,7 @@ basicsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
basicsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
basicsocket.o: $(top_srcdir)/ccan/list/list.h
basicsocket.o: $(top_srcdir)/ccan/str/str.h
+basicsocket.o: $(top_srcdir)/encindex.h
basicsocket.o: $(top_srcdir)/id_table.h
basicsocket.o: $(top_srcdir)/internal.h
basicsocket.o: $(top_srcdir)/internal/array.h
@@ -623,6 +625,7 @@ constants.o: $(top_srcdir)/ccan/check_type/check_type.h
constants.o: $(top_srcdir)/ccan/container_of/container_of.h
constants.o: $(top_srcdir)/ccan/list/list.h
constants.o: $(top_srcdir)/ccan/str/str.h
+constants.o: $(top_srcdir)/encindex.h
constants.o: $(top_srcdir)/id_table.h
constants.o: $(top_srcdir)/internal.h
constants.o: $(top_srcdir)/internal/array.h
@@ -839,6 +842,7 @@ ifaddr.o: $(top_srcdir)/ccan/check_type/check_type.h
ifaddr.o: $(top_srcdir)/ccan/container_of/container_of.h
ifaddr.o: $(top_srcdir)/ccan/list/list.h
ifaddr.o: $(top_srcdir)/ccan/str/str.h
+ifaddr.o: $(top_srcdir)/encindex.h
ifaddr.o: $(top_srcdir)/id_table.h
ifaddr.o: $(top_srcdir)/internal.h
ifaddr.o: $(top_srcdir)/internal/array.h
@@ -1054,6 +1058,7 @@ init.o: $(top_srcdir)/ccan/check_type/check_type.h
init.o: $(top_srcdir)/ccan/container_of/container_of.h
init.o: $(top_srcdir)/ccan/list/list.h
init.o: $(top_srcdir)/ccan/str/str.h
+init.o: $(top_srcdir)/encindex.h
init.o: $(top_srcdir)/id_table.h
init.o: $(top_srcdir)/internal.h
init.o: $(top_srcdir)/internal/array.h
@@ -1269,6 +1274,7 @@ ipsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
ipsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
ipsocket.o: $(top_srcdir)/ccan/list/list.h
ipsocket.o: $(top_srcdir)/ccan/str/str.h
+ipsocket.o: $(top_srcdir)/encindex.h
ipsocket.o: $(top_srcdir)/id_table.h
ipsocket.o: $(top_srcdir)/internal.h
ipsocket.o: $(top_srcdir)/internal/array.h
@@ -1484,6 +1490,7 @@ option.o: $(top_srcdir)/ccan/check_type/check_type.h
option.o: $(top_srcdir)/ccan/container_of/container_of.h
option.o: $(top_srcdir)/ccan/list/list.h
option.o: $(top_srcdir)/ccan/str/str.h
+option.o: $(top_srcdir)/encindex.h
option.o: $(top_srcdir)/id_table.h
option.o: $(top_srcdir)/internal.h
option.o: $(top_srcdir)/internal/array.h
@@ -1699,6 +1706,7 @@ raddrinfo.o: $(top_srcdir)/ccan/check_type/check_type.h
raddrinfo.o: $(top_srcdir)/ccan/container_of/container_of.h
raddrinfo.o: $(top_srcdir)/ccan/list/list.h
raddrinfo.o: $(top_srcdir)/ccan/str/str.h
+raddrinfo.o: $(top_srcdir)/encindex.h
raddrinfo.o: $(top_srcdir)/id_table.h
raddrinfo.o: $(top_srcdir)/internal.h
raddrinfo.o: $(top_srcdir)/internal/array.h
@@ -1914,6 +1922,7 @@ socket.o: $(top_srcdir)/ccan/check_type/check_type.h
socket.o: $(top_srcdir)/ccan/container_of/container_of.h
socket.o: $(top_srcdir)/ccan/list/list.h
socket.o: $(top_srcdir)/ccan/str/str.h
+socket.o: $(top_srcdir)/encindex.h
socket.o: $(top_srcdir)/id_table.h
socket.o: $(top_srcdir)/internal.h
socket.o: $(top_srcdir)/internal/array.h
@@ -2129,6 +2138,7 @@ sockssocket.o: $(top_srcdir)/ccan/check_type/check_type.h
sockssocket.o: $(top_srcdir)/ccan/container_of/container_of.h
sockssocket.o: $(top_srcdir)/ccan/list/list.h
sockssocket.o: $(top_srcdir)/ccan/str/str.h
+sockssocket.o: $(top_srcdir)/encindex.h
sockssocket.o: $(top_srcdir)/id_table.h
sockssocket.o: $(top_srcdir)/internal.h
sockssocket.o: $(top_srcdir)/internal/array.h
@@ -2344,6 +2354,7 @@ tcpserver.o: $(top_srcdir)/ccan/check_type/check_type.h
tcpserver.o: $(top_srcdir)/ccan/container_of/container_of.h
tcpserver.o: $(top_srcdir)/ccan/list/list.h
tcpserver.o: $(top_srcdir)/ccan/str/str.h
+tcpserver.o: $(top_srcdir)/encindex.h
tcpserver.o: $(top_srcdir)/id_table.h
tcpserver.o: $(top_srcdir)/internal.h
tcpserver.o: $(top_srcdir)/internal/array.h
@@ -2559,6 +2570,7 @@ tcpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
tcpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
tcpsocket.o: $(top_srcdir)/ccan/list/list.h
tcpsocket.o: $(top_srcdir)/ccan/str/str.h
+tcpsocket.o: $(top_srcdir)/encindex.h
tcpsocket.o: $(top_srcdir)/id_table.h
tcpsocket.o: $(top_srcdir)/internal.h
tcpsocket.o: $(top_srcdir)/internal/array.h
@@ -2774,6 +2786,7 @@ udpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
udpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
udpsocket.o: $(top_srcdir)/ccan/list/list.h
udpsocket.o: $(top_srcdir)/ccan/str/str.h
+udpsocket.o: $(top_srcdir)/encindex.h
udpsocket.o: $(top_srcdir)/id_table.h
udpsocket.o: $(top_srcdir)/internal.h
udpsocket.o: $(top_srcdir)/internal/array.h
@@ -2989,6 +3002,7 @@ unixserver.o: $(top_srcdir)/ccan/check_type/check_type.h
unixserver.o: $(top_srcdir)/ccan/container_of/container_of.h
unixserver.o: $(top_srcdir)/ccan/list/list.h
unixserver.o: $(top_srcdir)/ccan/str/str.h
+unixserver.o: $(top_srcdir)/encindex.h
unixserver.o: $(top_srcdir)/id_table.h
unixserver.o: $(top_srcdir)/internal.h
unixserver.o: $(top_srcdir)/internal/array.h
@@ -3204,6 +3218,7 @@ unixsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
unixsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
unixsocket.o: $(top_srcdir)/ccan/list/list.h
unixsocket.o: $(top_srcdir)/ccan/str/str.h
+unixsocket.o: $(top_srcdir)/encindex.h
unixsocket.o: $(top_srcdir)/id_table.h
unixsocket.o: $(top_srcdir)/internal.h
unixsocket.o: $(top_srcdir)/internal/array.h
diff --git a/file.c b/file.c
index 89294ea009e7b3..867b041a44af0f 100644
--- a/file.c
+++ b/file.c
@@ -169,6 +169,7 @@ typedef struct timespec stat_timestamp;
#include "internal.h"
#include "internal/compilers.h"
#include "internal/dir.h"
+#include "internal/encoding.h"
#include "internal/error.h"
#include "internal/file.h"
#include "internal/io.h"
@@ -3723,7 +3724,7 @@ rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
static rb_encoding *
fs_enc_check(VALUE path1, VALUE path2)
{
- rb_encoding *enc = rb_enc_check(path1, path2);
+ rb_encoding *enc = rb_enc_check_str(path1, path2);
int encidx = rb_enc_to_index(enc);
if (encidx == ENCINDEX_US_ASCII) {
encidx = rb_enc_get_index(path1);
@@ -4651,7 +4652,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum
return resolved;
}
-static VALUE rb_file_join(VALUE ary);
+static VALUE rb_file_join(long argc, VALUE *args);
#ifndef HAVE_REALPATH
static VALUE
@@ -4692,7 +4693,8 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, rb_encoding *origenc, enum
unresolved_path = rb_str_dup_frozen(path);
if (*RSTRING_PTR(unresolved_path) != '/' && !NIL_P(basedir)) {
- unresolved_path = rb_file_join(rb_assoc_new(basedir, unresolved_path));
+ VALUE paths[2] = {basedir, unresolved_path};
+ unresolved_path = rb_file_join(2, paths);
}
if (origenc) unresolved_path = TO_OSPATH(unresolved_path);
@@ -5255,15 +5257,17 @@ rb_file_s_split(VALUE klass, VALUE path)
return rb_assoc_new(rb_file_dirname(path), rb_file_s_basename(1,&path,Qundef));
}
+static VALUE rb_file_join_ary(VALUE ary);
+
static VALUE
file_inspect_join(VALUE ary, VALUE arg, int recur)
{
if (recur || ary == arg) rb_raise(rb_eArgError, "recursive array");
- return rb_file_join(arg);
+ return rb_file_join_ary(arg);
}
static VALUE
-rb_file_join(VALUE ary)
+rb_file_join_ary(VALUE ary)
{
long len, i;
VALUE result, tmp;
@@ -5328,6 +5332,77 @@ rb_file_join(VALUE ary)
return result;
}
+static inline VALUE
+rb_file_join_fastpath(long argc, VALUE *args)
+{
+ long size = argc;
+
+ long i;
+ for (i = 0; i < argc; i++) {
+ VALUE tmp = args[i];
+ if (RB_LIKELY(RB_TYPE_P(tmp, T_STRING) && rb_str_enc_fastpath(tmp))) {
+ size += RSTRING_LEN(tmp);
+ }
+ else {
+ return 0;
+ }
+ }
+
+ VALUE result = rb_str_buf_new(size);
+
+ int encidx = ENCODING_GET_INLINED(args[0]);
+ ENCODING_SET_INLINED(result, encidx);
+ rb_str_buf_append(result, args[0]);
+
+ const char *name = RSTRING_PTR(result);
+ for (i = 1; i < argc; i++) {
+ VALUE tmp = args[i];
+ long len = RSTRING_LEN(result);
+
+ const char *tmp_s;
+ long tmp_len;
+ RSTRING_GETMEM(tmp, tmp_s, tmp_len);
+
+ if (isdirsep(tmp_s[0])) {
+ // right side has a leading separator, remove left side separators.
+ long trailing_seps = 0;
+ while (isdirsep(name[len - trailing_seps - 1])) {
+ trailing_seps++;
+ }
+ rb_str_set_len(result, len - trailing_seps);
+ }
+ else if (!isdirsep(name[len - 1])) {
+ // neither side have a separator, append one;
+ rb_str_cat(result, "/", 1);
+ }
+
+ if (RB_UNLIKELY(ENCODING_GET_INLINED(tmp) != encidx)) {
+ rb_encoding *new_enc = fs_enc_check(result, tmp);
+ rb_enc_associate(result, new_enc);
+ encidx = rb_enc_to_index(new_enc);
+ }
+
+ rb_str_buf_cat(result, tmp_s, tmp_len);
+ }
+
+ rb_str_null_check(result);
+ return result;
+}
+
+static inline VALUE
+rb_file_join(long argc, VALUE *args)
+{
+ if (RB_UNLIKELY(argc == 0)) {
+ return rb_str_new(0, 0);
+ }
+
+ VALUE result = rb_file_join_fastpath(argc, args);
+ if (RB_LIKELY(result)) {
+ return result;
+ }
+
+ return rb_file_join_ary(rb_ary_new_from_values(argc, args));
+}
/*
* call-seq:
* File.join(string, ...) -> string
@@ -5340,9 +5415,9 @@ rb_file_join(VALUE ary)
*/
static VALUE
-rb_file_s_join(VALUE klass, VALUE args)
+rb_file_s_join(int argc, VALUE *argv, VALUE klass)
{
- return rb_file_join(args);
+ return rb_file_join(argc, argv);
}
#if defined(HAVE_TRUNCATE)
@@ -5482,7 +5557,7 @@ rb_thread_flock(void *data)
* call-seq:
* flock(locking_constant) -> 0 or false
*
- * Locks or unlocks file +self+ according to the given `locking_constant`,
+ * Locks or unlocks file `self` according to the given `locking_constant`,
* a bitwise OR of the values in the table below.
*
* Not available on all platforms.
@@ -5492,10 +5567,10 @@ rb_thread_flock(void *data)
*
* | Constant | Lock | Effect
* |-----------------|--------------|-----------------------------------------------------------------------------------------------------------------|
- * | +File::LOCK_EX+ | Exclusive | Only one process may hold an exclusive lock for +self+ at a time. |
- * | +File::LOCK_NB+ | Non-blocking | No blocking; may be combined with +File::LOCK_SH+ or +File::LOCK_EX+ using the bitwise OR operator \|. |
- * | +File::LOCK_SH+ | Shared | Multiple processes may each hold a shared lock for +self+ at the same time. |
- * | +File::LOCK_UN+ | Unlock | Remove an existing lock held by this process. |
+ * | `File::LOCK_EX` | Exclusive | Only one process may hold an exclusive lock for `self` at a time. |
+ * | `File::LOCK_NB` | Non-blocking | No blocking; may be combined with `File::LOCK_SH` or `File::LOCK_EX` using the bitwise OR operator \|. |
+ * | `File::LOCK_SH` | Shared | Multiple processes may each hold a shared lock for `self` at the same time. |
+ * | `File::LOCK_UN` | Unlock | Remove an existing lock held by this process. |
*
* Example:
*
@@ -5622,11 +5697,11 @@ test_check(int n, int argc, VALUE *argv)
* | 'z' | Whether the entity exists and is of length zero. |
*
* - This test operates only on the entity at `path0`,
- * and returns an integer size or +nil+:
+ * and returns an integer size or `nil`:
*
* | Character | Test |
* |:------------:|:---------------------------------------------------------------------------------------------|
- * | 's' | Returns positive integer size if the entity exists and has non-zero length, +nil+ otherwise. |
+ * | 's' | Returns positive integer size if the entity exists and has non-zero length, `nil` otherwise. |
*
* - Each of these tests operates only on the entity at `path0`,
* and returns a Time object;
@@ -7584,7 +7659,7 @@ Init_File(void)
/* separates directory parts in path */
rb_define_const(rb_cFile, "SEPARATOR", separator);
rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1);
- rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2);
+ rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -1);
#ifdef DOSISH
/* platform specific alternative separator */
diff --git a/gc.c b/gc.c
index 1fe3dbf0aebb4b..ab0539cd3358a3 100644
--- a/gc.c
+++ b/gc.c
@@ -1709,63 +1709,68 @@ rb_gc_copy_finalizer(VALUE dest, VALUE obj)
/*
* call-seq:
- * ObjectSpace.define_finalizer(obj, aProc=proc())
+ * ObjectSpace.define_finalizer(obj) {|id| ... } -> array
+ * ObjectSpace.define_finalizer(obj, finalizer) -> array
*
- * Adds aProc as a finalizer, to be called after obj
- * was destroyed. The object ID of the obj will be passed
- * as an argument to aProc. If aProc is a lambda or
- * method, make sure it can be called with a single argument.
+ * Adds a new finalizer for +obj+ that is called when +obj+ is destroyed
+ * by the garbage collector or when Ruby shuts down (which ever comes first).
*
- * The return value is an array [0, aProc].
+ * With a block given, uses the block as the callback. Without a block given,
+ * uses a callable object +finalizer+ as the callback. The callback is called
+ * when +obj+ is destroyed with a single argument +id+ which is the object
+ * ID of +obj+ (see Object#object_id).
*
- * The two recommended patterns are to either create the finaliser proc
- * in a non-instance method where it can safely capture the needed state,
- * or to use a custom callable object that stores the needed state
- * explicitly as instance variables.
+ * The return value is an array [0, callback], where +callback+
+ * is a Proc created from the block if one was given or +finalizer+ otherwise.
*
- * class Foo
- * def initialize(data_needed_for_finalization)
- * ObjectSpace.define_finalizer(self, self.class.create_finalizer(data_needed_for_finalization))
- * end
+ * Note that defining a finalizer in an instance method of the object may prevent
+ * the object from being garbage collected since if the block or +finalizer+ refers
+ * to +obj+ then +obj+ will never be reclaimed by the garbage collector. For example,
+ * the following script demonstrates the issue:
*
- * def self.create_finalizer(data_needed_for_finalization)
- * proc {
- * puts "finalizing #{data_needed_for_finalization}"
- * }
+ * class Foo
+ * def define_final
+ * ObjectSpace.define_finalizer(self) do |id|
+ * puts "Running finalizer for #{id}!"
+ * end
* end
* end
*
- * class Bar
- * class Remover
- * def initialize(data_needed_for_finalization)
- * @data_needed_for_finalization = data_needed_for_finalization
- * end
+ * obj = Foo.new
+ * obj.define_final
*
- * def call(id)
- * puts "finalizing #{@data_needed_for_finalization}"
- * end
+ * There are two patterns to solve this issue:
+ *
+ * - Create the finalizer in a non-instance method so it can safely capture
+ * the needed state:
+ *
+ * class Foo
+ * def define_final
+ * ObjectSpace.define_finalizer(self, self.class.create_finalizer)
* end
*
- * def initialize(data_needed_for_finalization)
- * ObjectSpace.define_finalizer(self, Remover.new(data_needed_for_finalization))
+ * def self.create_finalizer
+ * proc do |id|
+ * puts "Running finalizer for #{id}!"
+ * end
* end
* end
*
- * Note that if your finalizer references the object to be
- * finalized it will never be run on GC, although it will still be
- * run at exit. You will get a warning if you capture the object
- * to be finalized as the receiver of the finalizer.
+ * - Use a callable object:
+ *
+ * class Foo
+ * class Finalizer
+ * def call(id)
+ * puts "Running finalizer for #{id}!"
+ * end
+ * end
*
- * class CapturesSelf
- * def initialize(name)
- * ObjectSpace.define_finalizer(self, proc {
- * # this finalizer will only be run on exit
- * puts "finalizing #{name}"
- * })
+ * def define_final
+ * ObjectSpace.define_finalizer(self, Finalizer.new)
* end
* end
*
- * Also note that finalization can be unpredictable and is never guaranteed
+ * Note that finalization can be unpredictable and is never guaranteed
* to be run except on exit.
*/
diff --git a/internal/string.h b/internal/string.h
index d6fea62061ddfa..cd1e8d79296ef4 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -14,6 +14,7 @@
#include "ruby/internal/stdbool.h" /* for bool */
#include "ruby/encoding.h" /* for rb_encoding */
#include "ruby/ruby.h" /* for VALUE */
+#include "encindex.h"
#define STR_SHARED FL_USER0 /* = ELTS_SHARED */
#define STR_NOEMBED FL_USER1
@@ -29,6 +30,26 @@ enum ruby_rstring_private_flags {
# undef rb_fstring_cstr
#endif
+static inline bool
+rb_str_encindex_fastpath(int encindex)
+{
+ // The overwhelming majority of strings are in one of these 3 encodings.
+ switch (encindex) {
+ case ENCINDEX_ASCII_8BIT:
+ case ENCINDEX_UTF_8:
+ case ENCINDEX_US_ASCII:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+rb_str_enc_fastpath(VALUE str)
+{
+ return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str));
+}
+
/* string.c */
VALUE rb_str_dup_m(VALUE str);
VALUE rb_fstring(VALUE);
@@ -63,6 +84,7 @@ bool rb_str_reembeddable_p(VALUE);
VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
VALUE rb_str_with_debug_created_info(VALUE, VALUE, int);
VALUE rb_str_frozen_bare_string(VALUE);
+const char *rb_str_null_check(VALUE);
/* error.c */
void rb_warn_unchilled_literal(VALUE str);
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 46f6130357e024..b7c54178ac3193 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -225,14 +225,6 @@ def state
end
end
- # Ripper doesn't include the rest of the token in the event, so we need to
- # trim it down to just the content on the first line when comparing.
- class EndContentToken < Token
- def ==(other) # :nodoc:
- [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other
- end
- end
-
# Tokens where state should be ignored
# used for :on_comment, :on_heredoc_end, :on_embexpr_end
class IgnoreStateToken < Token
@@ -680,7 +672,10 @@ def result
token =
case event
when :on___end__
- EndContentToken.new([[lineno, column], event, value, lex_state])
+ # Ripper doesn't include the rest of the token in the event, so we need to
+ # trim it down to just the content on the first line.
+ value = value[0..value.index("\n")]
+ Token.new([[lineno, column], event, value, lex_state])
when :on_comment
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
when :on_heredoc_end
diff --git a/string.c b/string.c
index 2d74c46a360aa8..1e0b9929ef150c 100644
--- a/string.c
+++ b/string.c
@@ -146,27 +146,7 @@ VALUE rb_cSymbol;
RSTRING(str)->len = (n); \
} while (0)
-static inline bool
-str_encindex_fastpath(int encindex)
-{
- // The overwhelming majority of strings are in one of these 3 encodings.
- switch (encindex) {
- case ENCINDEX_ASCII_8BIT:
- case ENCINDEX_UTF_8:
- case ENCINDEX_US_ASCII:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool
-str_enc_fastpath(VALUE str)
-{
- return str_encindex_fastpath(ENCODING_GET_INLINED(str));
-}
-
-#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
+#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_FILL(ptr, termlen) do {\
char *const term_fill_ptr = (ptr);\
const int term_fill_len = (termlen);\
@@ -960,7 +940,7 @@ static inline bool
rb_enc_str_asciicompat(VALUE str)
{
int encindex = ENCODING_GET_INLINED(str);
- return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
+ return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
}
int
@@ -2796,7 +2776,7 @@ rb_must_asciicompat(VALUE str)
rb_raise(rb_eTypeError, "not encoding capable object");
}
- if (RB_LIKELY(str_encindex_fastpath(encindex))) {
+ if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) {
return;
}
@@ -2897,16 +2877,21 @@ str_null_check(VALUE str, int *w)
{
char *s = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
- rb_encoding *enc = rb_enc_get(str);
- const int minlen = rb_enc_mbminlen(enc);
+ int minlen = 1;
+
+ if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
+ rb_encoding *enc = rb_enc_get(str);
+ minlen = rb_enc_mbminlen(enc);
- if (minlen > 1) {
- *w = 1;
- if (str_null_char(s, len, minlen, enc)) {
- return NULL;
+ if (minlen > 1) {
+ *w = 1;
+ if (str_null_char(s, len, minlen, enc)) {
+ return NULL;
+ }
+ return str_fill_term(str, s, len, minlen);
}
- return str_fill_term(str, s, len, minlen);
}
+
*w = 0;
if (!s || memchr(s, 0, len)) {
return NULL;
@@ -2917,6 +2902,34 @@ str_null_check(VALUE str, int *w)
return s;
}
+const char *
+rb_str_null_check(VALUE str)
+{
+ RUBY_ASSERT(RB_TYPE_P(str, T_STRING));
+
+ char *s;
+ long len;
+ RSTRING_GETMEM(str, s, len);
+
+ if (RB_LIKELY(rb_str_enc_fastpath(str))) {
+ if (!s || memchr(s, 0, len)) {
+ rb_raise(rb_eArgError, "string contains null byte");
+ }
+ }
+ else {
+ int w;
+ const char *s = str_null_check(str, &w);
+ if (!s) {
+ if (w) {
+ rb_raise(rb_eArgError, "string contains null char");
+ }
+ rb_raise(rb_eArgError, "string contains null byte");
+ }
+ }
+
+ return s;
+}
+
char *
rb_str_to_cstr(VALUE str)
{
@@ -3765,7 +3778,7 @@ rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr = rb_enc_str_coderange(str2);
- if (str_enc_fastpath(str)) {
+ if (rb_str_enc_fastpath(str)) {
switch (str2_cr) {
case ENC_CODERANGE_7BIT:
// If RHS is 7bit we can do simple concatenation
diff --git a/test/prism/fixtures/__END__.txt b/test/prism/fixtures/__END__.txt
new file mode 100644
index 00000000000000..c0f4f28004cdc5
--- /dev/null
+++ b/test/prism/fixtures/__END__.txt
@@ -0,0 +1,3 @@
+foo
+__END__
+Available in DATA constant