Skip to content

Commit 7e0e998

Browse files
committed
rb_file_join_fastpath: optimize searching for separators
`chompdirsep` searches from the start of the string each time, which may be necessary for certain encodings (I am not sure), but for the common encodings it is very wasteful. Instead, we can start from the back of the string and compare only one or two characters in most cases.

Also replace `StringValueCStr` with the simpler `rb_str_null_check`, since we only care whether the string contains NUL bytes, not whether it is NUL-terminated. We also check only the final string for NUL bytes.

```
compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-18T12:55:15Z spedup-file-join 5948e92e03) +PRISM [arm64-darwin25]
warming up....

|              |compare-ruby|built-ruby|
|:-------------|-----------:|---------:|
|two_strings   |      2.477M|   19.317M|
|              |           -|     7.80x|
|many_strings  |    547.577k|   10.298M|
|              |           -|    18.81x|
|array         |    515.280k|  523.291k|
|              |           -|     1.02x|
|mixed         |    621.840k|  635.422k|
|              |           -|     1.02x|
```
1 parent 6cd4549 commit 7e0e998

File tree

3 files changed

+44
-22
lines changed

3 files changed

+44
-22
lines changed

file.c

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3714,22 +3714,6 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc)
37143714
return (char *)path;
37153715
}
37163716

3717-
/*
 * Locate the run of directory separators that ends the byte range
 * [path, end).
 *
 * Returns a pointer to the first separator of that trailing run, or `end`
 * when the range does not end with a separator.  When the range is empty
 * (path >= end), `path` is returned unchanged.
 *
 * Bytes are inspected one at a time with isdirsep(); no encoding-aware
 * stepping is performed.  Only the tail of the range decides the result,
 * so the scan walks backwards from `end` instead of visiting every byte
 * from the start.
 */
static char *
single_byte_chompdirsep(const char *path, const char *end)
{
    if (path >= end) {
        return (char *)path;
    }

    /* Step back over trailing separators; stop at the first other byte. */
    while (path < end && isdirsep(end[-1])) {
        end--;
    }
    return (char *)end;
}
3732-
37333717
char *
37343718
rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
37353719
{
@@ -5374,14 +5358,22 @@ rb_file_join_fastpath(long argc, VALUE *args)
53745358
const char *name = RSTRING_PTR(result);
53755359
for (i = 1; i < argc; i++) {
53765360
VALUE tmp = args[i];
5377-
StringValueCStr(tmp);
53785361
long len = RSTRING_LEN(result);
53795362

5380-
const char *tail = single_byte_chompdirsep(name, name + len);
5381-
if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) {
5382-
rb_str_set_len(result, tail - name);
5363+
const char *tmp_s;
5364+
long tmp_len;
5365+
RSTRING_GETMEM(tmp, tmp_s, tmp_len);
5366+
5367+
if (isdirsep(tmp_s[0])) {
5368+
// right side has a leading separator, remove left side separators.
5369+
long trailing_seps = 0;
5370+
while (isdirsep(name[len - trailing_seps - 1])) {
5371+
trailing_seps++;
5372+
}
5373+
rb_str_set_len(result, len - trailing_seps);
53835374
}
5384-
else if (!*tail) {
5375+
else if (!isdirsep(name[len - 1])) {
5376+
// neither side has a separator; append one.
53855377
rb_str_cat(result, "/", 1);
53865378
}
53875379

@@ -5391,9 +5383,10 @@ rb_file_join_fastpath(long argc, VALUE *args)
53915383
encidx = rb_enc_to_index(new_enc);
53925384
}
53935385

5394-
rb_str_buf_append(result, tmp);
5386+
rb_str_buf_cat(result, tmp_s, tmp_len);
53955387
}
53965388

5389+
rb_str_null_check(result);
53975390
return result;
53985391
}
53995392

internal/string.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ bool rb_str_reembeddable_p(VALUE);
8484
VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
8585
VALUE rb_str_with_debug_created_info(VALUE, VALUE, int);
8686
VALUE rb_str_frozen_bare_string(VALUE);
87+
const char *rb_str_null_check(VALUE);
8788

8889
/* error.c */
8990
void rb_warn_unchilled_literal(VALUE str);

string.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2902,6 +2902,34 @@ str_null_check(VALUE str, int *w)
29022902
return s;
29032903
}
29042904

2905+
const char *
2906+
rb_str_null_check(VALUE str)
2907+
{
2908+
RUBY_ASSERT(RB_TYPE_P(str, T_STRING));
2909+
2910+
char *s;
2911+
long len;
2912+
RSTRING_GETMEM(str, s, len);
2913+
2914+
if (RB_LIKELY(rb_str_enc_fastpath(str))) {
2915+
if (!s || memchr(s, 0, len)) {
2916+
rb_raise(rb_eArgError, "string contains null byte");
2917+
}
2918+
}
2919+
else {
2920+
int w;
2921+
const char *s = str_null_check(str, &w);
2922+
if (!s) {
2923+
if (w) {
2924+
rb_raise(rb_eArgError, "string contains null char");
2925+
}
2926+
rb_raise(rb_eArgError, "string contains null byte");
2927+
}
2928+
}
2929+
2930+
return s;
2931+
}
2932+
29052933
char *
29062934
rb_str_to_cstr(VALUE str)
29072935
{

0 commit comments

Comments
 (0)