Skip to content

Commit c5b753a

Browse files
committed
[GR-34937] Adopt TruffleString in TruffleRuby and replace Rope
PullRequest: truffleruby/3432
2 parents 925274f + d216490 commit c5b753a

File tree

312 files changed

+8759
-13592
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

312 files changed

+8759
-13592
lines changed

bench/micro/file/write.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
kilobyte = 'x' * 1024
1212

13-
if defined?(Truffle::Ropes.flatten_rope)
14-
kilobyte = Truffle::Ropes.flatten_rope(kilobyte)
13+
if defined?(Truffle::Debug.flatten_string)
14+
kilobyte = Truffle::Debug.flatten_string(kilobyte)
1515
end
1616

1717
benchmark 'core-write-kilobyte' do
@@ -20,8 +20,8 @@
2020

2121
gigabyte = 'x' * 1024 * 1024 * 1024
2222

23-
if defined?(Truffle::Ropes.flatten_rope)
24-
gigabyte = Truffle::Ropes.flatten_rope(gigabyte)
23+
if defined?(Truffle::Debug.flatten_string)
24+
gigabyte = Truffle::Debug.flatten_string(gigabyte)
2525
end
2626

2727
benchmark 'core-write-gigabyte' do

bench/micro/string/flatten.rb

Lines changed: 0 additions & 21 deletions
This file was deleted.

bench/micro/string/substring.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
side = 512 * 1024
1010
big_string = ("a".b * side + "é".b + "z".b * side)[1...-1]
1111
result = big_string.byteslice(4, 8)
12-
# Truffle::Ropes.debug_print_rope(big_string, false)
12+
# Truffle::Debug.tstring_to_debug_string(big_string)
1313

1414
benchmark "core-string-many-substrings-of-large-substring" do
1515
i = 0

doc/contributor/truffle-string.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# TruffleString in TruffleRuby
2+
3+
TruffleRuby uses `TruffleString` to represent Ruby Strings, but wraps them in either a RubyString or an ImmutableRubyString object.
4+
5+
## Encodings Compatibility
6+
7+
The notion of encodings compatibility is mostly the same between Ruby and TruffleString but differs in one point:
8+
* An empty Ruby String is always considered compatible with any other Ruby String of any encoding.
9+
* TruffleString does not consider whether a string is empty, and only looks at the strings' encodings and code ranges.
10+
11+
As a result, to use TruffleString equality nodes, one needs to:
12+
1. Compute the compatible encoding with `NegotiateCompatibleStringEncodingNode` or `Primitive.encoding_ensure_compatible_str`.
13+
2. Check if both sides are empty, and if so return true before using TruffleString equality nodes.
14+
15+
`StringHelperNodes.StringEqualInternalNode` is a good example showing what is needed.
16+
17+
An example that would throw without empty checks is comparing an empty ISO-2022-JP (a dummy, non-ASCII-compatible, fixed-width encoding) string with an empty US-ASCII string:
18+
19+
```bash
20+
$ jt ruby -e '"".force_encoding("ISO-2022-JP") == ""'
21+
the given string is not compatible to the expected encoding "ISO_2022_JP", did you forget to convert it? (java.lang.IllegalArgumentException)
22+
```
23+
24+
## Logical vs Physical Byte Offsets
25+
26+
We categorize a byte offset into a `TruffleString` as either *logical* or *physical*.
27+
A physical byte offset includes the offset from the `InternalByteArray` (`InternalByteArray#getOffset()`).
28+
A logical byte offset does not include that and is the semantic byte offset from the start of the string.
29+
Physical offsets are quite difficult to use and they are error-prone as they can be passed by mistake to a method taking a logical offset.
30+
So avoid physical offsets as much as possible, and therefore avoid `InternalByteArray#getArray()`.
31+
32+
## Tests
33+
34+
This is a good set of tests to run when touching String code:
35+
```
36+
jt test integration strict-encoding-checks
37+
```

lib/cext/ABI_check.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
7
1+
8

lib/truffle/truffle/cext.rb

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -692,10 +692,6 @@ def rb_thread_alone
692692
Thread.list.count == 1 ? 1 : 0
693693
end
694694

695-
def rb_intern(str)
696-
str.intern
697-
end
698-
699695
def rb_int_positive_pow(a, b)
700696
a ** b
701697
end
@@ -809,14 +805,6 @@ def rb_enc_get_index(obj)
809805
enc
810806
end
811807

812-
def rb_intern_str(string)
813-
string.intern
814-
end
815-
816-
def rb_intern3(string, enc)
817-
string.force_encoding(enc).intern
818-
end
819-
820808
def rb_str_append(str, to_append)
821809
Primitive.string_append(str, to_append)
822810
end
@@ -1766,7 +1754,7 @@ def rb_gv_get(name)
17661754
end
17671755

17681756
def rb_reg_match(re, str)
1769-
result = str ? Truffle::RegexpOperations.match(re, str, 0) : nil
1757+
result = Truffle::RegexpOperations.match(re, str, 0)
17701758
Primitive.regexp_last_match_set(rb_get_special_vars(), result)
17711759

17721760
result.begin(0) if result

spec/ruby/core/file/shared/fnmatch.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,10 @@
159159
end
160160

161161
it "does not match leading periods in filenames with wildcards by default" do
162-
File.send(@method, '*', '.profile').should == false
163-
File.send(@method, '*', 'home/.profile').should == true
164-
File.send(@method, '*/*', 'home/.profile').should == true
165-
File.send(@method, '*/*', 'dave/.profile', File::FNM_PATHNAME).should == false
162+
File.should_not.send(@method, '*', '.profile')
163+
File.should.send(@method, '*', 'home/.profile')
164+
File.should.send(@method, '*/*', 'home/.profile')
165+
File.should_not.send(@method, '*/*', 'dave/.profile', File::FNM_PATHNAME)
166166
end
167167

168168
it "matches patterns with leading periods to dotfiles by default" do

spec/ruby/core/regexp/shared/quote.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
Regexp.send(@method, str).should == '\+\[\]\('
1818
end
1919

20+
it "works for broken strings" do
21+
Regexp.send(@method, "a.\x85b.".force_encoding("US-ASCII")).should =="a\\.\x85b\\.".force_encoding("US-ASCII")
22+
Regexp.send(@method, "a.\x80".force_encoding("UTF-8")).should == "a\\.\x80".force_encoding("UTF-8")
23+
end
24+
2025
it "sets the encoding of the result to US-ASCII if there are only US-ASCII characters present in the input String" do
2126
str = "abc".force_encoding("euc-jp")
2227
Regexp.send(@method, str).encoding.should == Encoding::US_ASCII

spec/ruby/core/string/capitalize_spec.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"hello".capitalize.should == "Hello"
1111
"HELLO".capitalize.should == "Hello"
1212
"123ABC".capitalize.should == "123abc"
13+
"abcdef"[1...-1].capitalize.should == "Bcde"
1314
end
1415

1516
describe "full Unicode case mapping" do
@@ -37,7 +38,7 @@
3738
end
3839

3940
it "handles non-ASCII substrings properly" do
40-
"garçon"[1..-1].capitalize(:ascii).should == "Arçon"
41+
"garçon"[1...-1].capitalize(:ascii).should == "Arço"
4142
end
4243
end
4344

spec/ruby/core/string/delete_prefix_spec.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
r.should == s
2222
end
2323

24+
it "does not remove partial bytes, only full characters" do
25+
"\xe3\x81\x82".delete_prefix("\xe3").should == "\xe3\x81\x82"
26+
end
27+
2428
it "doesn't set $~" do
2529
$~ = nil
2630

0 commit comments

Comments
 (0)