Skip to content

Commit b0247d0

Browse files
committed
[GR-34937] Add specs for TruffleString and update graal import
PullRequest: truffleruby/3371
2 parents 0f63df6 + 1ac86bb commit b0247d0

File tree

9 files changed

+71
-26
lines changed

9 files changed

+71
-26
lines changed

mx.truffleruby/suite.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
{
88
"name": "regex",
99
"subdir": True,
10-
"version": "587c31f311b09ba9e398e182b8e3a6bcf832679c",
10+
"version": "aeca61acf7e52a19c52c1bd019ab63f158477dfc",
1111
"urls": [
1212
{"url": "https://github.com/oracle/graal.git", "kind": "git"},
1313
{"url": "https://curio.ssw.jku.at/nexus/content/repositories/snapshots", "kind": "binary"},
@@ -16,7 +16,7 @@
1616
{
1717
"name": "sulong",
1818
"subdir": True,
19-
"version": "587c31f311b09ba9e398e182b8e3a6bcf832679c",
19+
"version": "aeca61acf7e52a19c52c1bd019ab63f158477dfc",
2020
"urls": [
2121
{"url": "https://github.com/oracle/graal.git", "kind": "git"},
2222
{"url": "https://curio.ssw.jku.at/nexus/content/repositories/snapshots", "kind": "binary"},

spec/ruby/core/integer/chr_spec.rb

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -223,26 +223,25 @@
223223

224224
# #5864
225225
it "raises RangeError if self is invalid as a codepoint in the specified encoding" do
226-
[ [0x80, "US-ASCII"],
227-
[0x0100, "BINARY"],
228-
[0x0100, "EUC-JP"],
229-
[0xA1A0, "EUC-JP"],
230-
[0xA1, "EUC-JP"],
231-
[0x80, "SHIFT_JIS"],
232-
[0xE0, "SHIFT_JIS"],
233-
[0x0100, "ISO-8859-9"],
234-
[620, "TIS-620"],
235-
[0xD800, "UTF-8"],
236-
[0xDBFF, "UTF-8"],
237-
[0xDC00, "UTF-8"],
238-
[0xDFFF, "UTF-8"],
239-
[0xD800, "UTF-16"],
240-
[0xDBFF, "UTF-16"],
241-
[0xDC00, "UTF-16"],
242-
[0xDFFF, "UTF-16"],
243-
].each do |integer, encoding_name|
244-
-> { integer.chr(encoding_name) }.should raise_error(RangeError)
245-
end
226+
-> { 0x80.chr("US-ASCII") }.should raise_error(RangeError)
227+
-> { 0x0100.chr("BINARY") }.should raise_error(RangeError)
228+
-> { 0x0100.chr("EUC-JP") }.should raise_error(RangeError)
229+
-> { 0xA1A0.chr("EUC-JP") }.should raise_error(RangeError)
230+
-> { 0xA1.chr("EUC-JP") }.should raise_error(RangeError)
231+
-> { 0x80.chr("SHIFT_JIS") }.should raise_error(RangeError)
232+
-> { 0xE0.chr("SHIFT_JIS") }.should raise_error(RangeError)
233+
-> { 0x0100.chr("ISO-8859-9") }.should raise_error(RangeError)
234+
-> { 620.chr("TIS-620") }.should raise_error(RangeError)
235+
# UTF-16 surrogate range
236+
-> { 0xD800.chr("UTF-8") }.should raise_error(RangeError)
237+
-> { 0xDBFF.chr("UTF-8") }.should raise_error(RangeError)
238+
-> { 0xDC00.chr("UTF-8") }.should raise_error(RangeError)
239+
-> { 0xDFFF.chr("UTF-8") }.should raise_error(RangeError)
240+
# UTF-16 surrogate range
241+
-> { 0xD800.chr("UTF-16") }.should raise_error(RangeError)
242+
-> { 0xDBFF.chr("UTF-16") }.should raise_error(RangeError)
243+
-> { 0xDC00.chr("UTF-16") }.should raise_error(RangeError)
244+
-> { 0xDFFF.chr("UTF-16") }.should raise_error(RangeError)
246245
end
247246

248247
it 'returns a String encoding self interpreted as a codepoint in the CESU-8 encoding' do

spec/ruby/core/regexp/shared/quote.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
Regexp.send(@method, :symbol).should == 'symbol'
1313
end
1414

15+
it "works with substrings" do
16+
str = ".+[]()"[1...-1]
17+
Regexp.send(@method, str).should == '\+\[\]\('
18+
end
19+
1520
it "sets the encoding of the result to US-ASCII if there are only US-ASCII characters present in the input String" do
1621
str = "abc".force_encoding("euc-jp")
1722
Regexp.send(@method, str).encoding.should == Encoding::US_ASCII

spec/ruby/core/string/capitalize_spec.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
3535
it "does not capitalize non-ASCII characters" do
3636
"ßet".capitalize(:ascii).should == "ßet"
3737
end
38+
39+
it "handles non-ASCII substrings properly" do
40+
"garçon"[1..-1].capitalize(:ascii).should == "Arçon"
41+
end
3842
end
3943

4044
describe "full Unicode case mapping adapted for Turkic languages" do

spec/ruby/core/string/dup_spec.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,13 @@ class << @obj
4949
orig.should == "xtring"
5050
dup.should == "string"
5151
end
52+
53+
it "does not modify the original setbyte-mutated string when changing dupped string" do
54+
orig = "a"
55+
orig.setbyte 0, "b".ord
56+
copy = orig.dup
57+
orig.setbyte 0, "c".ord
58+
orig.should == "c"
59+
copy.should == "b"
60+
end
5261
end

spec/ruby/core/string/lstrip_spec.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,10 @@
5050
-> { "hello".freeze.lstrip! }.should raise_error(FrozenError)
5151
-> { "".freeze.lstrip! }.should raise_error(FrozenError)
5252
end
53+
54+
it "raises an ArgumentError if the first codepoint is invalid" do
55+
s = "\xDFabc".force_encoding(Encoding::UTF_8)
56+
s.valid_encoding?.should be_false
57+
-> { s.lstrip! }.should raise_error(ArgumentError)
58+
end
5359
end

spec/ruby/core/string/rstrip_spec.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,10 @@
4646
-> { "hello".freeze.rstrip! }.should raise_error(FrozenError)
4747
-> { "".freeze.rstrip! }.should raise_error(FrozenError)
4848
end
49+
50+
it "raises an ArgumentError if the last codepoint is invalid" do
51+
s = "abc\xDF".force_encoding(Encoding::UTF_8)
52+
s.valid_encoding?.should be_false
53+
-> { s.rstrip! }.should raise_error(ArgumentError)
54+
end
4955
end

spec/ruby/core/string/scrub_spec.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
"abc\u3042#{x81}".scrub.should == "abc\u3042\uFFFD"
1515
end
1616

17+
it "replaces invalid byte sequences in lazy substrings" do
18+
x81 = [0x81].pack('C').force_encoding('utf-8')
19+
"abc\u3042#{x81}def"[1...-1].scrub.should == "bc\u3042\uFFFDde"
20+
end
21+
1722
it "returns a copy of self when the input encoding is BINARY" do
1823
input = "foo".encode('BINARY')
1924

spec/ruby/core/string/split_spec.rb

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,17 @@
33
require_relative 'fixtures/classes'
44

55
describe "String#split with String" do
6+
it "throws an ArgumentError if the string is not valid" do
7+
s = "\xDF".force_encoding(Encoding::UTF_8)
8+
9+
-> { s.split }.should raise_error(ArgumentError)
10+
-> { s.split(':') }.should raise_error(ArgumentError)
11+
end
12+
613
it "throws an ArgumentError if the pattern is not a valid string" do
714
str = 'проверка'
8-
broken_str = 'проверка'
9-
broken_str.force_encoding('binary')
10-
broken_str.chop!
11-
broken_str.force_encoding('utf-8')
15+
broken_str = "\xDF".force_encoding(Encoding::UTF_8)
16+
1217
-> { str.split(broken_str) }.should raise_error(ArgumentError)
1318
end
1419

@@ -218,6 +223,12 @@
218223
end
219224

220225
describe "String#split with Regexp" do
226+
it "throws an ArgumentError if the string is not valid" do
227+
s = "\xDF".force_encoding(Encoding::UTF_8)
228+
229+
-> { s.split(/./) }.should raise_error(ArgumentError)
230+
end
231+
221232
it "divides self on regexp matches" do
222233
" now's the time".split(/ /).should == ["", "now's", "", "the", "time"]
223234
" x\ny ".split(/ /).should == ["", "x\ny"]

0 commit comments

Comments
 (0)