Skip to content

Commit 2031318

Browse files
committed
[GR-18163] Always terminate native strings with enough \0 bytes (#2704)
PullRequest: truffleruby/3455
2 parents 98db362 + 952dda9 commit 2031318

File tree

5 files changed

+59
-23
lines changed

5 files changed

+59
-23
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Compatibility:
2222
* Range literals of integers are now created at parse time like in CRuby (#2622, @aardvark179).
2323
* Fix `IO.pipe` - allow overriding `IO.new` that is used to create new pipes (#2692, @andykonchin).
2424
* Fix exception message when there are missing or extra keyword arguments - it contains all the missing/extra keywords now (#1522, @andrykonchin).
25+
* Always terminate native strings with enough `\0` bytes (#2704, @eregon).
2526

2627
Performance:
2728

spec/ruby/optional/capi/ext/string_spec.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,12 @@ VALUE string_spec_RSTRING_PTR_read(VALUE self, VALUE str, VALUE path) {
437437
return capacities;
438438
}
439439

440+
VALUE string_spec_RSTRING_PTR_null_terminate(VALUE self, VALUE str, VALUE min_length) {
441+
char* ptr = RSTRING_PTR(str);
442+
char* end = ptr + RSTRING_LEN(str);
443+
return rb_str_new(end, FIX2LONG(min_length));
444+
}
445+
440446
VALUE string_spec_StringValue(VALUE self, VALUE str) {
441447
return StringValue(str);
442448
}
@@ -662,6 +668,7 @@ void Init_string_spec(void) {
662668
rb_define_method(cls, "RSTRING_PTR_after_funcall", string_spec_RSTRING_PTR_after_funcall, 2);
663669
rb_define_method(cls, "RSTRING_PTR_after_yield", string_spec_RSTRING_PTR_after_yield, 1);
664670
rb_define_method(cls, "RSTRING_PTR_read", string_spec_RSTRING_PTR_read, 2);
671+
rb_define_method(cls, "RSTRING_PTR_null_terminate", string_spec_RSTRING_PTR_null_terminate, 2);
665672
rb_define_method(cls, "StringValue", string_spec_StringValue, 1);
666673
rb_define_method(cls, "SafeStringValue", string_spec_SafeStringValue, 1);
667674
rb_define_method(cls, "rb_str_hash", string_spec_rb_str_hash, 1);

spec/ruby/optional/capi/string_spec.rb

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,32 @@ def inspect
9797
end
9898
end
9999

100+
describe "rb_str_set_len on a UTF-16 String" do
101+
before :each do
102+
@str = "abcdefghij".force_encoding(Encoding::UTF_16BE)
103+
# Make sure to unshare the string
104+
@s.rb_str_modify(@str)
105+
end
106+
107+
it "inserts two NULL bytes at the length" do
108+
@s.rb_str_set_len(@str, 4).b.should == "abcd".b
109+
@s.rb_str_set_len(@str, 8).b.should == "abcd\x00\x00gh".b
110+
end
111+
end
112+
113+
describe "rb_str_set_len on a UTF-32 String" do
114+
before :each do
115+
@str = "abcdefghijkl".force_encoding(Encoding::UTF_32BE)
116+
# Make sure to unshare the string
117+
@s.rb_str_modify(@str)
118+
end
119+
120+
it "inserts four NULL bytes at the length" do
121+
@s.rb_str_set_len(@str, 4).b.should == "abcd".b
122+
@s.rb_str_set_len(@str, 12).b.should == "abcd\x00\x00\x00\x00ijkl".b
123+
end
124+
end
125+
100126
describe "rb_str_buf_new" do
101127
it "returns the equivalent of an empty string" do
102128
buf = @s.rb_str_buf_new(10, nil)
@@ -592,6 +618,12 @@ def inspect
592618
capacities[0].should < capacities[1]
593619
str.should == "fixture file contents to test read() with RSTRING_PTR"
594620
end
621+
622+
it "terminates the string with at least (encoding min length) \\0 bytes" do
623+
@s.RSTRING_PTR_null_terminate("abc", 1).should == "\x00"
624+
@s.RSTRING_PTR_null_terminate("abc".encode("UTF-16BE"), 2).should == "\x00\x00"
625+
@s.RSTRING_PTR_null_terminate("abc".encode("UTF-32BE"), 4).should == "\x00\x00\x00\x00"
626+
end
595627
end
596628

597629
describe "RSTRING_LEN" do

src/main/java/org/truffleruby/cext/CExtNodes.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,10 @@
131131
public class CExtNodes {
132132

133133
public static Pointer newNativeStringPointer(int capacity, RubyLanguage language) {
134-
return Pointer.mallocAutoRelease(capacity + 1, language);
134+
// We need up to 4 \0 bytes for UTF-32. Always use 4 for speed rather than checking the encoding min length.
135+
Pointer pointer = Pointer.mallocAutoRelease(capacity + 4, language);
136+
pointer.writeInt(capacity, 0);
137+
return pointer;
135138
}
136139

137140
private static long getNativeStringCapacity(Pointer pointer) {
@@ -732,13 +735,24 @@ public abstract static class RbStrSetLenNode extends CoreMethodArrayArgumentsNod
732735
protected RubyString strSetLen(RubyString string, int newByteLength,
733736
@Cached RubyStringLibrary libString,
734737
@Cached StringToNativeNode stringToNativeNode,
735-
@Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) {
738+
@Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode,
739+
@Cached ConditionProfile minLengthOneProfile) {
736740
var pointer = stringToNativeNode.executeToNative(string);
737741

738-
pointer.writeByte(newByteLength, (byte) 0); // Like MRI
742+
var encoding = libString.getEncoding(string);
743+
int minLength = encoding.jcoding.minLength();
744+
// Like MRI
745+
if (minLengthOneProfile.profile(minLength == 1)) {
746+
pointer.writeByte(newByteLength, (byte) 0);
747+
} else if (minLength == 2) {
748+
pointer.writeShort(newByteLength, (short) 0);
749+
} else if (minLength == 4) {
750+
pointer.writeInt(newByteLength, 0);
751+
} else {
752+
throw CompilerDirectives.shouldNotReachHere();
753+
}
739754

740-
var newNativeTString = fromNativePointerNode.execute(pointer, 0, newByteLength,
741-
libString.getTEncoding(string), false);
755+
var newNativeTString = fromNativePointerNode.execute(pointer, 0, newByteLength, encoding.tencoding, false);
742756
string.setTString(newNativeTString);
743757

744758
return string;
@@ -802,7 +816,6 @@ static MutableTruffleString resize(Pointer pointer, int newCapacity, int newByte
802816
RubyLanguage language) {
803817
final Pointer newPointer = newNativeStringPointer(newCapacity, language);
804818
newPointer.writeBytes(0, pointer, 0, Math.min(pointer.getSize(), newCapacity));
805-
newPointer.writeByte(newCapacity, (byte) 0); // Like MRI
806819

807820
return fromNativePointerNode.execute(newPointer, 0, newByteLength, tencoding, false);
808821
}
@@ -1225,7 +1238,6 @@ public static Pointer allocateAndCopyToNative(AbstractTruffleString tstring, Tru
12251238
int capacity, TruffleString.CopyToNativeMemoryNode copyToNativeMemoryNode, RubyLanguage language) {
12261239
final Pointer pointer = newNativeStringPointer(capacity, language);
12271240
copyToNativeMemoryNode.execute(tstring, 0, pointer, 0, capacity, tencoding);
1228-
pointer.writeByte(capacity, (byte) 0); // Like MRI
12291241
return pointer;
12301242
}
12311243

src/main/java/org/truffleruby/extra/ffi/Pointer.java

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,6 @@ public void writePointer(long offset, long address) {
170170
writeLong(offset, address);
171171
}
172172

173-
public void writeZeroTerminatedBytes(long offset, byte[] bytes, int start, int length) {
174-
writeBytes(offset, bytes, start, length);
175-
writeByte(offset + length, (byte) 0);
176-
}
177-
178173
@TruffleBoundary
179174
public void writeBytes(long destByteOffset, long size, byte value) {
180175
assert address + destByteOffset != 0 || size == 0;
@@ -191,17 +186,6 @@ public void writeBytes(long destByteOffset, Pointer source, int sourceByteOffset
191186
UNSAFE.copyMemory(source.getAddress() + sourceByteOffset, address + destByteOffset, bytesToCopy);
192187
}
193188

194-
@TruffleBoundary
195-
public void writeBytes(long destByteOffset, byte[] source, int sourceByteOffset, int bytesToCopy) {
196-
assert address + destByteOffset != 0 || bytesToCopy == 0;
197-
assert source != null;
198-
assert sourceByteOffset >= 0;
199-
assert bytesToCopy >= 0;
200-
201-
UNSAFE.copyMemory(source, Unsafe.ARRAY_BYTE_BASE_OFFSET + sourceByteOffset, null, address + destByteOffset,
202-
bytesToCopy);
203-
}
204-
205189
public byte readByte(long offset) {
206190
assert address + offset != 0;
207191
return UNSAFE.getByte(address + offset);

0 commit comments

Comments
 (0)