Skip to content

Commit 2ff82aa

Browse files
committed
Always terminate native strings with 4 \0 bytes
* So whether the string is UTF-32, UTF-16 or another encoding, there is a safe number of \0 bytes when passed to C functions. * See #2704 (comment)
1 parent 41eaa06 commit 2ff82aa

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Compatibility:
2222
* Range literals of integers are now created at parse time like in CRuby (#2622, @aardvark179).
2323
* Fix `IO.pipe` - allow overriding `IO.new` that is used to create new pipes (#2692, @andrykonchin).
2424
* Fix exception message when there are missing or extra keyword arguments - it contains all the missing/extra keywords now (#1522, @andrykonchin).
25+
* Always terminate native strings with enough `\0` bytes (#2704, @eregon).
2526

2627
Performance:
2728

src/main/java/org/truffleruby/cext/CExtNodes.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,10 @@
131131
public class CExtNodes {
132132

133133
public static Pointer newNativeStringPointer(int capacity, RubyLanguage language) {
134-
return Pointer.mallocAutoRelease(capacity + 1, language);
134+
// We need up to 4 \0 bytes for UTF-32. Always use 4 for speed rather than checking the encoding min length.
135+
Pointer pointer = Pointer.mallocAutoRelease(capacity + 4, language);
136+
pointer.writeInt(capacity, 0);
137+
return pointer;
135138
}
136139

137140
private static long getNativeStringCapacity(Pointer pointer) {
@@ -732,13 +735,24 @@ public abstract static class RbStrSetLenNode extends CoreMethodArrayArgumentsNod
732735
protected RubyString strSetLen(RubyString string, int newByteLength,
733736
@Cached RubyStringLibrary libString,
734737
@Cached StringToNativeNode stringToNativeNode,
735-
@Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) {
738+
@Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode,
739+
@Cached ConditionProfile minLengthOneProfile) {
736740
var pointer = stringToNativeNode.executeToNative(string);
737741

738-
pointer.writeByte(newByteLength, (byte) 0); // Like MRI
742+
var encoding = libString.getEncoding(string);
743+
int minLength = encoding.jcoding.minLength();
744+
// Like MRI
745+
if (minLengthOneProfile.profile(minLength == 1)) {
746+
pointer.writeByte(newByteLength, (byte) 0);
747+
} else if (minLength == 2) {
748+
pointer.writeShort(newByteLength, (short) 0);
749+
} else if (minLength == 4) {
750+
pointer.writeInt(newByteLength, 0);
751+
} else {
752+
throw CompilerDirectives.shouldNotReachHere();
753+
}
739754

740-
var newNativeTString = fromNativePointerNode.execute(pointer, 0, newByteLength,
741-
libString.getTEncoding(string), false);
755+
var newNativeTString = fromNativePointerNode.execute(pointer, 0, newByteLength, encoding.tencoding, false);
742756
string.setTString(newNativeTString);
743757

744758
return string;
@@ -802,7 +816,6 @@ static MutableTruffleString resize(Pointer pointer, int newCapacity, int newByte
802816
RubyLanguage language) {
803817
final Pointer newPointer = newNativeStringPointer(newCapacity, language);
804818
newPointer.writeBytes(0, pointer, 0, Math.min(pointer.getSize(), newCapacity));
805-
newPointer.writeByte(newCapacity, (byte) 0); // Like MRI
806819

807820
return fromNativePointerNode.execute(newPointer, 0, newByteLength, tencoding, false);
808821
}
@@ -1225,7 +1238,6 @@ public static Pointer allocateAndCopyToNative(AbstractTruffleString tstring, Tru
12251238
int capacity, TruffleString.CopyToNativeMemoryNode copyToNativeMemoryNode, RubyLanguage language) {
12261239
final Pointer pointer = newNativeStringPointer(capacity, language);
12271240
copyToNativeMemoryNode.execute(tstring, 0, pointer, 0, capacity, tencoding);
1228-
pointer.writeByte(capacity, (byte) 0); // Like MRI
12291241
return pointer;
12301242
}
12311243

0 commit comments

Comments
 (0)