Skip to content

Commit 451f1e9

Browse files
Introduce a ByteString.substringNoCopy() method.
PiperOrigin-RevId: 866483595
1 parent 9627348 commit 451f1e9

File tree

3 files changed

+145
-11
lines changed

3 files changed

+145
-11
lines changed

java/core/src/main/java/com/google/protobuf/ByteString.java

Lines changed: 57 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ public byte[] copyFrom(byte[] bytes, int offset, int size) {
127127
/**
128128
* Gets the byte at the given index. This method should be used only for random access to
129129
* individual bytes. To access bytes sequentially, use the {@link ByteIterator} returned by {@link
130-
* #iterator()}, and call {@link #substring(int, int)} first if necessary.
130+
* #iterator()}, and call {@link #substringNoCopy(int, int)} first if necessary.
131131
*
132132
* @param index index of byte
133133
* @return the value
@@ -317,7 +317,7 @@ public static Comparator<ByteString> unsignedLexicographicalComparator() {
317317
* Return the substring from {@code beginIndex}, inclusive, to the end of the string.
318318
*
319319
* @param beginIndex start at this index
320-
* @return substring sharing underlying data
320+
* @return substring
321321
* @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}.
322322
*/
323323
public final ByteString substring(int beginIndex) {
@@ -329,12 +329,38 @@ public final ByteString substring(int beginIndex) {
329329
*
330330
* @param beginIndex start at this index
331331
* @param endIndex the last character is the one before this index
332-
* @return substring sharing underlying data
332+
* @return substring
333333
* @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or
334334
* {@code beginIndex > endIndex}.
335335
*/
336336
public abstract ByteString substring(int beginIndex, int endIndex);
337337

338+
/**
339+
* Return the substring from {@code beginIndex}, inclusive, to the end of the string. Unlike
340+
* {@link #substring(int)} this method tries to avoid copies of the underlying data where
341+
* possible, but may still copy in some situations.
342+
*
343+
* @param beginIndex start at this index
344+
* @return substring sharing underlying data
345+
* @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}.
346+
*/
347+
public final ByteString substringNoCopy(int beginIndex) {
348+
return substringNoCopy(beginIndex, size());
349+
}
350+
351+
/**
352+
* Return the substring from {@code beginIndex}, inclusive, to {@code endIndex}, exclusive. Unlike
353+
* {@link #substring(int, int)} this method tries to avoid copies of the underlying data where
354+
* possible, but may still copy in some situations.
355+
*
356+
* @param beginIndex start at this index
357+
* @param endIndex the last character is the one before this index
358+
* @return substring sharing underlying data
359+
* @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or
360+
* {@code beginIndex > endIndex}.
361+
*/
362+
public abstract ByteString substringNoCopy(int beginIndex, int endIndex);
363+
338364
/**
339365
* Tests if this bytestring starts with the specified prefix. Similar to {@link
340366
* String#startsWith(String)}
@@ -344,7 +370,7 @@ public final ByteString substring(int beginIndex) {
344370
* byte sequence represented by this string; <code>false</code> otherwise.
345371
*/
346372
public final boolean startsWith(ByteString prefix) {
347-
return size() >= prefix.size() && substring(0, prefix.size()).equals(prefix);
373+
return size() >= prefix.size() && substringNoCopy(0, prefix.size()).equals(prefix);
348374
}
349375

350376
/**
@@ -356,7 +382,7 @@ public final boolean startsWith(ByteString prefix) {
356382
* byte sequence represented by this string; <code>false</code> otherwise.
357383
*/
358384
public final boolean endsWith(ByteString suffix) {
359-
return size() >= suffix.size() && substring(size() - suffix.size()).equals(suffix);
385+
return size() >= suffix.size() && substringNoCopy(size() - suffix.size()).equals(suffix);
360386
}
361387

362388
// =================================================================
@@ -774,7 +800,7 @@ public void copyTo(byte[] target, int offset) {
774800
* @param targetOffset offset within the target buffer
775801
* @param numberToCopy number of bytes to copy
776802
* @throws IndexOutOfBoundsException if an offset or size is negative or too large
777-
* @deprecated Use {@code byteString.substring(sourceOffset, sourceOffset +
803+
* @deprecated Use {@code byteString.substringNoCopy(sourceOffset, sourceOffset +
778804
* numberToCopy).copyTo(target, targetOffset)} instead.
779805
*/
780806
@Deprecated
@@ -797,7 +823,7 @@ protected abstract void copyToInternal(
797823
* Copies bytes into a ByteBuffer.
798824
*
799825
* <p>To copy a subset of bytes, you call this method on the return value of {@link
800-
* #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target)}
826+
* #substring(int, int)}. Example: {@code byteString.substringNoCopy(start, end).copyTo(target)}
801827
*
802828
* @param target ByteBuffer to copy into.
803829
* @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
@@ -1434,7 +1460,7 @@ public final String toString() {
14341460
private String truncateAndEscapeForDisplay() {
14351461
final int limit = 50;
14361462

1437-
return size() <= limit ? escapeBytes(this) : escapeBytes(substring(0, limit - 3)) + "...";
1463+
return size() <= limit ? escapeBytes(this) : escapeBytes(substringNoCopy(0, limit - 3)) + "...";
14381464
}
14391465

14401466
/**
@@ -1511,6 +1537,17 @@ public ByteString substring(int beginIndex, int endIndex) {
15111537
return new BoundedByteString(bytes, beginIndex, length);
15121538
}
15131539

1540+
@Override
1541+
public ByteString substringNoCopy(int beginIndex, int endIndex) {
1542+
final int length = checkRange(beginIndex, endIndex, size());
1543+
1544+
if (length == 0) {
1545+
return ByteString.EMPTY;
1546+
}
1547+
1548+
return new BoundedByteString(bytes, beginIndex, length);
1549+
}
1550+
15141551
// =================================================================
15151552
// ByteString -> byte[]
15161553

@@ -1607,7 +1644,7 @@ boolean equalsRange(ByteString other, int offset, int length) {
16071644
return subArrayEquals(bytes, 0, bbsOther.bytes, bbsOther.offset + offset, length);
16081645
}
16091646

1610-
return other.substring(offset, offset + length).equals(substring(0, length));
1647+
return other.substringNoCopy(offset, offset + length).equals(substringNoCopy(0, length));
16111648
}
16121649

16131650
@Override
@@ -1702,6 +1739,15 @@ public ByteString substring(int beginIndex, int endIndex) {
17021739
return new BoundedByteString(bytes, offset + beginIndex, substringLength);
17031740
}
17041741

1742+
@Override
1743+
public ByteString substringNoCopy(int beginIndex, int endIndex) {
1744+
int substringLength = checkRange(beginIndex, endIndex, length);
1745+
if (substringLength == 0) {
1746+
return ByteString.EMPTY;
1747+
}
1748+
return new BoundedByteString(bytes, offset + beginIndex, substringLength);
1749+
}
1750+
17051751
// =================================================================
17061752
// ByteString -> byte[]
17071753
@Override
@@ -1782,8 +1828,8 @@ boolean equalsRange(ByteString other, int offset, int length) {
17821828
}
17831829

17841830
return other
1785-
.substring(offset, offset + length)
1786-
.equals(substring(this.offset, this.offset + length));
1831+
.substringNoCopy(offset, offset + length)
1832+
.equals(substringNoCopy(this.offset, this.offset + length));
17871833
}
17881834

17891835
@Override

java/core/src/main/java/com/google/protobuf/RopeByteString.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,24 @@ protected boolean isBalanced() {
339339
*/
340340
@Override
341341
public ByteString substring(int beginIndex, int endIndex) {
342+
return substringNoCopy(beginIndex, endIndex);
343+
}
344+
345+
/**
346+
* Takes a substring of this one. This involves recursive descent along the left and right edges
347+
* of the substring, and referencing any wholly contained segments in between. Any leaf nodes
348+
* entirely uninvolved in the substring will not be referenced by the substring.
349+
*
350+
* <p>Substrings of {@code length < 2} should result in at most a single recursive call chain,
351+
* terminating at a leaf node. Thus the result will be a {@link
352+
* com.google.protobuf.ByteString.LeafByteString}.
353+
*
354+
* @param beginIndex start at this index
355+
* @param endIndex the last character is the one before this index
356+
* @return substring leaf node or tree
357+
*/
358+
@Override
359+
public ByteString substringNoCopy(int beginIndex, int endIndex) {
342360
final int length = checkRange(beginIndex, endIndex, totalLength);
343361

344362
if (length == 0) {

java/core/src/test/java/com/google/protobuf/ByteStringTest.java

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,15 @@ public void testSubstring_beginIndex() {
119119
.isTrue();
120120
}
121121

122+
@Test
123+
public void testSubstringNoCopy_beginIndex() {
124+
byte[] bytes = getTestBytes();
125+
ByteString substring = ByteString.copyFrom(bytes).substringNoCopy(500);
126+
assertWithMessage("substring must contain the tail of the string")
127+
.that(isArrayRange(substring.toByteArray(), bytes, 500, bytes.length - 500))
128+
.isTrue();
129+
}
130+
122131
@Test
123132
public void testEmpty_isEmpty() {
124133
ByteString byteString = ByteString.empty();
@@ -732,6 +741,27 @@ public void testSubstringParity() {
732741
.isEqualTo(concreteSubstring.hashCode());
733742
}
734743

744+
@Test
745+
public void testSubstringNoCopyParity() {
746+
byte[] bigBytes = getTestBytes(2048 * 1024, 113344L);
747+
int start = 512 * 1024 - 3333;
748+
int end = 512 * 1024 + 7777;
749+
ByteString concreteSubstring = ByteString.copyFrom(bigBytes).substringNoCopy(start, end);
750+
boolean ok = true;
751+
for (int i = start; ok && i < end; ++i) {
752+
ok = (bigBytes[i] == concreteSubstring.byteAt(i - start));
753+
}
754+
assertWithMessage("Concrete substring didn't capture the right bytes").that(ok).isTrue();
755+
756+
ByteString literalString = ByteString.copyFrom(bigBytes, start, end - start);
757+
assertWithMessage("Substring must be equal to literal string")
758+
.that(literalString)
759+
.isEqualTo(concreteSubstring);
760+
assertWithMessage("Substring must have same hashcode as literal string")
761+
.that(literalString.hashCode())
762+
.isEqualTo(concreteSubstring.hashCode());
763+
}
764+
735765
@Test
736766
public void testCompositeSubstring() {
737767
byte[] referenceBytes = getTestBytes(77748, 113344L);
@@ -772,6 +802,46 @@ public void testCompositeSubstring() {
772802
.isFalse();
773803
}
774804

805+
@Test
806+
public void testCompositeSubstringNoCopy() {
807+
byte[] referenceBytes = getTestBytes(77748, 113344L);
808+
809+
List<ByteString> pieces = makeConcretePieces(referenceBytes);
810+
ByteString listString = ByteString.copyFrom(pieces);
811+
812+
int from = 1000;
813+
int to = 40000;
814+
ByteString compositeSubstring = listString.substringNoCopy(from, to);
815+
byte[] substringBytes = compositeSubstring.toByteArray();
816+
boolean stillEqual = true;
817+
for (int i = 0; stillEqual && i < to - from; ++i) {
818+
stillEqual = referenceBytes[from + i] == substringBytes[i];
819+
}
820+
assertWithMessage("Substring must return correct bytes").that(stillEqual).isTrue();
821+
822+
stillEqual = true;
823+
for (int i = 0; stillEqual && i < to - from; ++i) {
824+
stillEqual = referenceBytes[from + i] == compositeSubstring.byteAt(i);
825+
}
826+
assertWithMessage("Substring must support byteAt() correctly").that(stillEqual).isTrue();
827+
828+
ByteString literalSubstring = ByteString.copyFrom(referenceBytes, from, to - from);
829+
assertWithMessage("Composite substring must equal a literal substring over the same bytes")
830+
.that(literalSubstring)
831+
.isEqualTo(compositeSubstring);
832+
assertWithMessage("Literal substring must equal a composite substring over the same bytes")
833+
.that(compositeSubstring)
834+
.isEqualTo(literalSubstring);
835+
836+
assertWithMessage("We must get the same hashcodes for composite and literal substrings")
837+
.that(literalSubstring.hashCode())
838+
.isEqualTo(compositeSubstring.hashCode());
839+
840+
assertWithMessage("We can't be equal to a proper substring")
841+
.that(compositeSubstring.equals(literalSubstring.substring(0, literalSubstring.size() - 1)))
842+
.isFalse();
843+
}
844+
775845
@Test
776846
public void testCopyFromList() {
777847
byte[] referenceBytes = getTestBytes(77748, 113344L);

0 commit comments

Comments
 (0)