Skip to content

Commit 4376f8d

Browse files
authored
Merge pull request #242 from bulasevich/GR-71797
[Backport] [Oracle GraalVM] [GR-71797] Backport to 23.1: Sync HotSpot intrinsic stub ports.
2 parents 0599cfa + 9aeb635 commit 4376f8d

File tree

14 files changed

+534
-331
lines changed

14 files changed

+534
-331
lines changed

compiler/src/jdk.internal.vm.compiler.test/src/org/graalvm/compiler/replacements/test/StringCompressInflateTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
import static org.junit.Assume.assumeTrue;
2828

2929
import java.io.UnsupportedEncodingException;
30+
import java.util.Arrays;
31+
32+
import org.junit.Assert;
3033

3134
import org.graalvm.compiler.core.common.CompilationIdentifier;
3235
import org.graalvm.compiler.nodes.Invoke;
@@ -295,6 +298,27 @@ public void testStringUTF16CompressCharByte() throws ClassNotFoundException {
295298
}
296299
}
297300
}
301+
302+
// Exhaustively check that compress returns the correct index of the non-latin1 char.
303+
final int size = 48;
304+
final byte fillByte = 'R';
305+
char[] chars = new char[size];
306+
final byte[] bytes = new byte[chars.length];
307+
Arrays.fill(bytes, fillByte);
308+
for (int i = 0; i < size; i++) { // Every starting index
309+
for (int j = i; j < size; j++) { // Every location of non-latin1
310+
Arrays.fill(chars, 'A');
311+
chars[j] = 0xFF21;
312+
byte[] dst = Arrays.copyOf(bytes, bytes.length);
313+
byte[] dst2 = Arrays.copyOf(bytes, bytes.length);
314+
int result = (int) invokeSafe(caller, null, chars, i, dst, 0, chars.length - i);
315+
int result2 = (int) executeVarargsSafe(code, chars, i, dst2, 0, chars.length - i);
316+
Assert.assertEquals(result, result2);
317+
Assert.assertArrayEquals(dst, dst2);
318+
Assert.assertEquals("compress found wrong index", j - i, result);
319+
Assert.assertEquals("extra character stored", fillByte, bytes[j]);
320+
}
321+
}
298322
}
299323

300324
public static void getCharsSnippet(String s, int srcBegin, int srcEnd, char[] dst, int dstBegin) {

compiler/src/jdk.internal.vm.compiler/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,9 @@ public enum ASIMDInstruction {
704704
CMHS(UBit | 0b00111 << 11),
705705
USHL(UBit | 0b01000 << 11),
706706
UMAX(UBit | 0b01100 << 11),
707+
UMAXP(UBit | 0b10100 << 11),
707708
UMIN(UBit | 0b01101 << 11),
709+
UMINP(UBit | 0b10101 << 11),
708710
SUB(UBit | 0b10000 << 11),
709711
CMEQ(UBit | 0b10001 << 11),
710712
MLS(UBit | 0b10010 << 11),
@@ -3556,6 +3558,31 @@ public void umaxVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr
35563558
threeSameEncoding(ASIMDInstruction.UMAX, size, elemSizeXX(eSize), dst, src1, src2);
35573559
}
35583560

3561+
/**
3562+
* C7.2.361 Unsigned maximum pairwise.<br>
3563+
*
3564+
* <code>
3565+
* concat = src2:src1
3566+
* for i in 0..n-1 do dst[i] = uint_max(concat[2 * i], concat[2 * i + 1])
3567+
* </code>
3568+
*
3569+
* @param size register size.
3570+
* @param eSize element size.
3571+
* @param dst SIMD register.
3572+
* @param src1 SIMD register.
3573+
* @param src2 SIMD register.
3574+
*/
3575+
public void umaxpVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) {
3576+
assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize;
3577+
3578+
assert dst.getRegisterCategory().equals(SIMD) : dst;
3579+
assert src1.getRegisterCategory().equals(SIMD) : src1;
3580+
assert src2.getRegisterCategory().equals(SIMD) : src2;
3581+
assert eSize != ElementSize.DoubleWord : "Invalid lane width for umaxp";
3582+
3583+
threeSameEncoding(ASIMDInstruction.UMAXP, size, elemSizeXX(eSize), dst, src1, src2);
3584+
}
3585+
35593586
/**
35603587
* C7.2.362 Unsigned maximum across vector.<br>
35613588
*
@@ -3567,8 +3594,8 @@ public void umaxVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr
35673594
* @param src SIMD register.
35683595
*/
35693596
public void umaxvSV(ASIMDSize size, ElementSize elementSize, Register dst, Register src) {
3570-
assert dst.getRegisterCategory().equals(SIMD);
3571-
assert src.getRegisterCategory().equals(SIMD);
3597+
assert dst.getRegisterCategory().equals(SIMD) : dst;
3598+
assert src.getRegisterCategory().equals(SIMD) : src;
35723599
assert !(size == ASIMDSize.HalfReg && elementSize == ElementSize.Word) : "Invalid size and lane combination for umaxv";
35733600
assert elementSize != ElementSize.DoubleWord : "Invalid lane width for umaxv";
35743601

@@ -3597,6 +3624,31 @@ public void uminVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr
35973624
threeSameEncoding(ASIMDInstruction.UMIN, size, elemSizeXX(eSize), dst, src1, src2);
35983625
}
35993626

3627+
/**
3628+
* C7.2.364 Unsigned minimum pairwise.<br>
3629+
*
3630+
* <code>
3631+
* concat = src2:src1
3632+
* for i in 0..n-1 do dst[i] = uint_min(concat[2 * i], concat[2 * i + 1])
3633+
* </code>
3634+
*
3635+
* @param size register size.
3636+
* @param eSize element size.
3637+
* @param dst SIMD register.
3638+
* @param src1 SIMD register.
3639+
* @param src2 SIMD register.
3640+
*/
3641+
public void uminpVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) {
3642+
assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize;
3643+
3644+
assert dst.getRegisterCategory().equals(SIMD) : dst;
3645+
assert src1.getRegisterCategory().equals(SIMD) : src1;
3646+
assert src2.getRegisterCategory().equals(SIMD) : src2;
3647+
assert eSize != ElementSize.DoubleWord : "Invalid lane width for uminp";
3648+
3649+
threeSameEncoding(ASIMDInstruction.UMINP, size, elemSizeXX(eSize), dst, src1, src2);
3650+
}
3651+
36003652
/**
36013653
* C7.2.365 Unsigned minimum across vector.<br>
36023654
*

compiler/src/jdk.internal.vm.compiler/src/org/graalvm/compiler/lir/aarch64/AArch64ArrayCompareToOp.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,11 @@ private void emitSIMDCode(AArch64MacroAssembler masm, Label stringsEqualUptoLeng
333333
masm.neon.cmeqVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, eSize, array1HighV, array1HighV, array2HighV);
334334
masm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmpRegV1, array1LowV, array1HighV);
335335
masm.neon.uminvSV(AArch64ASIMDAssembler.ASIMDSize.FullReg, eSize, tmpRegV1, tmpRegV1);
336-
masm.fcmpZero(64, tmpRegV1);
337-
masm.branchConditionally(ConditionFlag.EQ, mismatchInChunk);
336+
try (AArch64MacroAssembler.ScratchRegister scratchReg = masm.getScratchRegister()) {
337+
Register tmp = scratchReg.getRegister();
338+
masm.neon.umovGX(AArch64ASIMDAssembler.ElementSize.DoubleWord, tmp, tmpRegV1, 0);
339+
masm.cbz(64, tmp, mismatchInChunk);
340+
}
338341
masm.cmp(64, array1, lastChunkAddress1);
339342
masm.branchConditionally(ConditionFlag.LO, simdLoop);
340343

compiler/src/jdk.internal.vm.compiler/src/org/graalvm/compiler/lir/aarch64/AArch64ArrayEqualsOp.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ private void emitArrayEquals(AArch64MacroAssembler asm,
231231
Register refAddress = len;
232232
asm.add(64, refAddress, arrayMax, len, ShiftType.LSL, strideMax.log2);
233233

234-
simdCompare64(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM);
234+
simdCompare64(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM, tmp);
235235
asm.branchConditionally(ConditionFlag.NE, end);
236236

237237
asm.cmp(64, refAddress, arrayMax);
@@ -248,7 +248,7 @@ private void emitArrayEquals(AArch64MacroAssembler asm,
248248
// 64 byte loop
249249
asm.align(PREFERRED_LOOP_ALIGNMENT);
250250
asm.bind(vectorLoop);
251-
simdCompare64(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM);
251+
simdCompare64(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM, tmp);
252252
asm.branchConditionally(ConditionFlag.NE, end);
253253
asm.cmp(64, arrayMax, refAddress);
254254
asm.branchConditionally(ConditionFlag.LO, vectorLoop);
@@ -262,13 +262,13 @@ private void emitArrayEquals(AArch64MacroAssembler asm,
262262
asm.sub(64, arrayM, arrayM, tmp, ShiftType.LSR, strideMax.log2 - strideM.log2);
263263
}
264264

265-
simdCompare64(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM);
265+
simdCompare64(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM, tmp);
266266
asm.jmp(end);
267267

268268
// tail for 32 - 63 bytes
269-
tail32(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM, len, tailLessThan64, tailLessThan32, end);
269+
tail32(asm, strideMax, strideMin, strideA, strideM, arrayMax, arrayMin, arrayM, len, tmp, tailLessThan64, tailLessThan32, end);
270270
// tail for 16 - 31 bytes
271-
tail16(asm, strideA, strideB, strideM, strideMax, strideMin, arrayA, arrayB, arrayM, len, tailLessThan32, tailLessThan16, end);
271+
tail16(asm, strideA, strideB, strideM, strideMax, strideMin, arrayA, arrayB, arrayM, len, tmp, tailLessThan32, tailLessThan16, end);
272272
// tail for 8 - 15 bytes
273273
tailLessThan16(asm, strideA, strideB, strideM, strideMax, arrayA, arrayB, arrayM, len, tmp, ret, tailLessThan16, tailLessThan8, end, 8);
274274
// tail for 4 - 7 bytes
@@ -290,7 +290,8 @@ private void simdCompare64(AArch64MacroAssembler asm,
290290
Stride strideMask,
291291
Register arrayMax,
292292
Register arrayMin,
293-
Register arrayMask) {
293+
Register arrayMask,
294+
Register tmp) {
294295
ElementSize minESize = fromStride(strideMin);
295296
switch (strideMax.log2 - strideMin.log2) {
296297
case 0:
@@ -419,7 +420,7 @@ private void simdCompare64(AArch64MacroAssembler asm,
419420
default:
420421
throw GraalError.unimplemented("comparison of " + strideMin + " to " + strideMax + " not implemented"); // ExcludeFromJacocoGeneratedReport
421422
}
422-
vectorCheckZero(asm, v(0), v(0));
423+
cmpZeroVector(asm, v(0), v(0), tmp);
423424
}
424425

425426
private void tail32(AArch64MacroAssembler asm,
@@ -431,6 +432,7 @@ private void tail32(AArch64MacroAssembler asm,
431432
Register arrayMin,
432433
Register arrayMask,
433434
Register len,
435+
Register tmp,
434436
Label entry,
435437
Label nextTail,
436438
Label end) {
@@ -594,7 +596,7 @@ private void tail32(AArch64MacroAssembler asm,
594596
default:
595597
throw GraalError.unimplemented("comparison of " + strideMin + " to " + strideMax + " not implemented"); // ExcludeFromJacocoGeneratedReport
596598
}
597-
vectorCheckZero(asm, v(0), v(0));
599+
cmpZeroVector(asm, v(0), v(0), tmp);
598600
asm.jmp(end);
599601
}
600602

@@ -608,6 +610,7 @@ private void tail16(AArch64MacroAssembler asm,
608610
Register arrayB,
609611
Register arrayM,
610612
Register len,
613+
Register tmp,
611614
Label entry,
612615
Label nextTail,
613616
Label end) {
@@ -662,7 +665,7 @@ private void tail16(AArch64MacroAssembler asm,
662665
asm.neon.eorVVV(FullReg, vecArrayA2, vecArrayA2, vecArrayB2);
663666
asm.neon.orrVVV(FullReg, vecArrayA1, vecArrayA1, vecArrayA2);
664667

665-
vectorCheckZero(asm, vecArrayA1, vecArrayA1);
668+
cmpZeroVector(asm, vecArrayA1, vecArrayA1, tmp);
666669
asm.jmp(end);
667670
}
668671

@@ -752,7 +755,7 @@ private void tailLessThan16(AArch64MacroAssembler asm,
752755
asm.neon.orrVVV(FullReg, vecArrayA1, vecArrayA1, vecArrayM1);
753756
}
754757
asm.neon.eorVVV(FullReg, vecArrayA1, vecArrayA1, vecArrayB1);
755-
vectorCheckZero(asm, vecArrayA1, vecArrayA1);
758+
cmpZeroVector(asm, vecArrayA1, vecArrayA1, tmp);
756759
} else if (strideMax.value == nBytes) {
757760
asm.bind(entry);
758761
// tail for length == 1

compiler/src/jdk.internal.vm.compiler/src/org/graalvm/compiler/lir/aarch64/AArch64ArrayIndexOfOp.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -631,9 +631,11 @@ private void emitSIMDMatch(AArch64MacroAssembler masm,
631631
break;
632632
}
633633
masm.neon.orrVVV(FullReg, vecTmp[0], vecArray1, vecArray2);
634-
/* If value != 0, then there was a match somewhere. */
635-
vectorCheckZero(masm, ElementSize.fromStride(getMatchResultStride()), vecTmp[0], vecTmp[0], variant != ArrayIndexOfVariant.Table);
636-
masm.branchConditionally(ConditionFlag.NE, matchInChunk);
634+
try (ScratchRegister sc = masm.getScratchRegister()) {
635+
Register tmp = sc.getRegister();
636+
/* If value != 0, then there was a match somewhere. */
637+
cbnzVector(masm, ElementSize.fromStride(getMatchResultStride()), vecTmp[0], vecTmp[0], tmp, variant != ArrayIndexOfVariant.Table, matchInChunk);
638+
}
637639
}
638640

639641
private Stride getMatchResultStride() {

compiler/src/jdk.internal.vm.compiler/src/org/graalvm/compiler/lir/aarch64/AArch64ArrayRegionCompareToOp.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,7 @@ private void emitArrayCompare(CompilationResultBuilder crb, AArch64MacroAssemble
202202
asm.neon.eorVVV(FullReg, vecTmp1, vecArrayA1, vecArrayB1);
203203
asm.neon.eorVVV(FullReg, vecTmp2, vecArrayA2, vecArrayB2);
204204
asm.neon.orrVVV(FullReg, vecTmp2, vecTmp2, vecTmp1);
205-
vectorCheckZero(asm, vecTmp2, vecTmp2);
206-
asm.branchConditionally(ConditionFlag.NE, diffFound);
205+
cbnzVector(asm, ElementSize.Byte, vecTmp2, vecTmp2, tmp, false, diffFound);
207206
// if so, continue
208207
asm.cmp(64, maxStrideArray, refAddress);
209208
asm.branchConditionally(ConditionFlag.LO, vectorLoop);
@@ -218,8 +217,7 @@ private void emitArrayCompare(CompilationResultBuilder crb, AArch64MacroAssemble
218217
asm.neon.eorVVV(FullReg, vecTmp1, vecArrayA1, vecArrayB1);
219218
asm.neon.eorVVV(FullReg, vecTmp2, vecArrayA2, vecArrayB2);
220219
asm.neon.orrVVV(FullReg, vecTmp2, vecTmp2, vecTmp1);
221-
vectorCheckZero(asm, vecTmp2, vecTmp2);
222-
asm.branchConditionally(ConditionFlag.NE, diffFound);
220+
cbnzVector(asm, ElementSize.Byte, vecTmp2, vecTmp2, tmp, false, diffFound);
223221
asm.mov(64, ret, zr);
224222
asm.jmp(end);
225223

@@ -239,8 +237,7 @@ private void emitArrayCompare(CompilationResultBuilder crb, AArch64MacroAssemble
239237
asm.align(PREFERRED_BRANCH_TARGET_ALIGNMENT);
240238
asm.bind(diffFound);
241239
// check if vecArrayA1 and vecArrayB1 are equal
242-
vectorCheckZero(asm, vecTmp1, vecTmp1);
243-
asm.branchConditionally(ConditionFlag.NE, returnV1);
240+
cbnzVector(asm, ElementSize.Byte, vecTmp1, vecTmp1, tmp, false, returnV1);
244241
calcReturnValue(asm, ret, vecArrayA2, vecArrayB2, vecArrayA1, vecArrayB1, vecMask, strideMax);
245242
asm.jmp(end);
246243

@@ -295,11 +292,9 @@ private static void loadAndExtend(AArch64MacroAssembler asm, Stride strideDst, S
295292
private static void calcReturnValue(AArch64MacroAssembler asm, Register ret, Register vecArrayA, Register vecArrayB, Register vecTmp, Register vecIndex, Register vecMask, Stride strideMax) {
296293
// set all equal bytes to 0xff, others to 0x00
297294
asm.neon.cmeqVVV(FullReg, fromStride(strideMax), vecTmp, vecArrayA, vecArrayB);
298-
// BIC with the ascending index mask, this will replace all non-equal bytes with their
299-
// corresponding byte index
300-
asm.neon.bicVVV(FullReg, vecIndex, vecMask, vecTmp);
301-
// OR with the result of CMEQ, replacing all equal bytes with 0xff again
302-
asm.neon.orrVVV(FullReg, vecIndex, vecIndex, vecTmp);
295+
// OR with the ascending index mask, this will replace all non-equal bytes with their
296+
// corresponding byte index, and all equal bytes with 0xff
297+
asm.neon.orrVVV(FullReg, vecIndex, vecMask, vecTmp);
303298
// Get the unsigned minimum. This will yield the index of the first non-equal bytes, since
304299
// all equal ones are filled with 0xff
305300
asm.neon.uminvSV(FullReg, fromStride(strideMax), vecIndex, vecIndex);

0 commit comments

Comments
 (0)