@@ -438,34 +438,34 @@ public ByteVector binary(ByteVector xVec, ByteVector yVec, int vectorOpcode) {
            case Bytecode.VECTOR_F64X2_GT -> f64x2_relop(x, y, VectorOperators.GT);
            case Bytecode.VECTOR_F64X2_LE -> f64x2_relop(x, y, VectorOperators.LE);
            case Bytecode.VECTOR_F64X2_GE -> f64x2_relop(x, y, VectorOperators.GE);
-           case Bytecode.VECTOR_I8X16_NARROW_I16X8_S -> narrow(x, y, I16X8, I8X16, VectorOperators.S2B, VectorOperators.ZERO_EXTEND_B2S, Byte.MIN_VALUE, Byte.MAX_VALUE);
-           case Bytecode.VECTOR_I8X16_NARROW_I16X8_U -> narrow(x, y, I16X8, I8X16, VectorOperators.S2B, VectorOperators.ZERO_EXTEND_B2S, (short) 0, (short) 0xff);
+           case Bytecode.VECTOR_I8X16_NARROW_I16X8_S -> narrow(x, y, I16X8, I8X16, Byte.MIN_VALUE, Byte.MAX_VALUE);
+           case Bytecode.VECTOR_I8X16_NARROW_I16X8_U -> narrow(x, y, I16X8, I8X16, (short) 0, (short) 0xff);
            case Bytecode.VECTOR_I8X16_ADD -> binop(x, y, I8X16, VectorOperators.ADD);
            case Bytecode.VECTOR_I8X16_ADD_SAT_S -> binop(x, y, I8X16, VectorOperators.SADD);
-           case Bytecode.VECTOR_I8X16_ADD_SAT_U -> binop_sat_u(x, y, I8X16, I16X8, VectorOperators.ZERO_EXTEND_B2S, VectorOperators.S2B, VectorOperators.ADD, 0, 0xff);
+           case Bytecode.VECTOR_I8X16_ADD_SAT_U -> binop_sat_u(x, y, I8X16, I16X8, VectorOperators.ZERO_EXTEND_B2S, VectorOperators.ADD, 0, 0xff);
            case Bytecode.VECTOR_I8X16_SUB -> binop(x, y, I8X16, VectorOperators.SUB);
            case Bytecode.VECTOR_I8X16_SUB_SAT_S -> binop(x, y, I8X16, VectorOperators.SSUB);
-           case Bytecode.VECTOR_I8X16_SUB_SAT_U -> binop_sat_u(x, y, I8X16, I16X8, VectorOperators.ZERO_EXTEND_B2S, VectorOperators.S2B, VectorOperators.SUB, 0, 0xff);
+           case Bytecode.VECTOR_I8X16_SUB_SAT_U -> binop_sat_u(x, y, I8X16, I16X8, VectorOperators.ZERO_EXTEND_B2S, VectorOperators.SUB, 0, 0xff);
            case Bytecode.VECTOR_I8X16_MIN_S -> binop(x, y, I8X16, VectorOperators.MIN);
            case Bytecode.VECTOR_I8X16_MIN_U -> binop(x, y, I8X16, VectorOperators.UMIN);
            case Bytecode.VECTOR_I8X16_MAX_S -> binop(x, y, I8X16, VectorOperators.MAX);
            case Bytecode.VECTOR_I8X16_MAX_U -> binop(x, y, I8X16, VectorOperators.UMAX);
-           case Bytecode.VECTOR_I8X16_AVGR_U -> avgr(x, y, I8X16, I16X8, VectorOperators.ZERO_EXTEND_B2S, VectorOperators.S2B);
-           case Bytecode.VECTOR_I16X8_NARROW_I32X4_S -> narrow(x, y, I32X4, I16X8, VectorOperators.I2S, VectorOperators.ZERO_EXTEND_S2I, Short.MIN_VALUE, Short.MAX_VALUE);
-           case Bytecode.VECTOR_I16X8_NARROW_I32X4_U -> narrow(x, y, I32X4, I16X8, VectorOperators.I2S, VectorOperators.ZERO_EXTEND_S2I, 0, 0xffff);
+           case Bytecode.VECTOR_I8X16_AVGR_U -> avgr_u(x, y, I8X16, I16X8, VectorOperators.ZERO_EXTEND_B2S);
+           case Bytecode.VECTOR_I16X8_NARROW_I32X4_S -> narrow(x, y, I32X4, I16X8, Short.MIN_VALUE, Short.MAX_VALUE);
+           case Bytecode.VECTOR_I16X8_NARROW_I32X4_U -> narrow(x, y, I32X4, I16X8, 0, 0xffff);
            case Bytecode.VECTOR_I16X8_Q15MULR_SAT_S, Bytecode.VECTOR_I16X8_RELAXED_Q15MULR_S -> i16x8_q15mulr_sat_s(x, y);
            case Bytecode.VECTOR_I16X8_ADD -> binop(x, y, I16X8, VectorOperators.ADD);
            case Bytecode.VECTOR_I16X8_ADD_SAT_S -> binop(x, y, I16X8, VectorOperators.SADD);
-           case Bytecode.VECTOR_I16X8_ADD_SAT_U -> binop_sat_u(x, y, I16X8, I32X4, VectorOperators.ZERO_EXTEND_S2I, VectorOperators.I2S, VectorOperators.ADD, 0, 0xffff);
+           case Bytecode.VECTOR_I16X8_ADD_SAT_U -> binop_sat_u(x, y, I16X8, I32X4, VectorOperators.ZERO_EXTEND_S2I, VectorOperators.ADD, 0, 0xffff);
            case Bytecode.VECTOR_I16X8_SUB -> binop(x, y, I16X8, VectorOperators.SUB);
            case Bytecode.VECTOR_I16X8_SUB_SAT_S -> binop(x, y, I16X8, VectorOperators.SSUB);
-           case Bytecode.VECTOR_I16X8_SUB_SAT_U -> binop_sat_u(x, y, I16X8, I32X4, VectorOperators.ZERO_EXTEND_S2I, VectorOperators.I2S, VectorOperators.SUB, 0, 0xffff);
+           case Bytecode.VECTOR_I16X8_SUB_SAT_U -> binop_sat_u(x, y, I16X8, I32X4, VectorOperators.ZERO_EXTEND_S2I, VectorOperators.SUB, 0, 0xffff);
            case Bytecode.VECTOR_I16X8_MUL -> binop(x, y, I16X8, VectorOperators.MUL);
            case Bytecode.VECTOR_I16X8_MIN_S -> binop(x, y, I16X8, VectorOperators.MIN);
            case Bytecode.VECTOR_I16X8_MIN_U -> binop(x, y, I16X8, VectorOperators.UMIN);
            case Bytecode.VECTOR_I16X8_MAX_S -> binop(x, y, I16X8, VectorOperators.MAX);
            case Bytecode.VECTOR_I16X8_MAX_U -> binop(x, y, I16X8, VectorOperators.UMAX);
-           case Bytecode.VECTOR_I16X8_AVGR_U -> avgr(x, y, I16X8, I32X4, VectorOperators.ZERO_EXTEND_S2I, VectorOperators.I2S);
+           case Bytecode.VECTOR_I16X8_AVGR_U -> avgr_u(x, y, I16X8, I32X4, VectorOperators.ZERO_EXTEND_S2I);
            case Bytecode.VECTOR_I16X8_EXTMUL_LOW_I8X16_S -> extmul(x, y, I8X16, VectorOperators.B2S, 0);
            case Bytecode.VECTOR_I16X8_EXTMUL_LOW_I8X16_U -> extmul(x, y, I8X16, VectorOperators.ZERO_EXTEND_B2S, 0);
            case Bytecode.VECTOR_I16X8_EXTMUL_HIGH_I8X16_S -> extmul(x, y, I8X16, VectorOperators.B2S, 1);
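Note on the simplified narrow calls above: each wide source lane is clamped to [min, max] and then truncated to the narrow lane type, which is why the signed variant passes [Byte.MIN_VALUE, Byte.MAX_VALUE] and the unsigned variant [0, 0xff]. A minimal scalar sketch of one i8x16.narrow_i16x8 lane (the helper name is illustrative, not part of this patch):

    // One lane of i8x16.narrow_i16x8_s / _u: clamp to the target range, then truncate.
    static byte narrowLane(short lane, long min, long max) {
        long clamped = Math.min(Math.max(lane, min), max);
        return (byte) clamped;
    }
    // narrowLane((short) 300, 0, 0xff) == (byte) 0xff                         -- unsigned saturation
    // narrowLane((short) -5, 0, 0xff) == 0
    // narrowLane((short) 200, Byte.MIN_VALUE, Byte.MAX_VALUE) == Byte.MAX_VALUE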
@@ -883,8 +883,8 @@ private static ByteVector i32x4_trunc_sat_f32x4_u(ByteVector xBytes) {
        DoubleVector xHigh = castDouble128(x.convert(VectorOperators.F2D, 1));
        LongVector xLowTrunc = truncSatU32(xLow);
        LongVector xHighTrunc = truncSatU32(xHigh);
-       IntVector resultLow = castInt128(compact(xLowTrunc, 0, I64X2, I32X4, VectorOperators.L2I, VectorOperators.ZERO_EXTEND_I2L));
-       IntVector resultHigh = castInt128(compact(xHighTrunc, 0, I64X2, I32X4, VectorOperators.L2I, VectorOperators.ZERO_EXTEND_I2L));
+       IntVector resultLow = castInt128(compact(xLowTrunc, 0, I64X2, I32X4));
+       IntVector resultHigh = castInt128(compact(xHighTrunc, -1, I64X2, I32X4));
        IntVector result = resultLow.or(resultHigh);
        return result.reinterpretAsBytes();
    }
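With the part index set to -1 for the high half, compact places the compacted lanes of xHighTrunc under the high-lane mask, so resultLow.or(resultHigh) assembles the full i32x4 result from disjoint lanes. For reference, a hedged scalar sketch of the unsigned truncation feeding this code (truncSatU32Lane is a hypothetical name, not the project's helper):

    // One lane of i32x4.trunc_sat_f32x4_u: NaN maps to 0, the value is truncated
    // and clamped to the unsigned 32-bit range.
    static long truncSatU32Lane(double value) {
        if (Double.isNaN(value)) {
            return 0L;
        }
        double truncated = Math.floor(value); // floor is enough here: negatives saturate to 0 anyway
        if (truncated <= 0.0) {
            return 0L;
        }
        if (truncated >= 4294967296.0) { // 2^32
            return 0xFFFF_FFFFL;
        }
        return (long) truncated;
    }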
@@ -903,7 +903,7 @@ private static ByteVector f32x4_convert_i32x4_u(ByteVector xBytes) {
    private static ByteVector i32x4_trunc_sat_f64x2_u_zero(ByteVector xBytes) {
        DoubleVector x = F64X2.reinterpret(xBytes);
        LongVector longResult = truncSatU32(x);
-       IntVector result = castInt128(compact(longResult, 0, I64X2, I32X4, VectorOperators.L2I, VectorOperators.ZERO_EXTEND_I2L));
+       IntVector result = castInt128(compact(longResult, 0, I64X2, I32X4));
        return result.reinterpretAsBytes();
    }
@@ -965,8 +965,11 @@ private static ByteVector f64x2_relop(ByteVector xBytes, ByteVector yBytes, Vect
     * {@code VectorSupport#convert} in a way that would map a vector of N elements to a vector of M
     * elements, where M > N. Such a situation is currently not supported by the Graal compiler
     * [GR-68216].
+    * <p>
+    * Works only for integral shapes. See {@link #compactGeneral} for the general implementation.
+    * </p>
     */
-   private static <E, F> Vector<F> compact(Vector<E> vec, int part, Shape<E> inShape, Shape<F> outShape, VectorOperators.Conversion<E, F> downcast, VectorOperators.Conversion<F, E> upcast) {
+   private static <E, F> Vector<F> compact(Vector<E> vec, int part, Shape<E> inShape, Shape<F> outShape) {
        // Works only for integral shapes.
        assert inShape.species().elementType() == short.class && outShape.species().elementType() == byte.class ||
                        inShape.species().elementType() == int.class && outShape.species().elementType() == short.class ||
@@ -976,8 +979,7 @@ private static <E, F> Vector<F> compact(Vector<E> vec, int part, Shape<E> inShap
            case -1 -> outShape.highMask;
            default -> throw CompilerDirectives.shouldNotReachHere();
        };
-       VectorSpecies<F> halfSizeOutShape = outShape.species().withShape(VectorShape.S_64_BIT);
-       return vec.convertShape(downcast, halfSizeOutShape, 0).convertShape(upcast, inShape.species(), 0).reinterpretShape(outShape.species(), 0).rearrange(outShape.compressEvensShuffle, mask);
+       return vec.reinterpretShape(outShape.species(), 0).rearrange(outShape.compressEvensShuffle, mask);
    }
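Dropping the convertShape round trip is valid for the integral shapes this helper asserts: narrowing an integral lane keeps its low bytes, and (assuming the little-endian lane layout the compressEvensShuffle is built around) those low bytes are exactly the even lanes of the reinterpreted vector, so reinterpret + rearrange selects the same bits the old downcast/upcast path produced. A tiny scalar check of that equivalence (illustrative, not part of the patch):

    // Truncating a short keeps its low byte, which is the even byte lane of the
    // same 16 bits when reinterpreted in little-endian order.
    static void truncationPicksEvenLane(short wideLane) {
        byte truncated = (byte) wideLane;
        byte evenByteLane = (byte) (wideLane & 0xff);
        assert truncated == evenByteLane;
    }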

    /**
@@ -987,11 +989,11 @@ private static <W, WI, N, NI> Vector<N> compactGeneral(Vector<W> vec, int part,
                    Shape<WI> wideIntegralShape, Shape<N> narrowShape,
                    VectorOperators.Conversion<W, N> downcast,
                    VectorOperators.Conversion<N, NI> asIntegral,
-                   VectorOperators.Conversion<NI, WI> upcast) {
+                   VectorOperators.Conversion<NI, WI> zeroExtend) {
        // NI and WI must be integral types, with NI being half the size of WI.
-       assert upcast.domainType() == byte.class && upcast.rangeType() == short.class ||
-                       upcast.domainType() == short.class && upcast.rangeType() == int.class ||
-                       upcast.domainType() == int.class && upcast.rangeType() == long.class;
+       assert zeroExtend.domainType() == byte.class && zeroExtend.rangeType() == short.class ||
+                       zeroExtend.domainType() == short.class && zeroExtend.rangeType() == int.class ||
+                       zeroExtend.domainType() == int.class && zeroExtend.rangeType() == long.class;
        VectorMask<N> mask = switch (part) {
            case 0 -> narrowShape.lowMask;
            case -1 -> narrowShape.highMask;
@@ -1000,44 +1002,43 @@ private static <W, WI, N, NI> Vector<N> compactGeneral(Vector<W> vec, int part,
        VectorSpecies<N> halfSizeOutShape = narrowShape.species().withShape(VectorShape.S_64_BIT);
        Vector<N> down = vec.convertShape(downcast, halfSizeOutShape, 0);
        Vector<NI> downIntegral = down.convert(asIntegral, 0);
-       Vector<WI> upIntegral = downIntegral.convertShape(upcast, wideIntegralShape.species(), 0);
+       Vector<WI> upIntegral = downIntegral.convertShape(zeroExtend, wideIntegralShape.species(), 0);
        Vector<N> narrowEvens = upIntegral.reinterpretShape(narrowShape.species(), 0);
        return narrowEvens.rearrange(narrowShape.compressEvensShuffle, mask);
    }

-   private static <E, F> ByteVector narrow(ByteVector xBytes, ByteVector yBytes,
-                   Shape<E> inShape, Shape<F> outShape, VectorOperators.Conversion<E, F> downcast, VectorOperators.Conversion<F, E> upcast,
-                   long min, long max) {
+   private static <E, F> ByteVector narrow(ByteVector xBytes, ByteVector yBytes, Shape<E> inShape, Shape<F> outShape, long min, long max) {
        Vector<E> x = inShape.reinterpret(xBytes);
        Vector<E> y = inShape.reinterpret(yBytes);
        Vector<E> xSat = sat(x, inShape, min, max);
        Vector<E> ySat = sat(y, inShape, min, max);
-       Vector<F> resultLow = compact(xSat, 0, inShape, outShape, downcast, upcast);
-       Vector<F> resultHigh = compact(ySat, -1, inShape, outShape, downcast, upcast);
+       Vector<F> resultLow = compact(xSat, 0, inShape, outShape);
+       Vector<F> resultHigh = compact(ySat, -1, inShape, outShape);
        Vector<F> result = resultLow.lanewise(VectorOperators.OR, resultHigh);
        return result.reinterpretAsBytes();
    }

    private static <E, F> ByteVector binop_sat_u(ByteVector xBytes, ByteVector yBytes,
                    Shape<E> shape, Shape<F> extendedShape,
-                   VectorOperators.Conversion<E, F> upcast, VectorOperators.Conversion<F, E> downcast,
+                   VectorOperators.Conversion<E, F> upcast,
                    VectorOperators.Binary op, long min, long max) {
-       return upcastBinopDowncast(xBytes, yBytes, shape, extendedShape, upcast, downcast, (x, y) -> {
+       return upcastBinopDowncast(xBytes, yBytes, shape, extendedShape, upcast, (x, y) -> {
            Vector<F> rawResult = x.lanewise(op, y);
            Vector<F> satResult = sat(rawResult, extendedShape, min, max);
            return satResult;
        });
    }
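The upcast to extendedShape is what keeps the saturation meaningful: the raw unsigned add or sub is evaluated in the 2x wider lane type, where it cannot wrap, and only then clamped to [min, max] before being compacted back. A scalar sketch of one i8x16.add_sat_u lane (illustrative helper, not from this patch):

    // One lane of i8x16.add_sat_u: widen, add, clamp to [0, 0xff], narrow.
    static byte addSatU8Lane(byte x, byte y) {
        int wide = (x & 0xff) + (y & 0xff); // widened unsigned add cannot wrap
        return (byte) Math.min(wide, 0xff);
    }
    // addSatU8Lane((byte) 0xf0, (byte) 0x20) == (byte) 0xff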

-   private static <E, F> ByteVector avgr(ByteVector xBytes, ByteVector yBytes, Shape<E> shape, Shape<F> extendedShape, VectorOperators.Conversion<E, F> upcast,
-                   VectorOperators.Conversion<F, E> downcast) {
+   private static <E, F> ByteVector avgr_u(ByteVector xBytes, ByteVector yBytes,
+                   Shape<E> shape, Shape<F> extendedShape,
+                   VectorOperators.Conversion<E, F> upcast) {
        Vector<F> one = extendedShape.broadcast(1);
        Vector<F> two = extendedShape.broadcast(2);
-       return upcastBinopDowncast(xBytes, yBytes, shape, extendedShape, upcast, downcast, (x, y) -> x.add(y).add(one).div(two));
+       return upcastBinopDowncast(xBytes, yBytes, shape, extendedShape, upcast, (x, y) -> x.add(y).add(one).div(two));
    }
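The renamed avgr_u computes the WebAssembly rounding average (x + y + 1) / 2 in the wider type, where the sum plus one cannot overflow and the truncating division rounds halves up. A scalar sketch of one i8x16.avgr_u lane (illustrative):

    // One lane of i8x16.avgr_u: rounding average evaluated in a wider type.
    static byte avgrU8Lane(byte x, byte y) {
        int avg = ((x & 0xff) + (y & 0xff) + 1) / 2;
        return (byte) avg;
    }
    // avgrU8Lane((byte) 1, (byte) 2) == 2   -- 1.5 rounds up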

    private static ByteVector i16x8_q15mulr_sat_s(ByteVector xBytes, ByteVector yBytes) {
-       return upcastBinopDowncast(xBytes, yBytes, I16X8, I32X4, VectorOperators.S2I, VectorOperators.I2S, (x, y) -> {
+       return upcastBinopDowncast(xBytes, yBytes, I16X8, I32X4, VectorOperators.S2I, (x, y) -> {
            Vector<Integer> rawResult = x.mul(y).add(I32X4.broadcast(1 << 14)).lanewise(VectorOperators.ASHR, I32X4.broadcast(15));
            Vector<Integer> satResult = sat(rawResult, I32X4, Short.MIN_VALUE, Short.MAX_VALUE);
            return satResult;
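For reference, the Q15 rounding multiply above evaluates (x * y + 2^14) >> 15 in 32-bit lanes and then saturates to the i16 range; the only input pair that can exceed that range is -32768 * -32768. A scalar sketch of one lane (illustrative helper, not from this patch):

    // One lane of i16x8.q15mulr_sat_s.
    static short q15MulrSatLane(short x, short y) {
        int rounded = (x * y + (1 << 14)) >> 15;
        return (short) Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, rounded));
    }
    // q15MulrSatLane(Short.MIN_VALUE, Short.MIN_VALUE) == Short.MAX_VALUE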
@@ -1228,16 +1229,16 @@ private static LongVector truncSatU32(DoubleVector x) {

    private static <E, F> ByteVector upcastBinopDowncast(ByteVector xBytes, ByteVector yBytes,
                    Shape<E> shape, Shape<F> extendedShape,
-                   VectorOperators.Conversion<E, F> upcast, VectorOperators.Conversion<F, E> downcast,
+                   VectorOperators.Conversion<E, F> upcast,
                    BinaryVectorOp<F> op) {
        Vector<E> x = shape.reinterpret(xBytes);
        Vector<E> y = shape.reinterpret(yBytes);
        Vector<F> xLow = x.convert(upcast, 0);
        Vector<F> xHigh = x.convert(upcast, 1);
        Vector<F> yLow = y.convert(upcast, 0);
        Vector<F> yHigh = y.convert(upcast, 1);
-       Vector<E> resultLow = compact(op.apply(xLow, yLow), 0, extendedShape, shape, downcast, upcast);
-       Vector<E> resultHigh = compact(op.apply(xHigh, yHigh), -1, extendedShape, shape, downcast, upcast);
+       Vector<E> resultLow = compact(op.apply(xLow, yLow), 0, extendedShape, shape);
+       Vector<E> resultHigh = compact(op.apply(xHigh, yHigh), -1, extendedShape, shape);
        Vector<E> result = resultLow.lanewise(VectorOperators.OR, resultHigh);
        return result.reinterpretAsBytes();
    }