@@ -8837,23 +8837,27 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe
88378837#undef BLOCK_COMMENT
88388838
88398839// Compress char[] array to byte[].
8840- // ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java
8840+ // Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
8841+ // Returns len if every char in the processed range can be encoded as latin1;
8842+ // otherwise, returns the index of the first non-latin1 (> 0xff) character.
88418843// @IntrinsicCandidate
8842- // private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
8844+ // public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
88438845// for (int i = 0; i < len; i++) {
8844- // int c = src[srcOff++ ];
8845- // if (c >>> 8 != 0 ) {
8846- // return 0;
8846+ // char c = src[srcOff];
8847+ // if (c > 0xff ) {
8848+ // return i; // return index of non-latin1 char
88478849// }
8848- // dst[dstOff++] = (byte)c;
8850+ // dst[dstOff] = (byte)c;
8851+ // srcOff++;
8852+ // dstOff++;
88498853// }
88508854// return len;
88518855// }
88528856void MacroAssembler::char_array_compress (Register src, Register dst, Register len,
88538857 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
88548858 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
88558859 Register tmp5, Register result, KRegister mask1, KRegister mask2) {
8856- Label copy_chars_loop, return_length, return_zero, done ;
8860+ Label copy_chars_loop, done, reset_sp, copy_tail ;
88578861
88588862 // rsi: src
88598863 // rdi: dst
@@ -8868,28 +8872,28 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
88688872 assert (len != result, " " );
88698873
88708874 // save length for return
8871- push ( len);
8875+ movl (result, len);
88728876
88738877 if ((AVX3Threshold == 0 ) && (UseAVX > 2 ) && // AVX512
88748878 VM_Version::supports_avx512vlbw () &&
88758879 VM_Version::supports_bmi2 ()) {
88768880
8877- Label copy_32_loop, copy_loop_tail, below_threshold;
8881+ Label copy_32_loop, copy_loop_tail, below_threshold, reset_for_copy_tail ;
88788882
88798883 // alignment
88808884 Label post_alignment;
88818885
8882- // if length of the string is less than 16 , handle it in an old fashioned way
8886+ // if length of the string is less than 32 , handle it the old fashioned way
88838887 testl (len, -32 );
88848888 jcc (Assembler::zero, below_threshold);
88858889
88868890 // First check whether a character is compressible ( <= 0xFF).
88878891 // Create mask to test for Unicode chars inside zmm vector
8888- movl (result , 0x00FF );
8889- evpbroadcastw (tmp2Reg, result , Assembler::AVX_512bit);
8892+ movl (tmp5 , 0x00FF );
8893+ evpbroadcastw (tmp2Reg, tmp5 , Assembler::AVX_512bit);
88908894
88918895 testl (len, -64 );
8892- jcc (Assembler::zero, post_alignment);
8896+ jccb (Assembler::zero, post_alignment);
88938897
88948898 movl (tmp5, dst);
88958899 andl (tmp5, (32 - 1 ));
@@ -8898,18 +8902,19 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
88988902
88998903 // bail out when there is nothing to be done
89008904 testl (tmp5, 0xFFFFFFFF );
8901- jcc (Assembler::zero, post_alignment);
8905+ jccb (Assembler::zero, post_alignment);
89028906
89038907 // ~(~0 << len), where len is the # of remaining elements to process
8904- movl (result, 0xFFFFFFFF );
8905- shlxl (result, result, tmp5);
8906- notl (result);
8907- kmovdl (mask2, result);
8908+ movl (len, 0xFFFFFFFF );
8909+ shlxl (len, len, tmp5);
8910+ notl (len);
8911+ kmovdl (mask2, len);
8912+ movl (len, result);
89088913
89098914 evmovdquw (tmp1Reg, mask2, Address (src, 0 ), /* merge*/ false , Assembler::AVX_512bit);
89108915 evpcmpw (mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /* signed*/ false , Assembler::AVX_512bit);
89118916 ktestd (mask1, mask2);
8912- jcc (Assembler::carryClear, return_zero );
8917+ jcc (Assembler::carryClear, copy_tail );
89138918
89148919 evpmovwb (Address (dst, 0 ), mask2, tmp1Reg, Assembler::AVX_512bit);
89158920
@@ -8924,7 +8929,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
89248929 movl (tmp5, len);
89258930 andl (tmp5, (32 - 1 )); // tail count (in chars)
89268931 andl (len, ~(32 - 1 )); // vector count (in chars)
8927- jcc (Assembler::zero, copy_loop_tail);
8932+ jccb (Assembler::zero, copy_loop_tail);
89288933
89298934 lea (src, Address (src, len, Address::times_2));
89308935 lea (dst, Address (dst, len, Address::times_1));
@@ -8934,55 +8939,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
89348939 evmovdquw (tmp1Reg, Address (src, len, Address::times_2), Assembler::AVX_512bit);
89358940 evpcmpuw (mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
89368941 kortestdl (mask1, mask1);
8937- jcc (Assembler::carryClear, return_zero );
8942+ jccb (Assembler::carryClear, reset_for_copy_tail );
89388943
89398944 // All elements in current processed chunk are valid candidates for
89408945 // compression. Write a truncated byte elements to the memory.
89418946 evpmovwb (Address (dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
89428947 addptr (len, 32 );
8943- jcc (Assembler::notZero, copy_32_loop);
8948+ jccb (Assembler::notZero, copy_32_loop);
89448949
89458950 bind (copy_loop_tail);
89468951 // bail out when there is nothing to be done
89478952 testl (tmp5, 0xFFFFFFFF );
8948- jcc (Assembler::zero, return_length );
8953+ jcc (Assembler::zero, done );
89498954
89508955 movl (len, tmp5);
89518956
89528957 // ~(~0 << len), where len is the # of remaining elements to process
8953- movl (result , 0xFFFFFFFF );
8954- shlxl (result, result , len);
8955- notl (result );
8958+ movl (tmp5 , 0xFFFFFFFF );
8959+ shlxl (tmp5, tmp5 , len);
8960+ notl (tmp5 );
89568961
8957- kmovdl (mask2, result );
8962+ kmovdl (mask2, tmp5 );
89588963
89598964 evmovdquw (tmp1Reg, mask2, Address (src, 0 ), /* merge*/ false , Assembler::AVX_512bit);
89608965 evpcmpw (mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /* signed*/ false , Assembler::AVX_512bit);
89618966 ktestd (mask1, mask2);
8962- jcc (Assembler::carryClear, return_zero );
8967+ jcc (Assembler::carryClear, copy_tail );
89638968
89648969 evpmovwb (Address (dst, 0 ), mask2, tmp1Reg, Assembler::AVX_512bit);
8965- jmp (return_length);
8970+ jmp (done);
8971+
8972+ bind (reset_for_copy_tail);
8973+ lea (src, Address (src, tmp5, Address::times_2));
8974+ lea (dst, Address (dst, tmp5, Address::times_1));
8975+ subptr (len, tmp5);
8976+ jmp (copy_chars_loop);
89668977
89678978 bind (below_threshold);
89688979 }
89698980
89708981 if (UseSSE42Intrinsics) {
8971- Label copy_32_loop, copy_16, copy_tail ;
8982+ Label copy_32_loop, copy_16, copy_tail_sse, reset_for_copy_tail ;
89728983
8973- movl (result, len);
8984+ // vectored compression
8985+ testl (len, 0xfffffff8 );
8986+ jcc (Assembler::zero, copy_tail);
89748987
89758988 movl (tmp5, 0xff00ff00 ); // create mask to test for Unicode chars in vectors
8989+ movdl (tmp1Reg, tmp5);
8990+ pshufd (tmp1Reg, tmp1Reg, 0 ); // store Unicode mask in tmp1Reg
89768991
8977- // vectored compression
8978- andl (len, 0xfffffff0 ); // vector count (in chars)
8979- andl (result, 0x0000000f ); // tail count (in chars)
8980- testl (len, len);
8981- jcc (Assembler::zero, copy_16);
8992+ andl (len, 0xfffffff0 );
8993+ jccb (Assembler::zero, copy_16);
89828994
89838995 // compress 16 chars per iter
8984- movdl (tmp1Reg, tmp5);
8985- pshufd (tmp1Reg, tmp1Reg, 0 ); // store Unicode mask in tmp1Reg
89868996 pxor (tmp4Reg, tmp4Reg);
89878997
89888998 lea (src, Address (src, len, Address::times_2));
@@ -8995,59 +9005,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
89959005 movdqu (tmp3Reg, Address (src, len, Address::times_2, 16 )); // load next 8 characters
89969006 por (tmp4Reg, tmp3Reg);
89979007 ptest (tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
8998- jcc (Assembler::notZero, return_zero );
9008+ jccb (Assembler::notZero, reset_for_copy_tail );
89999009 packuswb (tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte
90009010 movdqu (Address (dst, len, Address::times_1), tmp2Reg);
90019011 addptr (len, 16 );
9002- jcc (Assembler::notZero, copy_32_loop);
9012+ jccb (Assembler::notZero, copy_32_loop);
90039013
90049014 // compress next vector of 8 chars (if any)
90059015 bind (copy_16);
9006- movl (len, result);
9007- andl (len, 0xfffffff8 ); // vector count (in chars)
9008- andl (result, 0x00000007 ); // tail count (in chars)
9009- testl (len, len);
9010- jccb (Assembler::zero, copy_tail);
9016+ // len == 0 here (the 16-char loop either finished or was skipped)
9017+ testl (result, 0x00000008 ); // check if there's a block of 8 chars to compress
9018+ jccb (Assembler::zero, copy_tail_sse);
90119019
9012- movdl (tmp1Reg, tmp5);
9013- pshufd (tmp1Reg, tmp1Reg, 0 ); // store Unicode mask in tmp1Reg
90149020 pxor (tmp3Reg, tmp3Reg);
90159021
90169022 movdqu (tmp2Reg, Address (src, 0 ));
90179023 ptest (tmp2Reg, tmp1Reg); // check for Unicode chars in vector
9018- jccb (Assembler::notZero, return_zero );
9024+ jccb (Assembler::notZero, reset_for_copy_tail );
90199025 packuswb (tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
90209026 movq (Address (dst, 0 ), tmp2Reg);
90219027 addptr (src, 16 );
90229028 addptr (dst, 8 );
9029+ jmpb (copy_tail_sse);
90239030
9024- bind (copy_tail);
9031+ bind (reset_for_copy_tail);
9032+ movl (tmp5, result);
9033+ andl (tmp5, 0x0000000f );
9034+ lea (src, Address (src, tmp5, Address::times_2));
9035+ lea (dst, Address (dst, tmp5, Address::times_1));
9036+ subptr (len, tmp5);
9037+ jmpb (copy_chars_loop);
9038+
9039+ bind (copy_tail_sse);
90259040 movl (len, result);
9041+ andl (len, 0x00000007 ); // tail count (in chars)
90269042 }
90279043 // compress 1 char per iter
9044+ bind (copy_tail);
90289045 testl (len, len);
9029- jccb (Assembler::zero, return_length );
9046+ jccb (Assembler::zero, done );
90309047 lea (src, Address (src, len, Address::times_2));
90319048 lea (dst, Address (dst, len, Address::times_1));
90329049 negptr (len);
90339050
90349051 bind (copy_chars_loop);
9035- load_unsigned_short (result , Address (src, len, Address::times_2));
9036- testl (result , 0xff00 ); // check if Unicode char
9037- jccb (Assembler::notZero, return_zero );
9038- movb (Address (dst, len, Address::times_1), result ); // ASCII char; compress to 1 byte
9052+ load_unsigned_short (tmp5 , Address (src, len, Address::times_2));
9053+ testl (tmp5 , 0xff00 ); // check if Unicode char
9054+ jccb (Assembler::notZero, reset_sp );
9055+ movb (Address (dst, len, Address::times_1), tmp5 ); // ASCII char; compress to 1 byte
90399056 increment (len);
9040- jcc (Assembler::notZero, copy_chars_loop);
9057+ jccb (Assembler::notZero, copy_chars_loop);
90419058
9042- // if compression succeeded, return length
9043- bind (return_length);
9044- pop (result);
9045- jmpb (done);
9046-
9047- // if compression failed, return 0
9048- bind (return_zero);
9049- xorl (result, result);
9050- addptr (rsp, wordSize);
9059+ // result += len, then return: len is zero if compression succeeded, otherwise minus the number of chars left uncompressed, so result becomes the index of the first non-latin1 char
9060+ bind (reset_sp);
9061+ addl (result, len);
90519062
90529063 bind (done);
90539064}
0 commit comments