Commit d79a841

switch to vector stores
1 parent f75209f commit d79a841

1 file changed: +70 -102 lines changed

src/hotspot/cpu/s390/stubGenerator_s390.cpp

Lines changed: 70 additions & 102 deletions
@@ -1468,77 +1468,6 @@ class StubGenerator: public StubCodeGenerator {
     return __ addr_at(start_off);
   }
 
-
-  // Helper for generate_unsafe_setmemory
-  //
-  // Non-atomically fill an array of memory using 1 byte chunk and return.
-  // We don't care about atomicity because the address and size are not aligned, So we are
-  // free to fill the memory with best possible ways.
-  static void do_setmemory_atomic_loop_mvc(Register dest, Register size, Register byteVal,
-                                           MacroAssembler *_masm) {
-    NearLabel L_loop, L_tail, L_mvc;
-
-    __ z_aghi(size, -1);                    // -1 because first byte is preset by stc
-    __ z_bcr(Assembler::bcondLow, Z_R14);   // result < 0 means size == 0 => return
-    __ z_stc(byteVal, Address(dest));       // initialize first byte
-    __ z_bcr(Assembler::bcondEqual, Z_R14); // result == 0 means size == 1 => return
-
-    // handle complete 256 byte blocks
-    __ bind(L_loop);
-    __ z_aghi(size, -256); // decrement remaining #bytes
-    __ z_brl(L_tail);      // skip loop if no full 256 byte block left
-
-    __ z_mvc(1, 255, dest, 0, dest);        // propagate byte from dest[0+i*256] to dest[1+i*256]
-    __ z_bcr(Assembler::bcondEqual, Z_R14); // remaining size == 0 => return (mvc does not touch CC)
-
-    __ z_aghi(dest, 256);  // increment target address
-    __ z_bru(L_loop);
-
-    // handle remaining bytes. We know 0 < size < 256
-    __ bind(L_tail);
-    __ z_aghi(size, +256-1); // prepare size value for mvc via exrl
-    __ z_exrl(size, L_mvc);
-    __ z_br(Z_R14);
-
-    __ bind(L_mvc);
-    __ z_mvc(1, 0, dest, 0, dest); // mvc template, needs to be generated, not executed
-  }
-
-  static void do_setmemory_atomic_loop(int elem_size, Register dest, Register size, Register byteVal,
-                                       MacroAssembler *_masm) {
-
-    NearLabel L_Loop, L_Tail; // 2x unrolled loop
-    Register tmp = Z_R1;      // R1 is free at this point
-
-    if (elem_size > 1) {
-      __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, false);
-    }
-
-    if (elem_size > 2) {
-      __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, false);
-    }
-
-    if (elem_size > 4) {
-      __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, false);
-    }
-
-    __ z_risbg(tmp, size, 32, 63, 64 - exact_log2(2 * elem_size), /* zero_rest */ true); // just do the right shift and set cc
-    __ z_bre(L_Tail);
-
-    __ align(32); // loop alignment
-    __ bind(L_Loop);
-    __ store_sized_value(byteVal, Address(dest, 0), elem_size);
-    __ store_sized_value(byteVal, Address(dest, elem_size), elem_size);
-    __ z_aghi(dest, 2 * elem_size);
-    __ z_brct(tmp, L_Loop);
-
-    __ bind(L_Tail);
-    __ z_nilf(size, elem_size);
-    __ z_bcr(Assembler::bcondEqual, Z_R14);
-    __ store_sized_value(byteVal, Address(dest, 0), elem_size);
-    __ z_br(Z_R14);
-  }
-
   //
   // Generate 'unsafe' set memory stub
   // Though just as safe as the other stubs, it takes an unscaled
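The first hunk drops the two scalar helpers. As a rough guide for readers who don't read z/Architecture assembly daily, the removed MVC-based byte fill behaves roughly like the following plain C++ sketch (an illustration with an invented function name, not HotSpot code): seed the first byte with STC, smear it across full 256-byte blocks with an overlapping MVC, and finish with an EXRL-executed MVC of the leftover length.

// Illustrative only: a plain C++ rendering of the removed MVC-based fill.
static void fill_mvc_style(unsigned char* dest, unsigned long size, unsigned char byteVal) {
  if (size == 0) return;
  dest[0] = byteVal;                         // z_stc: seed the first byte
  if (size == 1) return;
  unsigned long remaining = size - 1;        // bytes still to fill after the seed
  while (remaining >= 256) {                 // full blocks: z_mvc(1, 255, dest, 0, dest)
    for (int i = 0; i < 256; i++) {
      dest[i + 1] = dest[i];                 // overlapping copy smears the byte forward
    }
    dest += 256;
    remaining -= 256;
  }
  for (unsigned long i = 0; i < remaining; i++) {
    dest[i + 1] = dest[i];                   // tail: EXRL-executed MVC of the leftover length
  }
}

The second removed helper, do_setmemory_atomic_loop, covered the aligned cases by replicating the byte into 2-, 4- or 8-byte values (rotate_then_insert) and storing them in a 2x-unrolled loop.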
@@ -1557,46 +1486,83 @@ class StubGenerator: public StubCodeGenerator {
     // bump this on entry, not on exit:
     // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr);
 
-    {
-      const Register dest = Z_ARG1;
-      const Register size = Z_ARG2;
-      const Register byteVal = Z_ARG3;
-      const Register rScratch1 = Z_R1_scratch;
-      NearLabel L_fill8Bytes, L_fill4Bytes, L_fillBytes;
-      // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char)
+    const Register dest = Z_ARG1;
+    const Register size = Z_ARG2;
+    const Register byteVal = Z_ARG3;
+    NearLabel tail, finished;
+    // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char)
 
-      // Check for pointer & size alignment
-      __ z_ogrk(rScratch1, dest, size);
+    // Mark remaining code as such which performs Unsafe accesses.
+    UnsafeMemoryAccessMark umam(this, true, false);
 
-      __ z_nill(rScratch1, 7);
-      __ z_braz(L_fill8Bytes); // branch if 0
+    __ z_vlvgb(Z_V0, byteVal, 0);
+    __ z_vrepb(Z_V0, Z_V0, 0);
 
-      __ z_nill(rScratch1, 3);
-      __ z_braz(L_fill4Bytes); // branch if 0
+    __ z_aghi(size, -32);
+    __ z_brl(tail);
 
-      __ z_nill(rScratch1, 1);
-      __ z_brnaz(L_fillBytes); // branch if not 0
+    {
+      NearLabel again;
+      __ bind(again);
+      __ z_vst(Z_V0, Address(dest, 0));
+      __ z_vst(Z_V0, Address(dest, 16));
+      __ z_aghi(dest, 32);
+      __ z_aghi(size, -32);
+      __ z_brnl(again);
+    }
 
-      // Mark remaining code as such which performs Unsafe accesses.
-      UnsafeMemoryAccessMark umam(this, true, false);
+    __ bind(tail);
+
+    {
+      NearLabel dont;
+      __ testbit(size, 4);
+      __ z_brz(dont);
+      __ z_vst(Z_V0, Address(dest, 0));
+      __ z_aghi(dest, 16);
+      __ bind(dont);
+    }
+
+    {
+      NearLabel dont;
+      __ testbit(size, 3);
+      __ z_brz(dont);
+      __ z_vsteg(Z_V0, 0, Z_R0, dest, 0);
+      __ z_aghi(dest, 8);
+      __ bind(dont);
+    }
 
-      // At this point, we know the lower bit of size is zero and a
-      // multiple of 2
-      do_setmemory_atomic_loop(2, dest, size, byteVal, _masm);
+    __ z_tmll(size, 7);
+    __ z_brc(Assembler::bcondAllZero, finished);
 
-      __ bind(L_fill8Bytes);
-      // At this point, we know the lower 3 bits of size are zero and a
-      // multiple of 8
-      do_setmemory_atomic_loop(8, dest, size, byteVal, _masm);
+    {
+      NearLabel dont;
+      __ testbit(size, 2);
+      __ z_brz(dont);
+      __ z_vstef(Z_V0, 0, Z_R0, dest, 0);
+      __ z_aghi(dest, 4);
+      __ bind(dont);
+    }
 
-      __ bind(L_fill4Bytes);
-      // At this point, we know the lower 2 bits of size are zero and a
-      // multiple of 4
-      do_setmemory_atomic_loop(4, dest, size, byteVal, _masm);
+    {
+      NearLabel dont;
+      __ testbit(size, 1);
+      __ z_brz(dont);
+      __ z_vsteh(Z_V0, 0, Z_R0, dest, 0);
+      __ z_aghi(dest, 2);
+      __ bind(dont);
+    }
 
-      __ bind(L_fillBytes);
-      do_setmemory_atomic_loop_mvc(dest, size, byteVal, _masm);
+    {
+      NearLabel dont;
+      __ testbit(size, 0);
+      __ z_brz(dont);
+      __ z_vsteb(Z_V0, 0, Z_R0, dest, 0);
+      __ bind(dont);
     }
+
+    __ bind(finished);
+    __ z_br(Z_R14);
+
     return __ addr_at(start_off);
   }
 
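The replacement path in the second hunk is a single vector-store sequence: z_vlvgb/z_vrepb replicate the fill byte across Z_V0, an unrolled loop stores 32 bytes per iteration, and the tail is peeled by testing individual bits of the residual size with VST/VSTEG/VSTEF/VSTEH/VSTEB element stores. In plain C++ terms it is roughly this (an illustrative sketch with invented names, not the stub itself):

#include <cstring>

// Illustrative only: scalar C++ shape of the new vector-store fill.
static void fill_vector_style(unsigned char* dest, unsigned long size, unsigned char byteVal) {
  unsigned char v[16];                       // stands in for Z_V0 after z_vlvgb + z_vrepb
  std::memset(v, byteVal, sizeof(v));

  while (size >= 32) {                       // main loop: two 16-byte stores per iteration
    std::memcpy(dest,      v, 16);
    std::memcpy(dest + 16, v, 16);
    dest += 32;
    size -= 32;
  }
  // The stub tests bits 4..0 of a size value biased by -32; the low five bits
  // are unaffected by that bias, so testing the plain residual is equivalent.
  if (size & 16) { std::memcpy(dest, v, 16); dest += 16; }  // z_vst
  if (size &  8) { std::memcpy(dest, v,  8); dest +=  8; }  // z_vsteg
  if (size &  4) { std::memcpy(dest, v,  4); dest +=  4; }  // z_vstef
  if (size &  2) { std::memcpy(dest, v,  2); dest +=  2; }  // z_vsteh
  if (size &  1) { dest[0] = byteVal; }                     // z_vsteb
}

Note that in the stub the 16- and 8-byte tail stores come before the z_tmll(size, 7) check, so a residual that is a multiple of 8 can branch straight to finished without running the byte-level tests.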

@@ -1645,7 +1611,9 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_uninit_id);
 
 #ifdef COMPILER2
-    StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory(StubRoutines::_jbyte_fill);
+    StubRoutines::_unsafe_setmemory =
+      VM_Version::has_VectorFacility() ? generate_unsafe_setmemory(StubRoutines::_jbyte_fill) : nullptr;
+
 #endif // COMPILER2
   }
 
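The last hunk makes registration conditional: the stub is generated only when the vector facility is available, and StubRoutines::_unsafe_setmemory stays nullptr otherwise. A minimal sketch of that guard-then-register shape (invented helper names, not the HotSpot API):

#include <cstdint>

using address = uint8_t*;  // simplified stand-in for HotSpot's address type

struct Stubs {
  address unsafe_setmemory = nullptr;  // nullptr: no accelerated stub published
};

// has_vector_facility and generate_fill_stub are hypothetical stand-ins for
// VM_Version::has_VectorFacility() and generate_unsafe_setmemory().
void register_setmemory_stub(Stubs& stubs, bool has_vector_facility,
                             address (*generate_fill_stub)()) {
  // Publish the generated entry point only on hardware that can execute it.
  stubs.unsafe_setmemory = has_vector_facility ? generate_fill_stub() : nullptr;
}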