Skip to content

Commit c4cf1e9

Browse files
zifeihan authored and Hamlin Li committed
8338539: New Object to ObjectMonitor mapping: riscv64 implementation
Reviewed-by: fyang, rehn, mli
1 parent 715fa8f commit c4cf1e9

File tree

9 files changed: +136 -81 lines changed

src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
7070
}
7171

7272
if (LockingMode == LM_LIGHTWEIGHT) {
73-
lightweight_lock(obj, hdr, temp, t1, slow_case);
73+
lightweight_lock(disp_hdr, obj, hdr, temp, t1, slow_case);
7474
} else if (LockingMode == LM_LEGACY) {
7575
Label done;
7676
// Load object header

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 114 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -253,12 +253,13 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
253253
// C2 uses the value of flag (0 vs !0) to determine the continuation.
254254
}
255255

256-
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Register tmp2, Register tmp3) {
256+
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box,
257+
Register tmp1, Register tmp2, Register tmp3) {
257258
// Flag register, zero for success; non-zero for failure.
258259
Register flag = t1;
259260

260261
assert(LockingMode == LM_LIGHTWEIGHT, "must be");
261-
assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);
262+
assert_different_registers(obj, box, tmp1, tmp2, tmp3, flag, t0);
262263

263264
mv(flag, 1);
264265

@@ -269,6 +270,11 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
269270
// Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
270271
Label slow_path;
271272

273+
if (UseObjectMonitorTable) {
274+
// Clear cache in case fast locking succeeds.
275+
sd(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
276+
}
277+
272278
if (DiagnoseSyncOnValueBasedClasses != 0) {
273279
load_klass(tmp1, obj);
274280
lwu(tmp1, Address(tmp1, Klass::access_flags_offset()));
@@ -277,14 +283,14 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
277283
}
278284

279285
const Register tmp1_mark = tmp1;
286+
const Register tmp3_t = tmp3;
280287

281288
{ // Lightweight locking
282289

283290
// Push lock to the lock stack and finish successfully. MUST branch to with flag == 0
284291
Label push;
285292

286293
const Register tmp2_top = tmp2;
287-
const Register tmp3_t = tmp3;
288294

289295
// Check if lock-stack is full.
290296
lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
@@ -323,29 +329,67 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
323329
{ // Handle inflated monitor.
324330
bind(inflated);
325331

332+
const Register tmp1_monitor = tmp1;
326333
if (!UseObjectMonitorTable) {
327-
// mark contains the tagged ObjectMonitor*.
328-
const Register tmp1_tagged_monitor = tmp1_mark;
329-
const uintptr_t monitor_tag = markWord::monitor_value;
330-
const Register tmp2_owner_addr = tmp2;
331-
const Register tmp3_owner = tmp3;
334+
assert(tmp1_monitor == tmp1_mark, "should be the same here");
335+
} else {
336+
Label monitor_found;
332337

333-
// Compute owner address.
334-
la(tmp2_owner_addr, Address(tmp1_tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag)));
338+
// Load cache address
339+
la(tmp3_t, Address(xthread, JavaThread::om_cache_oops_offset()));
335340

336-
// CAS owner (null => current thread).
337-
cmpxchg(/*addr*/ tmp2_owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64,
338-
/*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_owner);
339-
beqz(tmp3_owner, locked);
341+
const int num_unrolled = 2;
342+
for (int i = 0; i < num_unrolled; i++) {
343+
ld(tmp1, Address(tmp3_t));
344+
beq(obj, tmp1, monitor_found);
345+
add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference()));
346+
}
340347

341-
// Check if recursive.
342-
bne(tmp3_owner, xthread, slow_path);
348+
Label loop;
343349

344-
// Recursive.
345-
increment(Address(tmp1_tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1, tmp2, tmp3);
346-
} else {
347-
// OMCache lookup not supported yet. Take the slowpath.
350+
// Search for obj in cache.
351+
bind(loop);
352+
353+
// Check for match.
354+
ld(tmp1, Address(tmp3_t));
355+
beq(obj, tmp1, monitor_found);
356+
357+
// Search until null encountered, guaranteed _null_sentinel at end.
358+
add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference()));
359+
bnez(tmp1, loop);
360+
// Cache Miss. Take the slowpath.
348361
j(slow_path);
362+
363+
bind(monitor_found);
364+
ld(tmp1_monitor, Address(tmp3_t, OMCache::oop_to_monitor_difference()));
365+
}
366+
367+
const Register tmp2_owner_addr = tmp2;
368+
const Register tmp3_owner = tmp3;
369+
370+
const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
371+
const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
372+
const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);
373+
374+
Label monitor_locked;
375+
376+
// Compute owner address.
377+
la(tmp2_owner_addr, owner_address);
378+
379+
// CAS owner (null => current thread).
380+
cmpxchg(/*addr*/ tmp2_owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64,
381+
/*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_owner);
382+
beqz(tmp3_owner, monitor_locked);
383+
384+
// Check if recursive.
385+
bne(tmp3_owner, xthread, slow_path);
386+
387+
// Recursive.
388+
increment(recursions_address, 1, tmp2, tmp3);
389+
390+
bind(monitor_locked);
391+
if (UseObjectMonitorTable) {
392+
sd(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
349393
}
350394
}
351395

@@ -370,18 +414,18 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
370414
// C2 uses the value of flag (0 vs !0) to determine the continuation.
371415
}
372416

373-
void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Register tmp2,
374-
Register tmp3) {
417+
void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box,
418+
Register tmp1, Register tmp2, Register tmp3) {
375419
// Flag register, zero for success; non-zero for failure.
376420
Register flag = t1;
377421

378422
assert(LockingMode == LM_LIGHTWEIGHT, "must be");
379-
assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);
423+
assert_different_registers(obj, box, tmp1, tmp2, tmp3, flag, t0);
380424

381425
mv(flag, 1);
382426

383427
// Handle inflated monitor.
384-
Label inflated, inflated_load_monitor;
428+
Label inflated, inflated_load_mark;
385429
// Finish fast unlock successfully. unlocked MUST branch to with flag == 0
386430
Label unlocked;
387431
// Finish fast unlock unsuccessfully. MUST branch to with flag != 0
@@ -392,14 +436,15 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
392436
const Register tmp3_t = tmp3;
393437

394438
{ // Lightweight unlock
439+
Label push_and_slow_path;
395440

396441
// Check if obj is top of lock-stack.
397442
lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
398443
subw(tmp2_top, tmp2_top, oopSize);
399444
add(tmp3_t, xthread, tmp2_top);
400445
ld(tmp3_t, Address(tmp3_t));
401446
// Top of lock stack was not obj. Must be monitor.
402-
bne(obj, tmp3_t, inflated_load_monitor);
447+
bne(obj, tmp3_t, inflated_load_mark);
403448

404449
// Pop lock-stack.
405450
DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
@@ -416,8 +461,11 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
416461
ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
417462

418463
// Check header for monitor (0b10).
464+
// Because we got here by popping (meaning we pushed in locked)
465+
// there will be no monitor in the box. So we need to push back the obj
466+
// so that the runtime can fix any potential anonymous owner.
419467
test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
420-
bnez(tmp3_t, inflated);
468+
bnez(tmp3_t, UseObjectMonitorTable ? push_and_slow_path : inflated);
421469

422470
// Try to unlock. Transition lock bits 0b00 => 0b01
423471
assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
@@ -426,6 +474,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
426474
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ tmp3_t);
427475
beq(tmp1_mark, tmp3_t, unlocked);
428476

477+
bind(push_and_slow_path);
429478
// Compare and exchange failed.
430479
// Restore lock-stack and handle the unlock in runtime.
431480
DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
@@ -436,7 +485,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
436485
}
437486

438487
{ // Handle inflated monitor.
439-
bind(inflated_load_monitor);
488+
bind(inflated_load_mark);
440489
ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
441490
#ifdef ASSERT
442491
test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
@@ -458,54 +507,55 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
458507
bind(check_done);
459508
#endif
460509

461-
if (!UseObjectMonitorTable) {
462-
// mark contains the tagged ObjectMonitor*.
463-
const Register tmp1_monitor = tmp1_mark;
464-
const uintptr_t monitor_tag = markWord::monitor_value;
510+
const Register tmp1_monitor = tmp1;
465511

512+
if (!UseObjectMonitorTable) {
513+
assert(tmp1_monitor == tmp1_mark, "should be the same here");
466514
// Untag the monitor.
467-
sub(tmp1_monitor, tmp1_mark, monitor_tag);
515+
add(tmp1_monitor, tmp1_mark, -(int)markWord::monitor_value);
516+
} else {
517+
ld(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
518+
// No valid pointer below alignof(ObjectMonitor*). Take the slow path.
519+
mv(tmp3_t, alignof(ObjectMonitor*));
520+
bltu(tmp1_monitor, tmp3_t, slow_path);
521+
}
468522

469-
const Register tmp2_recursions = tmp2;
470-
Label not_recursive;
523+
const Register tmp2_recursions = tmp2;
524+
Label not_recursive;
471525

472-
// Check if recursive.
473-
ld(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
474-
beqz(tmp2_recursions, not_recursive);
526+
// Check if recursive.
527+
ld(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
528+
beqz(tmp2_recursions, not_recursive);
475529

476-
// Recursive unlock.
477-
addi(tmp2_recursions, tmp2_recursions, -1);
478-
sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
479-
j(unlocked);
530+
// Recursive unlock.
531+
addi(tmp2_recursions, tmp2_recursions, -1);
532+
sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
533+
j(unlocked);
480534

481-
bind(not_recursive);
535+
bind(not_recursive);
482536

483-
Label release;
484-
const Register tmp2_owner_addr = tmp2;
537+
Label release;
538+
const Register tmp2_owner_addr = tmp2;
485539

486-
// Compute owner address.
487-
la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
540+
// Compute owner address.
541+
la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
488542

489-
// Check if the entry lists are empty.
490-
ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
491-
ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
492-
orr(t0, t0, tmp3_t);
493-
beqz(t0, release);
543+
// Check if the entry lists are empty.
544+
ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
545+
ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
546+
orr(t0, t0, tmp3_t);
547+
beqz(t0, release);
494548

495-
// The owner may be anonymous and we removed the last obj entry in
496-
// the lock-stack. This loses the information about the owner.
497-
// Write the thread to the owner field so the runtime knows the owner.
498-
sd(xthread, Address(tmp2_owner_addr));
499-
j(slow_path);
549+
// The owner may be anonymous and we removed the last obj entry in
550+
// the lock-stack. This loses the information about the owner.
551+
// Write the thread to the owner field so the runtime knows the owner.
552+
sd(xthread, Address(tmp2_owner_addr));
553+
j(slow_path);
500554

501-
bind(release);
502-
// Set owner to null.
503-
membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
504-
sd(zr, Address(tmp2_owner_addr));
505-
} else {
506-
// OMCache lookup not supported yet. Take the slowpath.
507-
j(slow_path);
508-
}
555+
bind(release);
556+
// Set owner to null.
557+
membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
558+
sd(zr, Address(tmp2_owner_addr));
509559
}
510560

511561
bind(unlocked);

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,8 @@
4747
void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
4848
void fast_unlock(Register object, Register box, Register tmp1, Register tmp2);
4949
// Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
50-
void fast_lock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);
51-
void fast_unlock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);
50+
void fast_lock_lightweight(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
51+
void fast_unlock_lightweight(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
5252

5353
void string_compare(Register str1, Register str2,
5454
Register cnt1, Register cnt2, Register result,

src/hotspot/cpu/riscv/interp_masm_riscv.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -756,7 +756,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
756756
}
757757

758758
if (LockingMode == LM_LIGHTWEIGHT) {
759-
lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
759+
lightweight_lock(lock_reg, obj_reg, tmp, tmp2, tmp3, slow_case);
760760
j(count);
761761
} else if (LockingMode == LM_LEGACY) {
762762
// Load (object->mark() | 1) into swap_reg

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5792,9 +5792,9 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
57925792
// - obj: the object to be locked
57935793
// - tmp1, tmp2, tmp3: temporary registers, will be destroyed
57945794
// - slow: branched to if locking fails
5795-
void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
5795+
void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
57965796
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
5797-
assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
5797+
assert_different_registers(basic_lock, obj, tmp1, tmp2, tmp3, t0);
57985798

57995799
Label push;
58005800
const Register top = tmp1;
@@ -5805,6 +5805,11 @@ void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2
58055805
// instruction emitted as it is part of C1's null check semantics.
58065806
ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
58075807

5808+
if (UseObjectMonitorTable) {
5809+
// Clear cache in case fast locking succeeds.
5810+
sd(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))));
5811+
}
5812+
58085813
// Check if the lock-stack is full.
58095814
lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
58105815
mv(t, (unsigned)LockStack::end_offset());

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1602,7 +1602,7 @@ class MacroAssembler: public Assembler {
16021602
void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release);
16031603

16041604
public:
1605-
void lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1605+
void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
16061606
void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
16071607

16081608
public:

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10553,33 +10553,33 @@ instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iR
1055310553
ins_pipe(pipe_serial);
1055410554
%}
1055510555

10556-
instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2)
10556+
instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
1055710557
%{
1055810558
predicate(LockingMode == LM_LIGHTWEIGHT);
1055910559
match(Set cr (FastLock object box));
10560-
effect(TEMP tmp1, TEMP tmp2, USE_KILL box);
10560+
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
1056110561

1056210562
ins_cost(10 * DEFAULT_COST);
10563-
format %{ "fastlock $object,$box\t! kills $box,$tmp1,$tmp2 #@cmpFastLockLightweight" %}
10563+
format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2,$tmp3 #@cmpFastLockLightweight" %}
1056410564

1056510565
ins_encode %{
10566-
__ fast_lock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register);
10566+
__ fast_lock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
1056710567
%}
1056810568

1056910569
ins_pipe(pipe_serial);
1057010570
%}
1057110571

10572-
instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2)
10572+
instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
1057310573
%{
1057410574
predicate(LockingMode == LM_LIGHTWEIGHT);
1057510575
match(Set cr (FastUnlock object box));
10576-
effect(TEMP tmp1, TEMP tmp2, USE_KILL box);
10576+
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
1057710577

1057810578
ins_cost(10 * DEFAULT_COST);
10579-
format %{ "fastunlock $object,$box\t! kills $box,$tmp1,$tmp2, #@cmpFastUnlockLightweight" %}
10579+
format %{ "fastunlock $object,$box\t! kills $tmp1,$tmp2,$tmp3 #@cmpFastUnlockLightweight" %}
1058010580

1058110581
ins_encode %{
10582-
__ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register);
10582+
__ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
1058310583
%}
1058410584

1058510585
ins_pipe(pipe_serial);

0 commit comments

Comments
 (0)