Skip to content
Open
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 43 additions & 50 deletions src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
Label done;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you are touching this code can you add L_ to labels in this code?
This is our usual practice for labels to clear see them.


__ testptr(count, count);
__ jcc(Assembler::zero, done);
__ jccb(Assembler::zero, done);

// Calculate end address in "count".
Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8;
Expand Down Expand Up @@ -128,7 +128,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ addptr(addr, sizeof(CardTable::CardValue));
__ cmpptr(addr, count);
__ jcc(Assembler::belowEqual, loop);
__ jmp(done);
__ jmpb(done);

__ bind(is_clean_card);
// Card was clean. Dirty card and go to next..
Expand Down Expand Up @@ -157,22 +157,6 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
}
}

static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
const Register thread, const Register value, const Register temp) {
// This code assumes that buffer index is pointer sized.
STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
// Can we store a value in the given thread's buffer?
// (The index field is typed as size_t.)
__ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address)
__ testptr(temp, temp); // index == 0?
__ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer)
// The buffer is not full, store value into it.
__ subptr(temp, wordSize); // temp := next index
__ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index
__ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index
__ movptr(Address(temp, 0), value); // *(buffer address + next index) := value
}

static void generate_pre_barrier_fast_path(MacroAssembler* masm,
const Register thread) {
Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
Expand All @@ -190,21 +174,40 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm,
const Register pre_val,
const Register thread,
const Register tmp,
Label& done,
Label& runtime) {
Label& L_done) {
Address index_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
Address buffer_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

// This code assumes that buffer index is pointer sized.
STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));

Label L_null, L_runtime;

// Do we need to load the previous value?
if (obj != noreg) {
__ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW);
}

// Is the previous value null?
__ cmpptr(pre_val, NULL_WORD);
__ jcc(Assembler::equal, done);
generate_queue_insertion(masm,
G1ThreadLocalData::satb_mark_queue_index_offset(),
G1ThreadLocalData::satb_mark_queue_buffer_offset(),
runtime,
thread, pre_val, tmp);
__ jmp(done);
__ testptr(pre_val, pre_val);
__ jccb(Assembler::equal, L_null);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that this short jump will be fused to one instruction with testptr on modern x86. But you will have jump-to-jump sequence. So you may win size wise but "throughput" could be worser. Especially if it is "fast" path.

Can you check performance of these changes vs using jcc(Assembler::equal, L_done); here.


// Can we store a value in the given thread's buffer?
// (The index field is typed as size_t.)
__ movptr(tmp, index_addr); // temp := *(index address)
__ testptr(tmp, tmp); // index == 0?
__ jccb(Assembler::zero, L_runtime); // jump to runtime if index == 0 (full buffer)

// The buffer is not full, store value into it.
__ subptr(tmp, wordSize); // temp := next index
__ movptr(index_addr, tmp); // *(index address) := next index
__ addptr(tmp, buffer_addr); // temp := buffer address + next index
__ movptr(Address(tmp, 0), pre_val); // *(buffer address + next index) := value

// Jump out if done, or fall-through to runtime
__ bind(L_null);
__ jmp(L_done);
__ bind(L_runtime);
}

void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
Expand All @@ -219,7 +222,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
const Register thread = r15_thread;

Label done;
Label runtime;

assert(pre_val != noreg, "check this code");

Expand All @@ -231,9 +233,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
generate_pre_barrier_fast_path(masm, thread);
// If marking is not active (*(mark queue active address) == 0), jump to done
__ jcc(Assembler::equal, done);
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime);

__ bind(runtime);
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done);

// Determine and save the live input values
__ push_call_clobbered_registers();
Expand Down Expand Up @@ -272,23 +272,23 @@ static void generate_post_barrier(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register tmp1,
Label& done,
bool new_val_may_be_null) {

assert_different_registers(store_addr, new_val, tmp1, noreg);

Register thread = r15_thread;

Label done;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use L_done.

// Does store cross heap regions?
__ movptr(tmp1, store_addr); // tmp1 := store address
__ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
__ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
__ jcc(Assembler::equal, done);
__ jccb(Assembler::equal, done);

// Crosses regions, storing null?
if (new_val_may_be_null) {
__ cmpptr(new_val, NULL_WORD); // new value == null?
__ jcc(Assembler::equal, done);
__ testptr(new_val, new_val); // new value == null?
__ jccb(Assembler::equal, done);
}

__ movptr(tmp1, store_addr); // tmp1 := store address
Expand All @@ -298,20 +298,19 @@ static void generate_post_barrier(MacroAssembler* masm,
__ addptr(tmp1, card_table_addr); // tmp1 := card address
if (UseCondCardMark) {
__ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val()); // *(card address) == clean_card_val?
__ jcc(Assembler::notEqual, done);
__ jccb(Assembler::notEqual, done);
}
// Storing a region crossing, non-null oop, card is clean.
// Dirty card.
__ movb(Address(tmp1, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
__ bind(done);
}

void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp) {
Label done;
generate_post_barrier(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */);
__ bind(done);
generate_post_barrier(masm, store_addr, new_val, tmp, true /* new_val_may_be_null */);
}

#if defined(COMPILER2)
Expand Down Expand Up @@ -354,17 +353,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
G1PreBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Register obj = stub->obj();
Register pre_val = stub->pre_val();
Register thread = stub->thread();
Register tmp = stub->tmp1();
assert(stub->tmp2() == noreg, "not needed in this platform");

__ bind(*stub->entry());
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime);
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation());

__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
__ jmp(*stub->continuation());
}
Expand All @@ -374,9 +371,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register new_val,
Register tmp,
bool new_val_may_be_null) {
Label done;
generate_post_barrier(masm, store_addr, new_val, tmp, done, new_val_may_be_null);
__ bind(done);
generate_post_barrier(masm, store_addr, new_val, tmp, new_val_may_be_null);
}

#endif // COMPILER2
Expand Down Expand Up @@ -449,7 +444,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
}

__ cmpptr(pre_val_reg, NULL_WORD);
__ testptr(pre_val_reg, pre_val_reg);
__ jcc(Assembler::equal, *stub->continuation());
ce->store_parameter(stub->pre_val()->as_register(), 0);
__ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
Expand All @@ -465,9 +460,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2 /* unused on x86 */) {
Label done;
generate_post_barrier(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */);
masm->bind(done);
generate_post_barrier(masm, store_addr, new_val, tmp1, true /* new_val_may_be_null */);
}

#define __ sasm->
Expand Down