From ce816e603a4b147bb48d65149d90cf216adc7407 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Thu, 20 Nov 2025 18:47:14 +0100 Subject: [PATCH 1/9] WIP --- .../x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 586135fcebc28..4ecbc5d341218 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -92,7 +92,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas Label done; __ testptr(count, count); - __ jcc(Assembler::zero, done); + __ jccb(Assembler::zero, done); // Calculate end address in "count". Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8; @@ -128,7 +128,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ addptr(addr, sizeof(CardTable::CardValue)); __ cmpptr(addr, count); __ jcc(Assembler::belowEqual, loop); - __ jmp(done); + __ jmpb(done); __ bind(is_clean_card); // Card was clean. Dirty card and go to next.. @@ -197,8 +197,8 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, __ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW); } // Is the previous value null? - __ cmpptr(pre_val, NULL_WORD); - __ jcc(Assembler::equal, done); + __ testptr(pre_val, pre_val); + __ jccb(Assembler::equal, done); generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(), @@ -272,23 +272,23 @@ static void generate_post_barrier(MacroAssembler* masm, const Register store_addr, const Register new_val, const Register tmp1, - Label& done, bool new_val_may_be_null) { assert_different_registers(store_addr, new_val, tmp1, noreg); Register thread = r15_thread; + Label done; // Does store cross heap regions? __ movptr(tmp1, store_addr); // tmp1 := store address __ xorptr(tmp1, new_val); // tmp1 := store address ^ new value __ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0? - __ jcc(Assembler::equal, done); + __ jccb(Assembler::equal, done); // Crosses regions, storing null? if (new_val_may_be_null) { - __ cmpptr(new_val, NULL_WORD); // new value == null? - __ jcc(Assembler::equal, done); + __ testptr(new_val, new_val); // new value == null? + __ jccb(Assembler::equal, done); } __ movptr(tmp1, store_addr); // tmp1 := store address @@ -298,20 +298,19 @@ static void generate_post_barrier(MacroAssembler* masm, __ addptr(tmp1, card_table_addr); // tmp1 := card address if (UseCondCardMark) { __ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val()); // *(card address) == clean_card_val? - __ jcc(Assembler::notEqual, done); + __ jccb(Assembler::notEqual, done); } // Storing a region crossing, non-null oop, card is clean. // Dirty card. __ movb(Address(tmp1, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val + __ bind(done); } void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) { - Label done; - generate_post_barrier(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */); - __ bind(done); + generate_post_barrier(masm, store_addr, new_val, tmp, true /* new_val_may_be_null */); } #if defined(COMPILER2) @@ -374,9 +373,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, Register new_val, Register tmp, bool new_val_may_be_null) { - Label done; - generate_post_barrier(masm, store_addr, new_val, tmp, done, new_val_may_be_null); - __ bind(done); + generate_post_barrier(masm, store_addr, new_val, tmp, new_val_may_be_null); } #endif // COMPILER2 @@ -465,9 +462,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm, Register thread, Register tmp1, Register tmp2 /* unused on x86 */) { - Label done; - generate_post_barrier(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */); - masm->bind(done); + generate_post_barrier(masm, store_addr, new_val, tmp1, true /* new_val_may_be_null */); } #define __ sasm-> From 823cd7fee4739026d8aca6a802956a8f412cdd10 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Thu, 20 Nov 2025 19:11:29 +0100 Subject: [PATCH 2/9] Touchups --- src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 4ecbc5d341218..69d62549ba0c3 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -198,7 +198,7 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, } // Is the previous value null? __ testptr(pre_val, pre_val); - __ jccb(Assembler::equal, done); + __ jcc(Assembler::equal, done); generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(), @@ -446,7 +446,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); } - __ cmpptr(pre_val_reg, NULL_WORD); + __ testptr(pre_val_reg, pre_val_reg); __ jcc(Assembler::equal, *stub->continuation()); ce->store_parameter(stub->pre_val()->as_register(), 0); __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); From fb49fb12761d61837f177119cd9efced6bd091f0 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 21 Nov 2025 09:09:32 +0100 Subject: [PATCH 3/9] Also optimize queue insertion --- .../x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 69d62549ba0c3..070382524f6c6 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -165,7 +165,7 @@ static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset // (The index field is typed as size_t.) __ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address) __ testptr(temp, temp); // index == 0? - __ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer) + __ jccb(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer) // The buffer is not full, store value into it. __ subptr(temp, wordSize); // temp := next index __ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index @@ -190,21 +190,28 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, const Register pre_val, const Register thread, const Register tmp, - Label& done, - Label& runtime) { + Label& done) { + Label L_null, L_runtime; + // Do we need to load the previous value? if (obj != noreg) { __ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW); } // Is the previous value null? __ testptr(pre_val, pre_val); - __ jcc(Assembler::equal, done); + __ jccb(Assembler::equal, L_null); generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(), - runtime, + L_runtime, thread, pre_val, tmp); + __ bind(L_null); + + // All done, jump out __ jmp(done); + + // Fall through to runtime + __ bind(L_runtime); } void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, @@ -219,7 +226,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, const Register thread = r15_thread; Label done; - Label runtime; assert(pre_val != noreg, "check this code"); @@ -231,9 +237,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, generate_pre_barrier_fast_path(masm, thread); // If marking is not active (*(mark queue active address) == 0), jump to done __ jcc(Assembler::equal, done); - generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime); - - __ bind(runtime); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done); // Determine and save the live input values __ push_call_clobbered_registers(); @@ -353,7 +357,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, G1PreBarrierStubC2* stub) const { Assembler::InlineSkippedInstructionsCounter skip_counter(masm); - Label runtime; Register obj = stub->obj(); Register pre_val = stub->pre_val(); Register thread = stub->thread(); @@ -361,9 +364,8 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, assert(stub->tmp2() == noreg, "not needed in this platform"); __ bind(*stub->entry()); - generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation()); - __ bind(runtime); generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); __ jmp(*stub->continuation()); } From ece4b513787c41fbe9e7944c8e5b7d417a6b8399 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 21 Nov 2025 09:21:23 +0100 Subject: [PATCH 4/9] More touchups --- src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 070382524f6c6..62de5b0d25882 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -205,12 +205,9 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, G1ThreadLocalData::satb_mark_queue_buffer_offset(), L_runtime, thread, pre_val, tmp); + // Jump out, or fall through to runtime __ bind(L_null); - - // All done, jump out __ jmp(done); - - // Fall through to runtime __ bind(L_runtime); } From 6250ea8af042bd6f647186656fc61183e29e7e2f Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 21 Nov 2025 09:33:58 +0100 Subject: [PATCH 5/9] Also reflow generate_pre_barrier_slow_path, so it is obvious the branches are short --- .../x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 62de5b0d25882..aa5193aa76228 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -157,22 +157,6 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator } } -static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, - const Register thread, const Register value, const Register temp) { - // This code assumes that buffer index is pointer sized. - STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t)); - // Can we store a value in the given thread's buffer? - // (The index field is typed as size_t.) - __ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address) - __ testptr(temp, temp); // index == 0? - __ jccb(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer) - // The buffer is not full, store value into it. - __ subptr(temp, wordSize); // temp := next index - __ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index - __ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index - __ movptr(Address(temp, 0), value); // *(buffer address + next index) := value -} - static void generate_pre_barrier_fast_path(MacroAssembler* masm, const Register thread) { Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); @@ -190,24 +174,39 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, const Register pre_val, const Register thread, const Register tmp, - Label& done) { + Label& L_done) { + Address index_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + // This code assumes that buffer index is pointer sized. + STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t)); + Label L_null, L_runtime; // Do we need to load the previous value? if (obj != noreg) { __ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW); } + // Is the previous value null? __ testptr(pre_val, pre_val); __ jccb(Assembler::equal, L_null); - generate_queue_insertion(masm, - G1ThreadLocalData::satb_mark_queue_index_offset(), - G1ThreadLocalData::satb_mark_queue_buffer_offset(), - L_runtime, - thread, pre_val, tmp); - // Jump out, or fall through to runtime + + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + __ movptr(tmp, index_addr); // temp := *(index address) + __ testptr(tmp, tmp); // index == 0? + __ jccb(Assembler::zero, L_runtime); // jump to runtime if index == 0 (full buffer) + + // The buffer is not full, store value into it. + __ subptr(tmp, wordSize); // temp := next index + __ movptr(index_addr, tmp); // *(index address) := next index + __ addptr(tmp, buffer_addr); // temp := buffer address + next index + __ movptr(Address(tmp, 0), pre_val); // *(buffer address + next index) := value + + // Jump out if done, or fall-through to runtime __ bind(L_null); - __ jmp(done); + __ jmp(L_done); __ bind(L_runtime); } From 14b29ffd6f6d478bb60c28f4facedb31801b7073 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 21 Nov 2025 10:58:07 +0100 Subject: [PATCH 6/9] Shorten a few more branches --- src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index aa5193aa76228..2935dbde7105c 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -118,7 +118,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas Label is_clean_card; if (UseCondCardMark) { __ cmpb(Address(addr, 0), G1CardTable::clean_card_val()); - __ jcc(Assembler::equal, is_clean_card); + __ jccb(Assembler::equal, is_clean_card); } else { __ movb(Address(addr, 0), G1CardTable::dirty_card_val()); } @@ -499,7 +499,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* __ movptr(tmp, queue_index); __ testptr(tmp, tmp); - __ jcc(Assembler::zero, runtime); + __ jccb(Assembler::zero, runtime); __ subptr(tmp, wordSize); __ movptr(queue_index, tmp); __ addptr(tmp, buffer); From edbc74a2863887c6705455d6abd3786ce24f07da Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 21 Nov 2025 10:59:04 +0100 Subject: [PATCH 7/9] Comment --- src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 2935dbde7105c..e37327521cc57 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -204,7 +204,8 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, __ addptr(tmp, buffer_addr); // temp := buffer address + next index __ movptr(Address(tmp, 0), pre_val); // *(buffer address + next index) := value - // Jump out if done, or fall-through to runtime + // Jump out if done, or fall-through to runtime. + // "Done" is far away, so jump cannot be short. __ bind(L_null); __ jmp(L_done); __ bind(L_runtime); From 1f57d0d92a2901c1439c3b55f413b3170949d749 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 21 Nov 2025 11:02:13 +0100 Subject: [PATCH 8/9] Make some backward branches explicitly short --- src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index e37327521cc57..6ac0e2b4aba1e 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -127,13 +127,13 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ bind(next_card); __ addptr(addr, sizeof(CardTable::CardValue)); __ cmpptr(addr, count); - __ jcc(Assembler::belowEqual, loop); + __ jccb(Assembler::belowEqual, loop); __ jmpb(done); __ bind(is_clean_card); // Card was clean. Dirty card and go to next.. __ movb(Address(addr, 0), G1CardTable::dirty_card_val()); - __ jmp(next_card); + __ jmpb(next_card); __ bind(done); } From c23bac469cafa111d88f5d9b3aab335b586dfa12 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Fri, 21 Nov 2025 17:03:02 +0100 Subject: [PATCH 9/9] Adjust label name --- src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 6ac0e2b4aba1e..7b61ad5137ef2 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -205,7 +205,7 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, __ movptr(Address(tmp, 0), pre_val); // *(buffer address + next index) := value // Jump out if done, or fall-through to runtime. - // "Done" is far away, so jump cannot be short. + // "L_done" is far away, so jump cannot be short. __ bind(L_null); __ jmp(L_done); __ bind(L_runtime);