
Commit 76a55c3

8341334: CDS: Parallel relocation
Reviewed-by: iklam, adinn, stuefe
Parent: 499186b

6 files changed: +327 -3 lines

src/hotspot/share/cds/archiveUtils.cpp

Lines changed: 185 additions & 0 deletions
@@ -399,3 +399,188 @@ size_t HeapRootSegments::segment_offset(size_t seg_idx) {
   return _base_offset + seg_idx * _max_size_in_bytes;
 }
 
+ArchiveWorkers ArchiveWorkers::_workers;
+
+ArchiveWorkers::ArchiveWorkers() :
+  _start_semaphore(0),
+  _end_semaphore(0),
+  _num_workers(0),
+  _started_workers(0),
+  _waiting_workers(0),
+  _running_workers(0),
+  _state(NOT_READY),
+  _task(nullptr) {
+}
+
+void ArchiveWorkers::initialize() {
+  assert(Atomic::load(&_state) == NOT_READY, "Should be");
+
+  Atomic::store(&_num_workers, max_workers());
+  Atomic::store(&_state, READY);
+
+  // Kick off pool startup by creating a single worker.
+  start_worker_if_needed();
+}
+
+int ArchiveWorkers::max_workers() {
+  // The pool is used for short-lived bursty tasks. We do not want to spend
+  // too much time creating and waking up threads unnecessarily. Plus, we do
+  // not want to overwhelm large machines. This is why we want to be very
+  // conservative about the number of workers actually needed.
+  return MAX2(0, log2i_graceful(os::active_processor_count()));
+}
+
+bool ArchiveWorkers::is_parallel() {
+  return _num_workers > 0;
+}
+
+void ArchiveWorkers::shutdown() {
+  while (true) {
+    State state = Atomic::load(&_state);
+    if (state == SHUTDOWN) {
+      // Already shut down.
+      return;
+    }
+    if (Atomic::cmpxchg(&_state, state, SHUTDOWN, memory_order_relaxed) == state) {
+      if (is_parallel()) {
+        // Execute a shutdown task and block until all workers respond.
+        run_task(&_shutdown_task);
+      }
+    }
+  }
+}
+
+void ArchiveWorkers::start_worker_if_needed() {
+  while (true) {
+    int cur = Atomic::load(&_started_workers);
+    if (cur >= _num_workers) {
+      return;
+    }
+    if (Atomic::cmpxchg(&_started_workers, cur, cur + 1, memory_order_relaxed) == cur) {
+      new ArchiveWorkerThread(this);
+      return;
+    }
+  }
+}
+
+void ArchiveWorkers::signal_worker_if_needed() {
+  while (true) {
+    int cur = Atomic::load(&_waiting_workers);
+    if (cur == 0) {
+      return;
+    }
+    if (Atomic::cmpxchg(&_waiting_workers, cur, cur - 1, memory_order_relaxed) == cur) {
+      _start_semaphore.signal(1);
+      return;
+    }
+  }
+}
+
+void ArchiveWorkers::run_task(ArchiveWorkerTask* task) {
+  assert((Atomic::load(&_state) == READY) ||
+         ((Atomic::load(&_state) == SHUTDOWN) && (task == &_shutdown_task)),
+         "Should be in correct state");
+  assert(Atomic::load(&_task) == nullptr, "Should not have running tasks");
+
+  if (is_parallel()) {
+    run_task_multi(task);
+  } else {
+    run_task_single(task);
+  }
+}
+
+void ArchiveWorkers::run_task_single(ArchiveWorkerTask* task) {
+  // Single thread needs no chunking.
+  task->configure_max_chunks(1);
+
+  // Execute the task ourselves, as there are no workers.
+  task->work(0, 1);
+}
+
+void ArchiveWorkers::run_task_multi(ArchiveWorkerTask* task) {
+  // Multiple threads can work with multiple chunks.
+  task->configure_max_chunks(_num_workers * CHUNKS_PER_WORKER);
+
+  // Set up the run and publish the task.
+  Atomic::store(&_waiting_workers, _num_workers);
+  Atomic::store(&_running_workers, _num_workers);
+  Atomic::release_store(&_task, task);
+
+  // Kick off pool wakeup by signaling a single worker, and proceed
+  // immediately to executing the task locally.
+  signal_worker_if_needed();
+
+  // Execute the task ourselves, while workers are catching up.
+  // This allows us to hide parts of task handoff latency.
+  task->run();
+
+  // Done executing task locally, wait for any remaining workers to complete,
+  // and then do the final housekeeping.
+  _end_semaphore.wait();
+  Atomic::store(&_task, (ArchiveWorkerTask *) nullptr);
+  OrderAccess::fence();
+
+  assert(Atomic::load(&_waiting_workers) == 0, "All workers were signaled");
+  assert(Atomic::load(&_running_workers) == 0, "No workers are running");
+}
+
+void ArchiveWorkerTask::run() {
+  while (true) {
+    int chunk = Atomic::load(&_chunk);
+    if (chunk >= _max_chunks) {
+      return;
+    }
+    if (Atomic::cmpxchg(&_chunk, chunk, chunk + 1, memory_order_relaxed) == chunk) {
+      assert(0 <= chunk && chunk < _max_chunks, "Sanity");
+      work(chunk, _max_chunks);
+    }
+  }
+}
+
+void ArchiveWorkerTask::configure_max_chunks(int max_chunks) {
+  if (_max_chunks == 0) {
+    _max_chunks = max_chunks;
+  }
+}
+
+bool ArchiveWorkers::run_as_worker() {
+  assert(is_parallel(), "Should be in parallel mode");
+  _start_semaphore.wait();
+
+  // Avalanche wakeups: each worker signals two others.
+  signal_worker_if_needed();
+  signal_worker_if_needed();
+
+  ArchiveWorkerTask* task = Atomic::load_acquire(&_task);
+  task->run();
+
+  // All work done in threads should be visible to caller.
+  OrderAccess::fence();
+
+  // Signal the pool the tasks are complete, if this is the last worker.
+  if (Atomic::sub(&_running_workers, 1, memory_order_relaxed) == 0) {
+    _end_semaphore.signal();
+  }
+
+  // Continue if task was not a termination task.
+  return (task != &_shutdown_task);
+}
+
+ArchiveWorkerThread::ArchiveWorkerThread(ArchiveWorkers* pool) : NamedThread(), _pool(pool) {
+  set_name("ArchiveWorkerThread");
+  os::create_thread(this, os::os_thread);
+  os::start_thread(this);
+}
+
+void ArchiveWorkerThread::run() {
+  // Avalanche thread startup: each starting worker starts two others.
+  _pool->start_worker_if_needed();
+  _pool->start_worker_if_needed();
+
+  // Set ourselves up.
+  os::set_priority(this, NearMaxPriority);
+
+  while (_pool->run_as_worker()) {
+    // Work until terminated.
+  }
+}
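
For orientation, this is roughly how a client of the new pool looks. The task below is hypothetical (ZeroFillTask, zero_fill, and the buffer arguments are invented for illustration); only ArchiveWorkerTask, ArchiveWorkers::workers(), and run_task() come from the code above, so the sketch compiles only inside the HotSpot tree:

// Hypothetical example, not part of this commit. Each claimed chunk
// processes an equal slice of the buffer, mirroring how tasks are
// expected to split their work by (chunk, max_chunks).
#include "cds/archiveUtils.hpp"
#include <cstring>

class ZeroFillTask : public ArchiveWorkerTask {
private:
  char* const _base;
  const size_t _size;

public:
  ZeroFillTask(char* base, size_t size) :
      ArchiveWorkerTask("Zero Fill"), _base(base), _size(size) {}

  // Called once per claimed chunk, potentially from several threads at once.
  void work(int chunk, int max_chunks) override {
    size_t start = _size * (size_t)chunk / (size_t)max_chunks;
    size_t end   = _size * (size_t)(chunk + 1) / (size_t)max_chunks;
    memset(_base + start, 0, end - start);
  }
};

void zero_fill(char* base, size_t size) {
  ZeroFillTask task(base, size);
  // Runs chunks on the pool when it is parallel, or inline on the calling
  // thread when there are no workers (run_task_single above).
  ArchiveWorkers::workers()->run_task(&task);
}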

src/hotspot/share/cds/archiveUtils.hpp

Lines changed: 93 additions & 0 deletions
@@ -33,6 +33,8 @@
 #include "utilities/bitMap.hpp"
 #include "utilities/exceptions.hpp"
 #include "utilities/macros.hpp"
+#include "runtime/nonJavaThread.hpp"
+#include "runtime/semaphore.hpp"
 
 class BootstrapInfo;
 class ReservedSpace;
@@ -319,4 +321,95 @@ class HeapRootSegments {
   HeapRootSegments& operator=(const HeapRootSegments&) = default;
 };
 
+class ArchiveWorkers;
+
+// A task to be worked on by worker threads.
+class ArchiveWorkerTask : public CHeapObj<mtInternal> {
+  friend class ArchiveWorkers;
+  friend class ArchiveWorkerShutdownTask;
+private:
+  const char* _name;
+  int _max_chunks;
+  volatile int _chunk;
+
+  void run();
+
+  void configure_max_chunks(int max_chunks);
+
+public:
+  ArchiveWorkerTask(const char* name) :
+      _name(name), _max_chunks(0), _chunk(0) {}
+  const char* name() const { return _name; }
+  virtual void work(int chunk, int max_chunks) = 0;
+};
+
+class ArchiveWorkerThread : public NamedThread {
+  friend class ArchiveWorkers;
+private:
+  ArchiveWorkers* const _pool;
+
+public:
+  ArchiveWorkerThread(ArchiveWorkers* pool);
+  const char* type_name() const override { return "Archive Worker Thread"; }
+  void run() override;
+};
+
+class ArchiveWorkerShutdownTask : public ArchiveWorkerTask {
+public:
+  ArchiveWorkerShutdownTask() : ArchiveWorkerTask("Archive Worker Shutdown") {
+    // This task always has only one chunk.
+    configure_max_chunks(1);
+  }
+  void work(int chunk, int max_chunks) override {
+    // Do nothing.
+  }
+};
+
+// Special worker pool for archive workers. The goal for this pool is to
+// start up fast, distribute spiky workloads efficiently, and be able to
+// shut down after use. This makes the implementation quite different from
+// the normal GC worker pool.
+class ArchiveWorkers {
+  friend class ArchiveWorkerThread;
+private:
+  // Target number of chunks per worker. This should be large enough to even
+  // out work imbalance, and small enough to keep bookkeeping overheads low.
+  static constexpr int CHUNKS_PER_WORKER = 4;
+  static int max_workers();
+
+  // Global shared instance. Can be uninitialized, can be shut down.
+  static ArchiveWorkers _workers;
+
+  ArchiveWorkerShutdownTask _shutdown_task;
+  Semaphore _start_semaphore;
+  Semaphore _end_semaphore;
+
+  int _num_workers;
+  int _started_workers;
+  int _waiting_workers;
+  int _running_workers;
+
+  typedef enum { NOT_READY, READY, SHUTDOWN } State;
+  volatile State _state;
+
+  ArchiveWorkerTask* _task;
+
+  bool run_as_worker();
+  void start_worker_if_needed();
+  void signal_worker_if_needed();
+
+  void run_task_single(ArchiveWorkerTask* task);
+  void run_task_multi(ArchiveWorkerTask* task);
+
+  bool is_parallel();
+
+  ArchiveWorkers();
+
+public:
+  static ArchiveWorkers* workers() { return &_workers; }
+  void initialize();
+  void shutdown();
+  void run_task(ArchiveWorkerTask* task);
+};
+
 #endif // SHARE_CDS_ARCHIVEUTILS_HPP
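
Note what the conservative max_workers() policy implies: worker counts grow only logarithmically with machine size. A standalone sketch (plain C++ that mirrors log2i_graceful and MAX2 instead of pulling in HotSpot headers; the CPU counts are illustrative assumptions, not measurements from this commit):

#include <cstdio>

// floor(log2(n)) for n > 0, -1 for n == 0; analogous to log2i_graceful.
static int log2_graceful(int n) {
  int r = -1;
  while (n > 0) { n >>= 1; r++; }
  return r;
}

int main() {
  const int cpus[] = {1, 2, 8, 32, 64, 256};
  for (int n : cpus) {
    int workers = log2_graceful(n);
    if (workers < 0) workers = 0;   // the MAX2(0, ...) clamp
    // 1 cpu -> 0 workers (serial mode), 8 -> 3, 64 -> 6, 256 -> 8.
    printf("%3d cpus -> %d workers\n", n, workers);
  }
  return 0;
}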

src/hotspot/share/cds/cds_globals.hpp

Lines changed: 4 additions & 1 deletion
@@ -117,7 +117,10 @@
   product(bool, AOTClassLinking, false,                                     \
           "Load/link all archived classes for the boot/platform/app "       \
           "loaders before application main")                                \
-                                                                            \
+                                                                            \
+  product(bool, AOTCacheParallelRelocation, true, DIAGNOSTIC,               \
+          "Use parallel relocation code to speed up startup.")              \
+                                                                            \
   // end of CDS_FLAGS
 
 DECLARE_FLAGS(CDS_FLAGS)
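
Since AOTCacheParallelRelocation is a diagnostic flag, toggling it (for instance, to rule the new code path in or out while troubleshooting startup issues) requires unlocking diagnostic options first. The application name below is a placeholder:

java -XX:+UnlockDiagnosticVMOptions -XX:-AOTCacheParallelRelocation MyApp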

src/hotspot/share/cds/filemap.cpp

Lines changed: 34 additions & 2 deletions
@@ -1972,6 +1972,32 @@ char* FileMapInfo::map_bitmap_region() {
   return bitmap_base;
 }
 
+class SharedDataRelocationTask : public ArchiveWorkerTask {
+private:
+  BitMapView* const _rw_bm;
+  BitMapView* const _ro_bm;
+  SharedDataRelocator* const _rw_reloc;
+  SharedDataRelocator* const _ro_reloc;
+
+public:
+  SharedDataRelocationTask(BitMapView* rw_bm, BitMapView* ro_bm, SharedDataRelocator* rw_reloc, SharedDataRelocator* ro_reloc) :
+      ArchiveWorkerTask("Shared Data Relocation"),
+      _rw_bm(rw_bm), _ro_bm(ro_bm), _rw_reloc(rw_reloc), _ro_reloc(ro_reloc) {}
+
+  void work(int chunk, int max_chunks) override {
+    work_on(chunk, max_chunks, _rw_bm, _rw_reloc);
+    work_on(chunk, max_chunks, _ro_bm, _ro_reloc);
+  }
+
+  void work_on(int chunk, int max_chunks, BitMapView* bm, SharedDataRelocator* reloc) {
+    BitMap::idx_t size = bm->size();
+    BitMap::idx_t start = MIN2(size, size * chunk / max_chunks);
+    BitMap::idx_t end   = MIN2(size, size * (chunk + 1) / max_chunks);
+    assert(end > start, "Sanity: no empty slices");
+    bm->iterate(reloc, start, end);
+  }
+};
+
 // This is called when we cannot map the archive at the requested base address (usually 0x800000000).
 // We relocate all pointers in the 2 core regions (ro, rw).
 bool FileMapInfo::relocate_pointers_in_core_regions(intx addr_delta) {
@@ -2010,8 +2036,14 @@ bool FileMapInfo::relocate_pointers_in_core_regions(intx addr_delta) {
                                  valid_new_base, valid_new_end, addr_delta);
   SharedDataRelocator ro_patcher((address*)ro_patch_base + header()->ro_ptrmap_start_pos(), (address*)ro_patch_end, valid_old_base, valid_old_end,
                                  valid_new_base, valid_new_end, addr_delta);
-  rw_ptrmap.iterate(&rw_patcher);
-  ro_ptrmap.iterate(&ro_patcher);
+
+  if (AOTCacheParallelRelocation) {
+    SharedDataRelocationTask task(&rw_ptrmap, &ro_ptrmap, &rw_patcher, &ro_patcher);
+    ArchiveWorkers::workers()->run_task(&task);
+  } else {
+    rw_ptrmap.iterate(&rw_patcher);
+    ro_ptrmap.iterate(&ro_patcher);
+  }
 
   // The MetaspaceShared::bm region will be unmapped in MetaspaceShared::initialize_shared_spaces().
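
The slice arithmetic in work_on() is worth a second look: because the start of chunk k+1 is computed by the same expression as the end of chunk k, the integer divisions partition [0, size) exactly, with no gaps and no overlaps. A standalone check with assumed values (the bitmap size and chunk count are arbitrary, not taken from the commit):

#include <cassert>
#include <cstddef>

int main() {
  const size_t size = 1000003;   // arbitrary bitmap size
  const int max_chunks = 24;     // e.g. 6 workers * CHUNKS_PER_WORKER (4)
  size_t prev_end = 0;
  for (int chunk = 0; chunk < max_chunks; chunk++) {
    size_t start = size * (size_t)chunk / (size_t)max_chunks;
    size_t end   = size * (size_t)(chunk + 1) / (size_t)max_chunks;
    assert(start == prev_end);   // no gaps, no overlaps
    assert(end > start);         // mirrors the "no empty slices" assert
    prev_end = end;
  }
  assert(prev_end == size);      // full coverage of the bitmap
  return 0;
}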

src/hotspot/share/cds/metaspaceShared.cpp

Lines changed: 6 additions & 0 deletions
@@ -1088,6 +1088,9 @@ void MetaspaceShared::initialize_runtime_shared_and_meta_spaces() {
   assert(CDSConfig::is_using_archive(), "Must be called when UseSharedSpaces is enabled");
   MapArchiveResult result = MAP_ARCHIVE_OTHER_FAILURE;
 
+  // We are about to open the archives. Initialize workers now.
+  ArchiveWorkers::workers()->initialize();
+
   FileMapInfo* static_mapinfo = open_static_archive();
   FileMapInfo* dynamic_mapinfo = nullptr;
 
@@ -1679,6 +1682,9 @@ void MetaspaceShared::initialize_shared_spaces() {
     dynamic_mapinfo->unmap_region(MetaspaceShared::bm);
   }
 
+  // Archive was fully read. Workers are no longer needed.
+  ArchiveWorkers::workers()->shutdown();
+
   LogStreamHandle(Info, cds) lsh;
   if (lsh.is_enabled()) {
     lsh.print("Using AOT-linked classes: %s (static archive: %s aot-linked classes",
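
Net effect of these two hunks: the pool's lifetime brackets the archive-mapping phase. A condensed, illustrative sequence (the wrapper function is invented; the real control flow spans initialize_runtime_shared_and_meta_spaces() and initialize_shared_spaces()):

// Illustrative only; not a function in this commit.
void map_archives_sketch() {
  ArchiveWorkers::workers()->initialize();  // before opening the archives
  // ... map regions; relocate_pointers_in_core_regions() may run_task() ...
  ArchiveWorkers::workers()->shutdown();    // once the archive is fully read
}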

src/hotspot/share/runtime/java.cpp

Lines changed: 5 additions & 0 deletions
@@ -441,6 +441,11 @@ void before_exit(JavaThread* thread, bool halt) {
 
 #if INCLUDE_CDS
   ClassListWriter::write_resolved_constants();
+
+  // Initiate Archive Workers shutdown. These workers are likely already
+  // shut down, but we need to make sure they really are. Otherwise, workers
+  // would fail hard on broken semaphores.
+  ArchiveWorkers::workers()->shutdown();
 #endif
 
   // Hang forever on exit if we're reporting an error.
