Skip to content

Commit 09dc3e6

Browse files
committed
Parallel dir entry sync options
1 parent 5df027f commit 09dc3e6

File tree

8 files changed

+109
-30
lines changed

8 files changed

+109
-30
lines changed

doc/admin-guide/files/records.yaml.en.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2659,6 +2659,22 @@ Cache Control
26592659
:units: millisecond
26602660

26612661
How long to wait between each write cycle when syncing the cache directory to disk.
2662+
.. ts:cv:: CONFIG proxy.config.cache.dir.sync_parallel_tasks INT 1
2663+
2664+
Number of parallel tasks to use for directory syncing. Each task syncs
2665+
directories for a different physical drive on ET_TASK threads.
2666+
2667+
======= ==================================================================
2668+
Value Description
2669+
======= ==================================================================
2670+
``-1`` Unlimited - one task per drive (maximum parallelism)
2671+
``1`` Sequential - one task for all drives (default, safe)
2672+
``N`` Parallel - up to N tasks (drives) sync concurrently
2673+
======= ==================================================================
2674+
2675+
Default is ``1`` (sequential). Set to ``-1`` for maximum parallelism on
2676+
high-end NVMe arrays, or to ``4-8`` for balanced performance on multi-drive
2677+
systems.
26622678

26632679
.. ts:cv:: CONFIG proxy.config.cache.limits.http.max_alts INT 5
26642680

src/iocore/cache/Cache.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ int cache_config_log_alternate_eviction = 0;
6464
int cache_config_dir_sync_frequency = 60;
6565
int cache_config_dir_sync_delay = 500;
6666
int cache_config_dir_sync_max_write = (2 * 1024 * 1024);
67+
int cache_config_dir_sync_parallel_tasks = 1;
6768
int cache_config_permit_pinning = 0;
6869
int cache_config_select_alternate = 1;
6970
int cache_config_max_doc_size = 0;
@@ -90,7 +91,6 @@ Cache *theCache = nullptr;
9091
std::vector<std::unique_ptr<CacheDisk>> gdisks;
9192
int gndisks = 0;
9293
Cache *caches[NUM_CACHE_FRAG_TYPES] = {nullptr};
93-
CacheSync *cacheDirSync = nullptr;
9494
Store theCacheStore;
9595
StripeSM **gstripes = nullptr;
9696
std::atomic<int> gnstripes = 0;
@@ -884,6 +884,9 @@ ink_cache_init(ts::ModuleVersion v)
884884

885885
cacheProcessor.wait_for_cache = RecGetRecordInt("proxy.config.http.wait_for_cache").value_or(0);
886886

887+
RecEstablishStaticConfigInt32(cache_config_dir_sync_parallel_tasks, "proxy.config.cache.dir.sync_parallel_tasks");
888+
Dbg(dbg_ctl_cache_init, "proxy.config.cache.dir.sync_parallel_tasks = %d", cache_config_dir_sync_parallel_tasks);
889+
887890
RecEstablishStaticConfigInt32(cache_config_persist_bad_disks, "proxy.config.cache.persist_bad_disks");
888891
Dbg(dbg_ctl_cache_init, "proxy.config.cache.persist_bad_disks = %d", cache_config_persist_bad_disks);
889892
if (cache_config_persist_bad_disks) {

src/iocore/cache/CacheDir.cc

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
#include "tscore/hugepages.h"
3232
#include "tscore/Random.h"
3333
#include "ts/ats_probe.h"
34+
#include "iocore/eventsystem/Tasks.h"
35+
36+
#include <unordered_map>
3437

3538
#ifdef LOOP_CHECK_MODE
3639
#define DIR_LOOP_THRESHOLD 1000
@@ -862,14 +865,51 @@ dir_lookaside_remove(const CacheKey *key, StripeSM *stripe)
862865
return;
863866
}
864867

865-
// Cache Sync
866-
//
868+
// Cache Dir Sync
867869

868870
void
869871
dir_sync_init()
870872
{
871-
cacheDirSync = new CacheSync;
872-
cacheDirSync->trigger = eventProcessor.schedule_in(cacheDirSync, HRTIME_SECONDS(cache_config_dir_sync_frequency));
873+
static std::vector<std::unique_ptr<CacheSync>> cache_syncs;
874+
static bool initialized = false;
875+
std::unordered_map<CacheDisk *, std::vector<int>> drive_stripe_map;
876+
877+
if (initialized) {
878+
Warning("dir_sync_init() called multiple times - ignoring");
879+
return;
880+
}
881+
initialized = true;
882+
883+
for (int i = 0; i < gnstripes; i++) {
884+
drive_stripe_map[gstripes[i]->disk].push_back(i);
885+
}
886+
887+
int num_tasks =
888+
(cache_config_dir_sync_parallel_tasks == -1) ? drive_stripe_map.size() : std::max(1, cache_config_dir_sync_parallel_tasks);
889+
890+
cache_syncs.resize(num_tasks);
891+
for (int i = 0; i < num_tasks; i++) {
892+
cache_syncs[i] = std::make_unique<CacheSync>();
893+
}
894+
895+
int task_idx = 0;
896+
897+
for (auto &[disk, indices] : drive_stripe_map) {
898+
int target_task = task_idx % num_tasks;
899+
900+
Dbg(dbg_ctl_cache_dir_sync, "Disk %s: %zu stripe(s) assigned to task %d", disk->path, indices.size(), target_task);
901+
for (int stripe_idx : indices) {
902+
cache_syncs[target_task]->stripe_indices.push_back(stripe_idx);
903+
}
904+
task_idx++;
905+
}
906+
907+
for (int i = 0; i < num_tasks; i++) {
908+
Dbg(dbg_ctl_cache_dir_sync, "Task %d: syncing %zu stripe(s)", i, cache_syncs[i]->stripe_indices.size());
909+
cache_syncs[i]->current_index = 0;
910+
cache_syncs[i]->trigger =
911+
eventProcessor.schedule_in(cache_syncs[i].get(), HRTIME_SECONDS(cache_config_dir_sync_frequency), ET_TASK);
912+
}
873913
}
874914

875915
void
@@ -930,34 +970,35 @@ sync_cache_dir_on_shutdown()
930970
}
931971

932972
int
933-
CacheSync::mainEvent(int event, Event *e)
973+
CacheSync::mainEvent(int event, Event * /* e ATS_UNUSED */)
934974
{
935975
if (trigger) {
936976
trigger->cancel_action();
937977
trigger = nullptr;
938978
}
939979

940980
Lrestart:
941-
if (stripe_index >= gnstripes) {
942-
stripe_index = 0;
981+
if (current_index >= static_cast<int>(stripe_indices.size())) {
982+
current_index = 0;
983+
#if FREE_BUF_BETWEEN_CYCLES
984+
// Free buffer between sync cycles to avoid holding large amounts of memory
943985
if (buf) {
944986
if (buf_huge) {
945987
ats_free_hugepage(buf, buflen);
946988
} else {
947989
ats_free(buf);
948990
}
949-
buflen = 0;
950991
buf = nullptr;
992+
buflen = 0;
951993
buf_huge = false;
952994
}
953-
Dbg(dbg_ctl_cache_dir_sync, "sync done");
954-
if (event == EVENT_INTERVAL) {
955-
trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
956-
} else {
957-
trigger = eventProcessor.schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
958-
}
995+
#endif
996+
Dbg(dbg_ctl_cache_dir_sync, "sync cycle done");
997+
trigger = eventProcessor.schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency), ET_TASK);
959998
return EVENT_CONT;
960999
}
1000+
stripe_index = stripe_indices[current_index];
1001+
current_index++;
9611002

9621003
StripeSM *stripe = gstripes[stripe_index]; // must be named "vol" to make STAT macros work.
9631004

@@ -1007,6 +1048,7 @@ CacheSync::mainEvent(int event, Event *e)
10071048
if (stripe->is_io_in_progress() || stripe->get_agg_buf_pos()) {
10081049
Dbg(dbg_ctl_cache_dir_sync, "Dir %s: waiting for agg buffer", stripe->hash_text.get());
10091050
stripe->dir_sync_waiting = true;
1051+
stripe->waiting_dir_sync = this;
10101052
if (!stripe->is_io_in_progress()) {
10111053
stripe->aggWrite(EVENT_IMMEDIATE, nullptr);
10121054
}
@@ -1072,9 +1114,7 @@ CacheSync::mainEvent(int event, Event *e)
10721114
return EVENT_CONT;
10731115
}
10741116
Ldone:
1075-
// done
10761117
writepos = 0;
1077-
++stripe_index;
10781118
goto Lrestart;
10791119
}
10801120

src/iocore/cache/P_CacheDir.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "iocore/eventsystem/Continuation.h"
2929
#include "iocore/aio/AIO.h"
3030
#include "tscore/Version.h"
31+
#include "tscore/hugepages.h"
3132

3233
#include <cstdint>
3334
#include <ctime>
@@ -246,10 +247,25 @@ struct CacheSync : public Continuation {
246247
AIOCallback io;
247248
Event *trigger = nullptr;
248249
ink_hrtime start_time = 0;
249-
int mainEvent(int event, Event *e);
250-
void aio_write(int fd, char *b, int n, off_t o);
250+
251+
std::vector<int> stripe_indices;
252+
int current_index{0};
253+
254+
int mainEvent(int event, Event *e);
255+
void aio_write(int fd, char *b, int n, off_t o);
251256

252257
CacheSync() : Continuation(new_ProxyMutex()) { SET_HANDLER(&CacheSync::mainEvent); }
258+
259+
~CacheSync()
260+
{
261+
if (buf) {
262+
if (buf_huge) {
263+
ats_free_hugepage(buf, buflen);
264+
} else {
265+
ats_free(buf);
266+
}
267+
}
268+
}
253269
};
254270

255271
struct StripteHeaderFooter {

src/iocore/cache/P_CacheInternal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ extern CacheStatsBlock cache_rsb;
9696
extern int cache_config_dir_sync_frequency;
9797
extern int cache_config_dir_sync_delay;
9898
extern int cache_config_dir_sync_max_write;
99+
extern int cache_config_dir_sync_parallel_tasks;
99100
extern int cache_config_http_max_alts;
100101
extern int cache_config_log_alternate_eviction;
101102
extern int cache_config_permit_pinning;
@@ -140,7 +141,6 @@ struct CacheRemoveCont : public Continuation {
140141
// Global Data
141142
extern ClassAllocator<CacheVC, false> cacheVConnectionAllocator;
142143
extern ClassAllocator<CacheEvacuateDocVC, false> cacheEvacuateDocVConnectionAllocator;
143-
extern CacheSync *cacheDirSync;
144144
// Function Prototypes
145145
int cache_write(CacheVC *, CacheHTTPInfoVector *);
146146
int get_alternate_index(CacheHTTPInfoVector *cache_vector, CacheKey key);

src/iocore/cache/StripeSM.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -709,9 +709,9 @@ StripeSM::aggWriteDone(int event, Event *e)
709709
{
710710
cancel_trigger();
711711

712-
// ensure we have the cacheDirSync lock if we intend to call it later
712+
// ensure we have the waiting_dir_sync lock if we intend to call it later
713713
// retaking the current mutex recursively is a NOOP
714-
CACHE_TRY_LOCK(lock, dir_sync_waiting ? cacheDirSync->mutex : mutex, mutex->thread_holding);
714+
CACHE_TRY_LOCK(lock, dir_sync_waiting ? waiting_dir_sync->mutex : mutex, mutex->thread_holding);
715715
if (!lock.is_locked()) {
716716
eventProcessor.schedule_in(this, HRTIME_MSECONDS(cache_config_mutex_retry_delay));
717717
return EVENT_CONT;
@@ -759,7 +759,8 @@ StripeSM::aggWriteDone(int event, Event *e)
759759
}
760760
if (dir_sync_waiting) {
761761
dir_sync_waiting = false;
762-
cacheDirSync->handleEvent(EVENT_IMMEDIATE, nullptr);
762+
waiting_dir_sync->handleEvent(EVENT_IMMEDIATE, nullptr);
763+
waiting_dir_sync = nullptr;
763764
}
764765
if (this->_write_buffer.get_pending_writers().head || sync.head) {
765766
return aggWrite(event, e);

src/iocore/cache/StripeSM.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,14 @@ class StripeSM : public Continuation, public Stripe
8585

8686
StripeInitInfo *init_info = nullptr;
8787

88-
Cache *cache = nullptr;
89-
uint32_t last_sync_serial = 0;
90-
uint32_t last_write_serial = 0;
91-
bool recover_wrapped = false;
92-
bool dir_sync_waiting = false;
93-
bool dir_sync_in_progress = false;
94-
bool writing_end_marker = false;
88+
Cache *cache = nullptr;
89+
uint32_t last_sync_serial = 0;
90+
uint32_t last_write_serial = 0;
91+
bool recover_wrapped = false;
92+
bool dir_sync_waiting = false;
93+
bool dir_sync_in_progress = false;
94+
CacheSync *waiting_dir_sync = nullptr;
95+
bool writing_end_marker = false;
9596

9697
CacheKey first_fragment_key;
9798
int64_t first_fragment_offset = 0;

src/records/RecordsConfig.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,8 @@ static constexpr RecordElement RecordsConfig[] =
858858
,
859859
{RECT_CONFIG, "proxy.config.cache.dir.sync_max_write", RECD_INT, "2097152", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
860860
,
861+
{RECT_CONFIG, "proxy.config.cache.dir.sync_parallel_tasks", RECD_INT, "1", RECU_RESTART_TS, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
862+
,
861863
{RECT_CONFIG, "proxy.config.cache.hostdb.disable_reverse_lookup", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
862864
,
863865
{RECT_CONFIG, "proxy.config.cache.select_alternate", RECD_INT, "1", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}

0 commit comments

Comments
 (0)