Skip to content

Commit 35a442a

Browse files
committed
add support for parallel recall and migrate
Signed-off-by: Utkarsh Srivastava <[email protected]>
1 parent 5b2f75c commit 35a442a

File tree

12 files changed

+684
-274
lines changed

12 files changed

+684
-274
lines changed

config.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,11 @@ config.NSFS_GLACIER_DMAPI_TPS_HTTP_HEADER = 'x-tape-meta-copy';
916916
// but can be overridden to any numberical value
917917
config.NSFS_GLACIER_DMAPI_PMIG_DAYS = config.S3_RESTORE_REQUEST_MAX_DAYS;
918918

919+
// NSFS_GLACIER_DMAPI_FINALIZE_RESTORE_ENABLE if enabled will force NooBaa to
920+
// examine the DMAPI xattr of the file before finalizing the restore to prevent
921+
// accidental blocking reads from happening.
922+
config.NSFS_GLACIER_DMAPI_FINALIZE_RESTORE_ENABLE = false;
923+
919924
config.NSFS_STATFS_CACHE_SIZE = 10000;
920925
config.NSFS_STATFS_CACHE_EXPIRY_MS = 1 * 1000;
921926

src/manage_nsfs/manage_nsfs_glacier.js

Lines changed: 35 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -9,86 +9,53 @@ const { Glacier } = require('../sdk/glacier');
99
const native_fs_utils = require('../util/native_fs_utils');
1010
const { is_desired_time, record_current_time } = require('./manage_nsfs_cli_utils');
1111

12-
const CLUSTER_LOCK = 'cluster.lock';
13-
const SCAN_LOCK = 'scan.lock';
14-
1512
async function process_migrations() {
1613
const fs_context = native_fs_utils.get_process_fs_context();
17-
const lock_path = path.join(config.NSFS_GLACIER_LOGS_DIR, CLUSTER_LOCK);
18-
await native_fs_utils.lock_and_run(fs_context, lock_path, async () => {
19-
const backend = Glacier.getBackend();
20-
21-
if (
22-
await backend.low_free_space() ||
23-
await time_exceeded(fs_context, config.NSFS_GLACIER_MIGRATE_INTERVAL, Glacier.MIGRATE_TIMESTAMP_FILE) ||
24-
await migrate_log_exceeds_threshold()
25-
) {
26-
await run_glacier_migrations(fs_context, backend);
27-
const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.MIGRATE_TIMESTAMP_FILE);
28-
await record_current_time(fs_context, timestamp_file_path);
29-
}
30-
});
31-
}
32-
33-
/**
34-
* run_tape_migrations reads the migration WALs and attempts to migrate the
35-
* files mentioned in the WAL.
36-
* @param {nb.NativeFSContext} fs_context
37-
* @param {import('../sdk/glacier').Glacier} backend
38-
*/
39-
async function run_glacier_migrations(fs_context, backend) {
40-
await run_glacier_operation(fs_context, Glacier.MIGRATE_WAL_NAME, backend.migrate.bind(backend));
14+
const backend = Glacier.getBackend();
15+
16+
if (
17+
await backend.low_free_space() ||
18+
await time_exceeded(fs_context, config.NSFS_GLACIER_MIGRATE_INTERVAL, Glacier.MIGRATE_TIMESTAMP_FILE) ||
19+
await migrate_log_exceeds_threshold()
20+
) {
21+
await backend.perform(prepare_galcier_fs_context(fs_context), "MIGRATION");
22+
const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.MIGRATE_TIMESTAMP_FILE);
23+
await record_current_time(fs_context, timestamp_file_path);
24+
}
4125
}
4226

4327
async function process_restores() {
4428
const fs_context = native_fs_utils.get_process_fs_context();
45-
const lock_path = path.join(config.NSFS_GLACIER_LOGS_DIR, CLUSTER_LOCK);
46-
await native_fs_utils.lock_and_run(fs_context, lock_path, async () => {
47-
const backend = Glacier.getBackend();
29+
const backend = Glacier.getBackend();
4830

49-
if (
50-
await backend.low_free_space() ||
51-
!(await time_exceeded(fs_context, config.NSFS_GLACIER_RESTORE_INTERVAL, Glacier.RESTORE_TIMESTAMP_FILE))
52-
) return;
31+
if (
32+
await backend.low_free_space() ||
33+
!(await time_exceeded(fs_context, config.NSFS_GLACIER_RESTORE_INTERVAL, Glacier.RESTORE_TIMESTAMP_FILE))
34+
) return;
5335

54-
55-
await run_glacier_restore(fs_context, backend);
56-
const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.RESTORE_TIMESTAMP_FILE);
57-
await record_current_time(fs_context, timestamp_file_path);
58-
});
59-
}
60-
61-
/**
62-
* run_tape_restore reads the restore WALs and attempts to restore the
63-
* files mentioned in the WAL.
64-
* @param {nb.NativeFSContext} fs_context
65-
* @param {import('../sdk/glacier').Glacier} backend
66-
*/
67-
async function run_glacier_restore(fs_context, backend) {
68-
await run_glacier_operation(fs_context, Glacier.RESTORE_WAL_NAME, backend.restore.bind(backend));
36+
await backend.perform(prepare_galcier_fs_context(fs_context), "RESTORE");
37+
const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.RESTORE_TIMESTAMP_FILE);
38+
await record_current_time(fs_context, timestamp_file_path);
6939
}
7040

7141
async function process_expiry() {
7242
const fs_context = native_fs_utils.get_process_fs_context();
73-
const lock_path = path.join(config.NSFS_GLACIER_LOGS_DIR, SCAN_LOCK);
74-
await native_fs_utils.lock_and_run(fs_context, lock_path, async () => {
75-
const backend = Glacier.getBackend();
76-
const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.EXPIRY_TIMESTAMP_FILE);
77-
if (
78-
await backend.low_free_space() ||
79-
await is_desired_time(
80-
fs_context,
81-
new Date(),
82-
config.NSFS_GLACIER_EXPIRY_RUN_TIME,
83-
config.NSFS_GLACIER_EXPIRY_RUN_DELAY_LIMIT_MINS,
84-
timestamp_file_path,
85-
config.NSFS_GLACIER_EXPIRY_TZ
86-
)
87-
) {
88-
await backend.expiry(fs_context);
89-
await record_current_time(fs_context, timestamp_file_path);
90-
}
91-
});
43+
const backend = Glacier.getBackend();
44+
const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.EXPIRY_TIMESTAMP_FILE);
45+
if (
46+
await backend.low_free_space() ||
47+
await is_desired_time(
48+
fs_context,
49+
new Date(),
50+
config.NSFS_GLACIER_EXPIRY_RUN_TIME,
51+
config.NSFS_GLACIER_EXPIRY_RUN_DELAY_LIMIT_MINS,
52+
timestamp_file_path,
53+
config.NSFS_GLACIER_EXPIRY_TZ
54+
)
55+
) {
56+
await backend.perform(prepare_galcier_fs_context(fs_context), "EXPIRY");
57+
await record_current_time(fs_context, timestamp_file_path);
58+
}
9259
}
9360

9461

@@ -137,27 +104,6 @@ async function migrate_log_exceeds_threshold(threshold = config.NSFS_GLACIER_MIG
137104
return log_size > threshold;
138105
}
139106

140-
/**
141-
* run_glacier_operations takes a log_namespace and a callback and executes the
142-
* callback on each log file in that namespace. It will also generate a failure
143-
* log file and persist the failures in that log file.
144-
* @param {nb.NativeFSContext} fs_context
145-
* @param {string} log_namespace
146-
* @param {Function} cb
147-
*/
148-
async function run_glacier_operation(fs_context, log_namespace, cb) {
149-
const log = new PersistentLogger(config.NSFS_GLACIER_LOGS_DIR, log_namespace, { locking: 'EXCLUSIVE' });
150-
151-
fs_context = prepare_galcier_fs_context(fs_context);
152-
try {
153-
await log.process(async (entry, failure_recorder) => cb(fs_context, entry, failure_recorder));
154-
} catch (error) {
155-
console.error('failed to process log in namespace:', log_namespace);
156-
} finally {
157-
await log.close();
158-
}
159-
}
160-
161107
/**
162108
* prepare_galcier_fs_context returns a shallow copy of given
163109
* fs_context with backend set to 'GPFS'.

src/native/fs/fs_napi.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1455,6 +1455,7 @@ struct FileWrap : public Napi::ObjectWrap<FileWrap>
14551455
InstanceMethod<&FileWrap::fsync>("fsync"),
14561456
InstanceMethod<&FileWrap::flock>("flock"),
14571457
InstanceMethod<&FileWrap::fcntllock>("fcntllock"),
1458+
InstanceMethod<&FileWrap::fcntltrylock>("fcntltrylock"),
14581459
InstanceAccessor<&FileWrap::getfd>("fd"),
14591460
}));
14601461
constructor.SuppressDestruct();
@@ -1485,6 +1486,7 @@ struct FileWrap : public Napi::ObjectWrap<FileWrap>
14851486
Napi::Value getfd(const Napi::CallbackInfo& info);
14861487
Napi::Value flock(const Napi::CallbackInfo& info);
14871488
Napi::Value fcntllock(const Napi::CallbackInfo& info);
1489+
Napi::Value fcntltrylock(const Napi::CallbackInfo& info);
14881490
};
14891491

14901492
Napi::FunctionReference FileWrap::constructor;
@@ -1892,6 +1894,65 @@ struct FileFcntlLock : public FSWrapWorker<FileWrap>
18921894
}
18931895
};
18941896

1897+
struct FileFcntlTryLock : public FSWrapWorker<FileWrap>
1898+
{
1899+
bool _ok;
1900+
1901+
struct flock fl;
1902+
FileFcntlTryLock(const Napi::CallbackInfo& info)
1903+
: FSWrapWorker<FileWrap>(info)
1904+
, _ok(false)
1905+
, fl()
1906+
{
1907+
// lock entire file
1908+
fl.l_whence = SEEK_SET;
1909+
fl.l_start = 0;
1910+
fl.l_len = 0;
1911+
fl.l_pid = 0;
1912+
fl.l_type = F_RDLCK;
1913+
1914+
if (info.Length() > 1 && !info[1].IsUndefined()) {
1915+
auto mode = info[1].As<Napi::String>().Utf8Value();
1916+
if (mode == "EXCLUSIVE") {
1917+
fl.l_type = F_WRLCK;
1918+
} else if (mode == "UNLOCK") {
1919+
fl.l_type = F_UNLCK;
1920+
} else if (mode == "SHARED") {
1921+
fl.l_type = F_RDLCK;
1922+
} else {
1923+
SetError("invalid lock type");
1924+
}
1925+
}
1926+
1927+
Begin(XSTR() << "FileFcntlTryLock" << DVAL(_wrap->_path));
1928+
}
1929+
virtual void Work()
1930+
{
1931+
int fd = _wrap->_fd;
1932+
CHECK_WRAP_FD(fd);
1933+
// This uses F_OFD_SETLK instead for discussion related to this choice
1934+
// refer: https://github.com/noobaa/noobaa-core/pull/8174
1935+
int r = fcntl(fd, F_OFD_SETLK, &fl);
1936+
if (r == 0) {
1937+
_ok = true;
1938+
} else if (r == -1 && errno == EAGAIN) {
1939+
// Failed to aquire lock because another process
1940+
// already holds it
1941+
_ok = false;
1942+
} else {
1943+
SetSyscallError();
1944+
}
1945+
}
1946+
1947+
virtual void OnOK() {
1948+
DBG1("FS::FileFcntlTryLock::OnOK: " << DVAL(_ok));
1949+
Napi::Env env = Env();
1950+
auto res = Napi::Boolean::New(env, _ok);
1951+
_deferred.Resolve(res);
1952+
ReportWorkerStats(0);
1953+
}
1954+
};
1955+
18951956
struct RealPath : public FSWorker
18961957
{
18971958
std::string _path;
@@ -2050,6 +2111,12 @@ FileWrap::fcntllock(const Napi::CallbackInfo& info)
20502111
return api<FileFcntlLock>(info);
20512112
}
20522113

2114+
Napi::Value
2115+
FileWrap::fcntltrylock(const Napi::CallbackInfo& info)
2116+
{
2117+
return api<FileFcntlTryLock>(info);
2118+
}
2119+
20532120
/**
20542121
*
20552122
*/

0 commit comments

Comments
 (0)