Skip to content

Commit 2651c75

Browse files
committed
feat: Add USE_CHUNKSERVER_SIDE_CHUNK_LOCK option
The new behavior implemented previously can be now enabled and disabled via the option USE_CHUNKSERVER_SIDE_CHUNK_LOCK. This option is reloadable. The decision making instant is at the moment of sending the specific packet type to the chunkservers. The testing framework was modified in order to enable this option in all tests, while the default master behavior has it disabled by default. Signed-off-by: Dave <dave@leil.io>
1 parent e6ef8d1 commit 2651c75

File tree

8 files changed

+89
-38
lines changed

8 files changed

+89
-38
lines changed

doc/sfsmaster.cfg.5.adoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,10 @@ for TLS connections (there is no default value).
290290
Path to the trusted CA certificate which is used to authenticate
291291
the TLS connection (there is no default value).
292292

293+
*USE_CHUNKSERVER_SIDE_CHUNK_LOCK (EXPERIMENTAL)*:: When set to 1, enables sending
294+
chunk part lock messages to the chunkservers. This can be useful to track down which
295+
chunk parts are currently being written. Reloadable (default: 0).
296+
293297
== NOTES
294298

295299
Chunks in master are tested in loop. Speed (or frequency) is regulated by two

src/admin/dump_config_command.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ const static std::unordered_map<std::string, std::string> defaultOptionsMaster =
128128
{"SNAPSHOT_INITIAL_BATCH_SIZE_LIMIT", "10000"},
129129
{"FILE_TEST_LOOP_MIN_TIME", "3600"},
130130
{"PRIORITIZE_DATA_PARTS", "1"},
131+
{"USE_CHUNKSERVER_SIDE_CHUNK_LOCK", "0"},
131132
{"CREATE_EMPTY_FOLDERS_WHEN_SPACE_DEPLETED", "1"},
132133
};
133134

src/data/sfsmaster.cfg.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,3 +355,9 @@
355355
## Example: /etc/saunafs/ssl/ca.crt
356356
## (There is no default)
357357
# TLS_CA_CERT_FILE =
358+
359+
## When set to 1, enables sending chunk part lock messages to the chunkservers.
360+
## This can be useful to track down which chunk parts are currently being
361+
## written. Reloadable.
362+
## (Default: 0)
363+
# USE_CHUNKSERVER_SIDE_CHUNK_LOCK = 0

src/master/chunks.cc

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ static uint64_t gEndangeredChunksMaxCapacity;
109109
static uint64_t gDisconnectedCounter = 0;
110110
inline LinearAssignmentCache gLinearAssignmentCache;
111111
inline bool gUseLinearAssignmentOptimizer;
112+
static bool gUseChunkserverSideChunkLock;
112113
bool gAvoidSameIpChunkservers = false;
113114

114115
struct ChunkPart {
@@ -1180,10 +1181,11 @@ int chunk_get_partstomodify(uint64_t chunkid, int &recover, int &remove) {
11801181

11811182
// Chunk operations
11821183

1183-
/// @brief Performs the chunk creation operation, which consists of creating a new chunk with
1184+
/// @brief Performs the chunk creation operation, which consists of creating a new chunk with
11841185
/// version 1, associating it with the given goal and sending create chunk messages to the provided
11851186
/// chunkservers. The parts in the chunk are marked as being written (it is expecteted that client
1186-
/// starts writing) if the corresponding chunkserver supports locking and the create chunk message was sent with locking.
1187+
/// starts writing) if the corresponding chunkserver supports locking and the create chunk message
1188+
/// was sent with locking.
11871189
/// @param createdChunk A reference to a pointer where the created chunk will be stored.
11881190
/// @param goal The goal that will be associated with the created chunk.
11891191
/// @param serversWithChunkTypes The list of chunkservers to create the chunk on.
@@ -1201,7 +1203,8 @@ void chunk_create_operation(
12011203
server_with_type.second));
12021204
bool sentChunkLock = false;
12031205
matocsserv_send_createchunk(server_with_type.first, createdChunk->chunkid,
1204-
server_with_type.second, createdChunk->version, sentChunkLock);
1206+
server_with_type.second, createdChunk->version,
1207+
gUseChunkserverSideChunkLock, sentChunkLock);
12051208

12061209
if (sentChunkLock) { createdChunk->parts.back().mark_being_written(); }
12071210
// If the chunk lock was not sent, it means that the chunkserver does not support locking,
@@ -1225,14 +1228,14 @@ void chunk_increase_version_operation(Chunk *chunk, bool needsLocking) {
12251228
part.version = chunk->version + 1;
12261229
// If part is already being written then we don't need to ask the chunkserver to lock
12271230
// it again, and we can just increase the version.
1228-
bool partNeedsLocking = !part.is_being_written() && needsLocking;
1231+
bool partNeedsLocking =
1232+
!part.is_being_written() && needsLocking && gUseChunkserverSideChunkLock;
12291233
bool sentChunkLock = false;
12301234
matocsserv_send_setchunkversion(part.server(), chunk->chunkid, chunk->version + 1,
1231-
chunk->version, part.type, partNeedsLocking, sentChunkLock);
1235+
chunk->version, part.type, partNeedsLocking,
1236+
sentChunkLock);
12321237

1233-
if (partNeedsLocking && sentChunkLock) {
1234-
part.mark_being_written();
1235-
}
1238+
if (partNeedsLocking && sentChunkLock) { part.mark_being_written(); }
12361239
}
12371240
}
12381241

@@ -1248,18 +1251,20 @@ void chunk_increase_version_operation(Chunk *chunk, bool needsLocking) {
12481251
void chunk_lock_operation(Chunk *chunk) {
12491252
bool mustWaitForReply = false;
12501253
assert(chunk->isWritable());
1251-
for (auto &part : chunk->parts) {
1252-
if (part.is_valid()) {
1253-
if (part.is_busy()) { continue; }
1254-
// No busy parts from now on
1255-
1256-
bool sentChunkLock = false;
1257-
matocsserv_send_chunklock(part.server(), chunk->chunkid, part.type,
1258-
!part.is_being_written(), sentChunkLock);
1259-
if (sentChunkLock) {
1260-
part.mark_being_written();
1261-
mustWaitForReply = true;
1262-
part.mark_busy();
1254+
if (gUseChunkserverSideChunkLock) {
1255+
for (auto &part : chunk->parts) {
1256+
if (part.is_valid()) {
1257+
if (part.is_busy()) { continue; }
1258+
// No busy parts from now on
1259+
1260+
bool sentChunkLock = false;
1261+
matocsserv_send_chunklock(part.server(), chunk->chunkid, part.type,
1262+
!part.is_being_written(), sentChunkLock);
1263+
if (sentChunkLock) {
1264+
part.mark_being_written();
1265+
mustWaitForReply = true;
1266+
part.mark_busy();
1267+
}
12631268
}
12641269
}
12651270
}
@@ -1298,7 +1303,8 @@ void chunk_duplicate_operation(Chunk *originalChunk, uint8_t goal, Chunk *&newCh
12981303
bool sentChunkLock = false;
12991304
matocsserv_send_duplicatechunk(oldPart.server(), newChunk->chunkid, newChunk->version,
13001305
oldPart.type, originalChunk->chunkid,
1301-
originalChunk->version, sentChunkLock);
1306+
originalChunk->version, gUseChunkserverSideChunkLock,
1307+
sentChunkLock);
13021308

13031309
if (sentChunkLock) { newChunk->parts.back().mark_being_written(); }
13041310
}
@@ -3173,6 +3179,7 @@ void chunk_reload(void) {
31733179
gAvoidSameIpChunkservers = cfg_getuint32("AVOID_SAME_IP_CHUNKSERVERS", 0);
31743180
gRedundancyLevel = cfg_getuint32("REDUNDANCY_LEVEL", 0);
31753181
gUseLinearAssignmentOptimizer = cfg_getuint32("USE_LINEAR_ASSIGNMENT_OPTIMIZER", 1);
3182+
gUseChunkserverSideChunkLock = cfg_getuint32("USE_CHUNKSERVER_SIDE_CHUNK_LOCK", 0);
31763183

31773184
uint32_t disableChunksDel = cfg_getuint32("DISABLE_CHUNKS_DEL", 0);
31783185
if (disableChunksDel) {
@@ -3268,6 +3275,7 @@ int chunk_strinit(void) {
32683275
gAvoidSameIpChunkservers = cfg_getuint32("AVOID_SAME_IP_CHUNKSERVERS", 0);
32693276
gRedundancyLevel = cfg_getuint32("REDUNDANCY_LEVEL", 0);
32703277
gUseLinearAssignmentOptimizer = cfg_getuint32("USE_LINEAR_ASSIGNMENT_OPTIMIZER", 1);
3278+
gUseChunkserverSideChunkLock = cfg_getuint32("USE_CHUNKSERVER_SIDE_CHUNK_LOCK", 0);
32713279

32723280
if (disableChunksDel) {
32733281
MaxDelHardLimit = MaxDelSoftLimit = 0;

src/master/matocsserv.cc

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -604,12 +604,12 @@ void matocsserv_got_chunk_checksum(matocsserventry *eptr, const uint8_t *data, u
604604
}
605605

606606
int matocsserv_send_createchunk(matocsserventry *eptr, uint64_t chunkId, ChunkPartType chunkType,
607-
uint32_t chunkVersion, bool &sentChunkLock) {
607+
uint32_t chunkVersion, bool needsLock, bool &sentChunkLock) {
608608
sentChunkLock = false;
609609
if (eptr->mode != ChunkserverConnectionMode::KILL) {
610610
eptr->outputPackets.push_back(OutputPacket());
611611
sassert(eptr->version >= kFirstECVersion);
612-
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock) {
612+
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock && needsLock) {
613613
// For newer chunkservers, create and lock part
614614
matocs::createAndLockChunk::serialize(eptr->outputPackets.back().packet, chunkId,
615615
chunkType, chunkVersion);
@@ -730,11 +730,11 @@ void matocsserv_got_replicatechunk_status(matocsserventry *eptr, const std::vect
730730
}
731731

732732
int matocsserv_send_chunklock(matocsserventry *eptr, uint64_t chunkId, ChunkPartType chunkType,
733-
bool needLock, bool &sentChunkLock) {
733+
bool needsLock, bool &sentChunkLock) {
734734
sentChunkLock = false;
735735
if (eptr->mode != ChunkserverConnectionMode::KILL) {
736736
sassert(eptr->version >= kFirstECVersion);
737-
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock && needLock) {
737+
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock && needsLock) {
738738
eptr->outputPackets.emplace_back();
739739
matocs::chunkLock::serialize(eptr->outputPackets.back().packet, chunkId, chunkType);
740740
sentChunkLock = true;
@@ -794,12 +794,12 @@ int matocsserv_send_chunkunlock(matocsserventry *eptr, uint64_t chunkId, ChunkPa
794794
}
795795

796796
int matocsserv_send_setchunkversion(matocsserventry *eptr, uint64_t chunkId, uint32_t newVersion,
797-
uint32_t chunkVersion, ChunkPartType chunkType, bool needChunkLock, bool &sentChunkLock) {
797+
uint32_t chunkVersion, ChunkPartType chunkType, bool needsLock, bool &sentChunkLock) {
798798
sentChunkLock = false;
799799
if (eptr->mode != ChunkserverConnectionMode::KILL) {
800800
eptr->outputPackets.emplace_back();
801801
sassert(eptr->version >= kFirstECVersion);
802-
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock && needChunkLock) {
802+
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock && needsLock) {
803803
// For newer chunkservers, set version with chunk lock
804804
matocs::setVersionAndLock::serialize(eptr->outputPackets.back().packet, chunkId,
805805
chunkType, chunkVersion, newVersion);
@@ -835,13 +835,14 @@ void matocsserv_got_setchunkversion_status(matocsserventry *eptr,
835835

836836
int matocsserv_send_duplicatechunk(matocsserventry *eptr, uint64_t newChunkId,
837837
uint32_t newChunkVersion, ChunkPartType chunkType,
838-
uint64_t chunkId, uint32_t chunkVersion, bool &sentChunkLock) {
838+
uint64_t chunkId, uint32_t chunkVersion, bool needsLock,
839+
bool &sentChunkLock) {
839840
sentChunkLock = false;
840841
if (eptr->mode == ChunkserverConnectionMode::KILL) { return 0; }
841842

842843
OutputPacket outPacket;
843844
sassert(eptr->version >= kFirstECVersion);
844-
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock) {
845+
if (eptr->version >= kFirstVersionWithChunkserverSideChunkLock && needsLock) {
845846
// For newer chunkservers, duplicate with chunk lock
846847
matocs::duplicateAndLockChunk::serialize(outPacket.packet, newChunkId, newChunkVersion,
847848
chunkType, chunkId, chunkVersion);

src/master/matocsserv.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,17 @@ int matocsserv_send_sau_replicatechunk(matocsserventry* eptr,
107107
int matocsserv_send_deletechunk(matocsserventry* eptr,
108108
uint64_t chunkId, uint32_t chunkVersion, ChunkPartType chunkType);
109109
int matocsserv_send_createchunk(matocsserventry *eptr, uint64_t chunkid, ChunkPartType chunkType,
110-
uint32_t version, bool &sentChunkLock);
110+
uint32_t version, bool needsLock, bool &sentChunkLock);
111111
int matocsserv_send_chunklock(matocsserventry *eptr, uint64_t chunkId, ChunkPartType chunkType,
112-
bool needLock, bool &sentChunkLock);
112+
bool needsLock, bool &sentChunkLock);
113113
int matocsserv_send_chunkunlock(matocsserventry *eptr, uint64_t chunkId, ChunkPartType chunkType);
114114
int matocsserv_send_setchunkversion(matocsserventry *eptr, uint64_t chunkId, uint32_t newVersion,
115-
uint32_t chunkVersion, ChunkPartType chunkType,
116-
bool needsLocking, bool &sentChunkLock);
115+
uint32_t chunkVersion, ChunkPartType chunkType, bool needsLock,
116+
bool &sentChunkLock);
117117
int matocsserv_send_duplicatechunk(matocsserventry *eptr, uint64_t newChunkId,
118118
uint32_t newChunkVersion, ChunkPartType chunkType,
119-
uint64_t chunkId, uint32_t chunkVersion, bool &sentChunkLock);
119+
uint64_t chunkId, uint32_t chunkVersion, bool needsLock,
120+
bool &sentChunkLock);
120121
void matocsserv_send_truncatechunk(matocsserventry* eptr,
121122
uint64_t chunkid, ChunkPartType chunkType, uint32_t length,
122123
uint32_t version, uint32_t oldversion);

tests/test_suites/ShortSystemTests/test_concurrent_random_writes_on_chunk.sh

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
timeout_set 30 seconds
1+
timeout_set 45 seconds
22

33
CHUNKSERVERS=8 \
44
USE_RAMDISK=YES \
@@ -12,22 +12,51 @@ cd "${info[mount0]}"
1212
mkdir dir
1313
saunafs setgoal ec62 dir
1414

15-
times_to_repeat=512
15+
times_to_repeat=1024
1616
FILE_SIZE=$(( times_to_repeat * 4 * 1024 )) file-generate ${TEMP_DIR}/original_file
1717

18-
# Write 4KB at a time, 1KB in each of 4 mounts, and repeat this 512 times, so that the file is
18+
# Write 4KB at a time, 1KB in each of 4 mounts, and repeat this 1024 times, so that the file is
1919
# written in random order and with many concurrent writes.
2020

21+
master_reloading_loop_file=${TEMP_DIR}/master_reloading_loop_file
22+
switch_use_chunkserver_side_chunk_lock_thread() {
23+
touch ${master_reloading_loop_file}
24+
while true; do
25+
if [ ! -e ${master_reloading_loop_file} ]; then
26+
break 2
27+
fi
28+
sleep 0.15
29+
current=$(grep USE_CHUNKSERVER_SIDE_CHUNK_LOCK ${info[master0_master_cfg]} | tail -n 1 | awk '{print $3}')
30+
echo "Switching USE_CHUNKSERVER_SIDE_CHUNK_LOCK to $(( 1 - current ))"
31+
sed -i "s/USE_CHUNKSERVER_SIDE_CHUNK_LOCK = ./USE_CHUNKSERVER_SIDE_CHUNK_LOCK = $(( 1 - current ))/g" ${info[master0_master_cfg]}
32+
saunafs_master_daemon reload
33+
done
34+
echo "chunkservers_restarting_loop stopped"
35+
}
36+
37+
stop_switch_use_chunkserver_side_chunk_lock_thread() {
38+
rm -f ${master_reloading_loop_file}
39+
}
40+
41+
switch_use_chunkserver_side_chunk_lock_thread &
42+
switch_use_chunkserver_side_chunk_lock_thread_pid=$!
43+
2144
for i in $(seq 0 $((times_to_repeat - 1))); do
2245
shuffled_seq=($(shuf -e $(seq 0 3)))
46+
pids=()
2347
for mount in $(seq 0 3); do
2448
dd if="${TEMP_DIR}/original_file" of="${info[mount${mount}]}/dir/file" bs=1K \
2549
skip=$(( i * 4 + ${shuffled_seq[$mount]} )) \
2650
seek=$(( i * 4 + ${shuffled_seq[$mount]} )) \
2751
count=1 conv=notrunc 2>/dev/null &
52+
pids+=("$!")
2853
done
29-
wait
54+
if [ ${#pids[@]} -gt 0 ]; then
55+
wait "${pids[@]}"
56+
fi
3057
echo "Done writing $i-th block of 4KB"
3158
done
3259

60+
stop_switch_use_chunkserver_side_chunk_lock_thread
61+
3362
MESSAGE="Validating file after concurrent random writes" expect_success file-validate dir/file

tests/tools/saunafs.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ create_sfsmaster_master_cfg_() {
372372
echo "MATOTS_LISTEN_PORT = ${saunafs_info_[matots]}"
373373
echo "METADATA_CHECKSUM_INTERVAL = 1"
374374
echo "ADMIN_PASSWORD = ${saunafs_info_[admin_password]}"
375+
echo "USE_CHUNKSERVER_SIDE_CHUNK_LOCK = 1"
375376
create_magic_debug_log_entry_ "master_${masterserver_id}"
376377
echo "${MASTER_EXTRA_CONFIG-}" | tr '|' '\n'
377378
echo "${!this_module_cfg_variable-}" | tr '|' '\n'

0 commit comments

Comments
 (0)