Skip to content

Commit aff8cc0

Browse files
author
yawzhang
committed
Assert if refactor verification fails before writing to disk; iterate the refactor sequence by node instead of by deployment index
1 parent 08ee978 commit aff8cc0

File tree

2 files changed

+36
-7
lines changed

2 files changed

+36
-7
lines changed

refactor.sh

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,14 @@ CLUSTER=908
55
NAMESPACE="nuobject2sh-dev"
66
DEPLOYMENT_COUNT=4
77
REFACTOR_IMAGE="hub.tess.io/yawzhang/storage_mgr:refactor_new-RelWithDebInfo"
8-
NEW_IMAGE="hub.tess.io/yawzhang/storage_mgr:crc_1027-RelWithDebInfo"
8+
NEW_IMAGE="hub.tess.io/sds/storage_mgr:1.0-pre.0.2.6.6-RelWithDebInfo"
9+
NODE_NAME=""
10+
DEPLOYMENT_REGEX="sm-long-running[1-4]-1007"
11+
12+
if [[ -z "$NODE_NAME" ]]; then
13+
echo "please update node name in the script"
14+
exit 1
15+
fi
916

1017
# Function to check deployment status
1118
check_deployment_status() {
@@ -27,10 +34,24 @@ check_pod_logs() {
2734
return $?
2835
}
2936

30-
start_idx=1
31-
for i in $(seq "$start_idx" "$DEPLOYMENT_COUNT"); do
32-
DEPLOYMENT="sm-long-running$i-1007"
33-
POD=$(tess kubectl --context="$CLUSTER" -n "$NAMESPACE" get pods -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep "$DEPLOYMENT")
37+
PODS=$(tess kubectl --context="$CLUSTER" -n "$NAMESPACE" get pods --field-selector spec.nodeName="$NODE_NAME" -o jsonpath='{.items[*].metadata.name}')
38+
if [[ -z "$PODS" ]]; then
39+
echo "No pods found on node $NODE_NAME."
40+
exit 0
41+
fi
42+
43+
echo "node $NODE_NAME has pods [$PODS]"
44+
45+
process_cnt=0
46+
for POD in $PODS; do
47+
DEPLOYMENT=$(tess kubectl --context="$CLUSTER" -n "$NAMESPACE" get pod "$POD" -o jsonpath='{.metadata.ownerReferences[?(@.kind=="ReplicaSet")].name}' | sed 's/-[a-z0-9]*$//')
48+
if [[ -z "$DEPLOYMENT" ]]; then
49+
echo "No deployment found for pod $POD. Skipping..."
50+
continue
51+
elif ! [[ $DEPLOYMENT =~ $DEPLOYMENT_REGEX ]]; then
52+
echo "Skipping pod $POD as its deployment $DEPLOYMENT does not match the expected pattern."
53+
continue
54+
fi
3455
echo "Processing deployment $DEPLOYMENT pod $POD..."
3556

3657
# PRE-CHECK
@@ -40,6 +61,11 @@ for i in $(seq "$start_idx" "$DEPLOYMENT_COUNT"); do
4061
continue
4162
fi
4263

64+
if [[ $process_cnt -ge $DEPLOYMENT_COUNT ]]; then
65+
echo "Reached the maximum number of deployments to process: $DEPLOYMENT_COUNT. Stopping further processing."
66+
break
67+
fi
68+
4369
# Step 1: Update deployment strategy to Recreate and set sm-app image to refactor image
4470
echo "[Step 1]. Updating deployment $DEPLOYMENT strategy to Recreate and setting sm-app image to $REFACTOR_IMAGE..."
4571
tess kubectl --context="$CLUSTER" -n "$NAMESPACE" patch deployment "$DEPLOYMENT" --type='json' -p='[
@@ -101,6 +127,7 @@ for i in $(seq "$start_idx" "$DEPLOYMENT_COUNT"); do
101127
done
102128

103129
echo "[Step 6]. Deployment $DEPLOYMENT processed successfully."
130+
process_cnt=$((process_cnt + 1))
104131
done
105132

106133
echo "All pods processed successfully."

src/lib/logstore/log_dev.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -908,8 +908,6 @@ void LogDevMetadata::refactor_superblk(const std::vector< std::pair< logstore_id
908908
store_sb.m_first_seq_num);
909909
logstore_superblk::init(sb_area[store_id], store_sb.m_first_seq_num);
910910
}
911-
m_sb.write();
912-
LOGINFO("Refactored logdev_superblk written to disk, log_dev={}", new_sb.logdev_id);
913911

914912
// check if refactor is successful
915913
new_logdev_superblk* test_sb = reinterpret_cast< new_logdev_superblk* >(m_sb.raw_buf()->bytes());
@@ -921,11 +919,15 @@ void LogDevMetadata::refactor_superblk(const std::vector< std::pair< logstore_id
921919
if (test_store_sb[store_id].m_first_seq_num != store_sb.m_first_seq_num) {
922920
LOGERROR("Refactored logdev superblk verification failed for store id {}, expected is {}, actual is {}",
923921
store_id, store_sb.m_first_seq_num, test_store_sb[store_id].m_first_seq_num);
922+
RELEASE_ASSERT(false, "Refactored logdev superblk verification failed");
924923
} else {
925924
LOGINFO("Refactored logdev={} superblk verification succeeded for store id {}, lsn={}", test_sb->logdev_id,
926925
store_id, test_store_sb[store_id].m_first_seq_num);
927926
}
928927
}
928+
m_sb.write();
929+
LOGINFO("Refactored logdev_superblk written to disk, log_dev={}", new_sb.logdev_id);
930+
929931
}
930932

931933
logstore_id_t LogDevMetadata::reserve_store(bool persist_now) {

0 commit comments

Comments
 (0)