Skip to content

Commit c6d9749

Browse files
committed
RD-603 replication scanner
1 parent f70e7f1 commit c6d9749

File tree

1 file changed

+160
-0
lines changed

1 file changed

+160
-0
lines changed

s3-replication-scanner.sh

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
#!/bin/bash

# DOCUMENTATION:
#   ./s3-replication-scanner.sh
#
# Reports objects whose replication status matches a filter, for every bucket
# that has a replication configuration.
#
# Environment variables:
#   - REPLICATION_STATUS_FILTER: Replication status to report (default: "PENDING")
#       - Expected values: "PENDING", "FAILED", "COMPLETED"
#       - An empty value is treated like an unset one and falls back to the
#         default, because the ":-" expansion below replaces empty values too.
#   - HOST: Metastore host endpoint (default: "localhost:9000")
#   - MAX_KEYS: Maximum keys per API request (default: 1000); positive integer
#   - BUCKET_LIST: Comma-separated bucket names to process (optional)

REPLICATION_STATUS_FILTER=${REPLICATION_STATUS_FILTER:-"PENDING"}

HOST=${HOST:-"localhost:9000"}
MAX_KEYS=${MAX_KEYS:-1000}
BUCKET_LIST=${BUCKET_LIST:-""}

# MAX_KEYS is interpolated into request URLs, so refuse anything that is not
# a positive integer instead of sending a malformed query string.
if ! [[ "$MAX_KEYS" =~ ^0*[1-9][0-9]*$ ]]; then
  echo "ERROR: MAX_KEYS must be a positive integer (got: '$MAX_KEYS')" >&2
  exit 1
fi

# Counters for summary
BUCKETS_PROCESSED=0
BUCKETS_FAILED=0
OBJECTS_FOUND=0

echo "Filtering objects with replication status: '$REPLICATION_STATUS_FILTER'"
echo "Using host: $HOST"
echo "Max keys per request: $MAX_KEYS"
if [[ -n "$BUCKET_LIST" ]]; then
  echo "Using provided bucket list: $BUCKET_LIST"
fi
echo "=================================="
31+
32+
# Get list of buckets - either from BUCKET_LIST or by querying the metastore.
# Result: $buckets holds a whitespace-separated list of bucket names.
if [[ -n "$BUCKET_LIST" ]]; then
  # Convert comma-separated list to space-separated
  buckets=${BUCKET_LIST//,/ }
  echo "Using provided buckets: $buckets"
else
  # Get list of all buckets
  echo "Fetching bucket list from metastore..."

  # Fetch and parse in two steps: after "curl | jq" the value of $? is jq's
  # exit status, so a curl failure would go unnoticed.
  metastore_response=$(curl -f "$HOST/default/metastore" 2>/dev/null)
  curl_exit_code=$?

  if [ "$curl_exit_code" -ne 0 ]; then
    echo "ERROR: Failed to connect to metastore at $HOST (curl exit code: $curl_exit_code)" >&2
    echo "Partial results: Unable to fetch bucket list" >&2
    exit 1
  fi

  buckets=$(printf '%s\n' "$metastore_response" | jq -r '.[].key' 2>/dev/null)
  jq_exit_code=$?

  if [ "$jq_exit_code" -ne 0 ]; then
    echo "ERROR: Could not parse bucket list returned by metastore at $HOST (jq exit code: $jq_exit_code)" >&2
    echo "Partial results: Unable to fetch bucket list" >&2
    exit 1
  fi

  if [[ -z "$buckets" ]]; then
    echo "WARNING: No buckets found in metastore"
    exit 0
  fi
  echo "Found buckets from metastore"
fi
55+
56+
#######################################
# Check whether a bucket has a replication configuration.
# Globals:
#   HOST (read)
# Arguments:
#   $1 - bucket name; a leading "db/" prefix is stripped
# Outputs:
#   A warning on stderr when the attributes cannot be fetched or parsed
# Returns:
#   0 if .replicationConfiguration is present and not null
#   1 if it is absent or null
#   2 if the attributes could not be fetched or parsed
#######################################
has_replication_enabled() {
  local bucket=$1
  # Remove db/ prefix if present
  local clean_bucket="${bucket#db/}"
  # Declared separately from the assignments below: "local var=$(cmd)" would
  # make $? the status of "local" (always 0) and hide a curl failure.
  local attributes replication_config curl_exit_code

  attributes=$(curl -f "$HOST/default/attributes/$clean_bucket" 2>/dev/null)
  curl_exit_code=$?

  if [ "$curl_exit_code" -ne 0 ]; then
    echo " WARNING: Failed to fetch attributes for bucket $clean_bucket (curl exit code: $curl_exit_code)" >&2
    return 2 # error state
  fi

  if ! replication_config=$(printf '%s\n' "$attributes" \
    | jq '.replicationConfiguration' 2>/dev/null); then
    echo " WARNING: Failed to parse attributes for bucket $clean_bucket" >&2
    return 2 # error state
  fi

  if [ -n "$replication_config" ] && [ "$replication_config" != "null" ]; then
    return 0 # has replication config
  fi
  return 1 # no replication config
}
76+
77+
#######################################
# List the objects of a bucket whose replication status equals a filter,
# following pagination until the listing is no longer truncated.
# Globals:
#   HOST (read), MAX_KEYS (read)
# Arguments:
#   $1 - bucket name; a leading "db/" prefix is stripped
#   $2 - replication status to match (compared for equality)
#   $3 - optional start marker; inserted into the query string as-is
# Outputs:
#   stdout: one JSON object per match with the fields
#           bucket, key, versionId, replicationStatus
#   stderr: progress and error messages
# Returns:
#   0 on success, 1 when a page could not be fetched or parsed
#######################################
get_objects_with_status() {
  local bucket=$1
  local status_filter=$2
  local marker=$3

  # Remove db/ prefix if present
  local clean_bucket="${bucket#db/}"

  # Declared separately from the assignments below: "local var=$(cmd)" would
  # make $? the status of "local" (always 0) and hide a curl failure.
  local url response curl_exit_code
  local object_count is_truncated next_marker filtered

  # Iterate over pages instead of recursing, so that large buckets do not
  # nest one shell function call per page.
  while :; do
    # Build URL with pagination parameters
    url="$HOST/default/bucket/$clean_bucket?maxKeys=$MAX_KEYS"
    if [ -n "$marker" ]; then
      url="$url&marker=$marker"
    fi

    echo " Fetching from: $url" >&2

    # Get objects in the bucket with pagination
    response=$(curl -f "$url" 2>/dev/null)
    curl_exit_code=$?

    if [ "$curl_exit_code" -ne 0 ]; then
      echo " ERROR: Failed to fetch objects from bucket $clean_bucket (curl exit code: $curl_exit_code)" >&2
      return 1
    fi

    if ! object_count=$(printf '%s\n' "$response" \
      | jq -r '(.Contents // []) | if type == "array" then length else 0 end' \
        2>/dev/null); then
      echo " ERROR: Could not parse object listing of bucket $clean_bucket" >&2
      return 1
    fi

    if [ -z "$object_count" ] || [ "$object_count" -eq 0 ]; then
      echo " No objects found in bucket $clean_bucket" >&2
      return 0
    fi

    is_truncated=$(printf '%s\n' "$response" \
      | jq -r '.IsTruncated // false' 2>/dev/null)

    # URL-encode the last key of the page before using it as the next marker:
    # keys may contain characters that are special in a query string, and the
    # NUL separating key and version id would be dropped by $(...) otherwise.
    next_marker=$(printf '%s\n' "$response" \
      | jq -r '(.Contents // []) | last | .key // empty | @uri' 2>/dev/null)

    # Filter objects by replication status. The bucket name is passed with
    # --arg rather than spliced into the program text, and a .value that is
    # not valid JSON only excludes that object instead of aborting the page.
    filtered=$(printf '%s\n' "$response" \
      | jq -r --arg status "$status_filter" --arg bucket "$clean_bucket" '
          .Contents[]?
          | (try (.value | fromjson | .replicationInfo.status) catch null) as $object_status
          | select($object_status == $status)
          | (.key | split("\u0000")) as $key_parts
          | {
              bucket: $bucket,
              key: ($key_parts[0] // .key),
              versionId: ($key_parts[1] // ""),
              replicationStatus: $object_status
            }
        ' 2>/dev/null)

    # Output filtered results if any
    if [ -n "$filtered" ]; then
      printf '%s\n' "$filtered"
    fi

    # Stop unless the listing is truncated and there is a key to resume from
    if [ "$is_truncated" != "true" ] || [ -z "$next_marker" ]; then
      return 0
    fi

    # A marker that does not advance would repeat the same page forever
    if [ "$next_marker" = "$marker" ]; then
      echo " ERROR: Pagination marker did not advance for bucket $clean_bucket" >&2
      return 1
    fi

    echo " More results available, next marker: $next_marker" >&2
    marker=$next_marker
  done
}
138+
139+
# Main logic: for every bucket with a replication configuration, print the
# objects whose replication status matches REPLICATION_STATUS_FILTER, then
# print a one-line summary of the counters.
BUCKETS_PROCESSED=${BUCKETS_PROCESSED:-0}
BUCKETS_FAILED=${BUCKETS_FAILED:-0}
OBJECTS_FOUND=${OBJECTS_FOUND:-0}

echo "Buckets with replication enabled:"
# $buckets is left unquoted on purpose: it is a whitespace-separated list.
# NOTE(review): assumes bucket names contain no whitespace or glob characters.
for bucket in $buckets; do
  # Remove db/ prefix for display
  clean_bucket="${bucket#db/}"

  has_replication_enabled "$bucket"
  replication_rc=$?

  if [ "$replication_rc" -eq 1 ]; then
    # No replication configuration: nothing to report for this bucket
    continue
  elif [ "$replication_rc" -ne 0 ]; then
    # The check itself failed; count it instead of treating the bucket as
    # having no replication configuration.
    BUCKETS_FAILED=$((BUCKETS_FAILED + 1))
    continue
  fi

  echo "- $clean_bucket"

  # Get objects with the specified replication status
  filtered_objects=$(get_objects_with_status "$bucket" "$REPLICATION_STATUS_FILTER")
  scan_rc=$?

  if [ -n "$filtered_objects" ]; then
    echo " Objects with replication status '$REPLICATION_STATUS_FILTER':"
    printf '%s\n' "$filtered_objects" | jq -r '
      " Bucket: " + .bucket + " | Key: " + .key + " | VersionId: " + .versionId + " | Status: " + .replicationStatus
    '
    found_count=$(printf '%s\n' "$filtered_objects" | jq -s 'length' 2>/dev/null)
    if ! [[ "$found_count" =~ ^[0-9]+$ ]]; then
      found_count=0
    fi
    OBJECTS_FOUND=$((OBJECTS_FOUND + found_count))
  elif [ "$scan_rc" -eq 0 ]; then
    echo " No objects with replication status '$REPLICATION_STATUS_FILTER'"
  fi

  if [ "$scan_rc" -ne 0 ]; then
    # Do not present a failed listing as "no objects"
    echo " ERROR: Listing of bucket $clean_bucket did not complete; results may be partial" >&2
    BUCKETS_FAILED=$((BUCKETS_FAILED + 1))
  else
    BUCKETS_PROCESSED=$((BUCKETS_PROCESSED + 1))
  fi
  echo ""
done

echo "=================================="
echo "Summary: $BUCKETS_PROCESSED bucket(s) scanned, $BUCKETS_FAILED failed, $OBJECTS_FOUND object(s) with replication status '$REPLICATION_STATUS_FILTER'"

0 commit comments

Comments
 (0)