Skip to content

Commit fd804d9

Browse files
committed
Make fetch_osc more fault-aware.
1 parent 77030fd commit fd804d9

File tree

1 file changed

+54
-6
lines changed

1 file changed

+54
-6
lines changed

src/bin/fetch_osc.sh

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ fi
2727
REPLICATE_ID=$1
2828
SOURCE_DIR=$2
2929
LOCAL_DIR=$3
30-
SLEEP_BETWEEN_DLS=${4:-15} # How long to sleep between download attempts (sec). Default: 15
30+
SLEEP_BETWEEN_DLS=15 # How long to sleep between download attempts (sec). Default: 15. See also FAIL_COUNTER_ALERT
31+
FAIL_COUNTER_ALERT=20 # After how many sleep cycles do we get nervous
32+
FAIL_COUNTER=0
33+
FILE_PANIC=
3134

3235
if [[ ! -d $LOCAL_DIR ]];
3336
then {
@@ -44,6 +47,8 @@ fetch_file()
4447

4548
retry_fetch_file()
4649
{
50+
FILE_PANIC=
51+
FAIL_COUNTER=0
4752
if [[ ! -s "$2" ]]; then {
4853
fetch_file "$1" "$2"
4954
if [[ "$3" == "gzip" ]]; then {
@@ -53,7 +58,8 @@ retry_fetch_file()
5358
}; fi
5459
}; fi
5560
}; fi
56-
until [[ -s "$2" ]]; do {
61+
until [[ -s "$2" || $FAIL_COUNTER -ge $FAIL_COUNTER_ALERT ]]; do {
62+
FAIL_COUNTER=$(($FAIL_COUNTER+1))
5763
sleep $SLEEP_BETWEEN_DLS
5864
fetch_file "$1" "$2"
5965
if [[ "$3" == "gzip" ]]; then {
@@ -63,12 +69,48 @@ retry_fetch_file()
6369
}; fi
6470
}; fi
6571
}; done
72+
if [[ $FAIL_COUNTER -ge $FAIL_COUNTER_ALERT ]]; then
73+
FILE_PANIC="true"
74+
fi
75+
};
76+
77+
# file_panic: react to an upstream delay by re-verifying the previous diff.
#
# Takes no arguments. Logs "upstream_delay" for the current REPLICATE_ID,
# steps REPLICATE_ID back by one, downloads fresh copies of that replicate's
# .osc.gz and .state.txt next to the stored ones (as <nnn>.new.*) and compares
# each pair with diff -q.
#   - A stored file differs from its fresh copy: logs "file_panic" plus both
#     diff results and terminates the script with exit status 1.
#   - diff could not compare a pair at all (exit status > 1, e.g. the stored
#     file is missing): logs "file_panic_check_failed" so the skipped
#     verification is visible in the log, then continues.
#   - Otherwise: removes the fresh copies and returns.
#
# Globals read:    LOCAL_DIR, LOCAL_PATH, REMOTE_PATH
# Globals written: REPLICATE_ID (decremented by one), TDIGIT1, TDIGIT2,
#                  TDIGIT3, ARG, FILE_PANIC, RES_GZIP, RES_TEXT
#
# NOTE(review): REMOTE_PATH and LOCAL_PATH are used exactly as the caller left
# them. If they embed TDIGIT1/TDIGIT2, stepping back across a multiple of 1000
# would look in the wrong directory -- confirm against fetch_minute_diff.
file_panic()
{
  local log_file="$LOCAL_DIR/fetch_osc.log"
  local gzip_status=0
  local text_status=0

  printf 'fetch_osc()@%s: upstream_delay %s\n' \
    "$(date '+%F %T')" "$REPLICATE_ID" >>"$log_file"
  REPLICATE_ID=$((REPLICATE_ID - 1))

  # Split the id into three zero-padded 3-digit groups. ARG stays a global
  # scratch variable, the same way fetch_minute_diff uses it.
  printf -v TDIGIT3 '%03u' "$((REPLICATE_ID % 1000))"
  ARG=$((REPLICATE_ID / 1000))
  printf -v TDIGIT2 '%03u' "$((ARG % 1000))"
  ARG=$((ARG / 1000))
  printf -v TDIGIT1 '%03u' "$ARG"

  # retry_fetch_file clears FILE_PANIC on entry and sets it again when it
  # gives up, so keep calling it until one attempt finishes without giving up.
  FILE_PANIC="true"
  while [[ -n "$FILE_PANIC" ]]; do
    retry_fetch_file "$REMOTE_PATH/$TDIGIT3.osc.gz" "$LOCAL_PATH/$TDIGIT3.new.osc.gz" "gzip"
  done
  FILE_PANIC="true"
  while [[ -n "$FILE_PANIC" ]]; do
    retry_fetch_file "$REMOTE_PATH/$TDIGIT3.state.txt" "$LOCAL_PATH/$TDIGIT3.new.state.txt" "text"
  done

  # diff -q: status 0 = identical, 1 = different, >1 = could not compare.
  # The "|| ...=$?" form records the status without aborting under set -e.
  RES_GZIP=$(diff -q -- "$LOCAL_PATH/$TDIGIT3.osc.gz" "$LOCAL_PATH/$TDIGIT3.new.osc.gz") \
    || gzip_status=$?
  RES_TEXT=$(diff -q -- "$LOCAL_PATH/$TDIGIT3.state.txt" "$LOCAL_PATH/$TDIGIT3.new.state.txt") \
    || text_status=$?

  if [[ -n "$RES_GZIP" || -n "$RES_TEXT" || $gzip_status -eq 1 || $text_status -eq 1 ]]; then
    {
      printf 'fetch_osc()@%s: file_panic %s\n' "$(date '+%F %T')" "$REPLICATE_ID"
      printf 'fetch_osc()@%s: %s\n' "$(date '+%F %T')" "$RES_GZIP"
      printf 'fetch_osc()@%s: %s\n' "$(date '+%F %T')" "$RES_TEXT"
    } >>"$log_file"
    exit 1
  fi

  # A failed comparison used to vanish on stderr; record it, but keep the
  # fetch loop running as before.
  if [[ $gzip_status -gt 1 || $text_status -gt 1 ]]; then
    printf 'fetch_osc()@%s: file_panic_check_failed %s\n' \
      "$(date '+%F %T')" "$REPLICATE_ID" >>"$log_file"
  fi

  rm -- "$LOCAL_PATH/$TDIGIT3.new.osc.gz"
  rm -- "$LOCAL_PATH/$TDIGIT3.new.state.txt"
};
67109

68110
fetch_minute_diff()
69111
{
70-
printf -v TDIGIT3 %03u $(($1 % 1000))
71-
ARG=$(($1 / 1000))
112+
printf -v TDIGIT3 %03u $(($REPLICATE_ID % 1000))
113+
ARG=$(($REPLICATE_ID / 1000))
72114
printf -v TDIGIT2 %03u $(($ARG % 1000))
73115
ARG=$(($ARG / 1000))
74116
printf -v TDIGIT1 %03u $ARG
@@ -78,7 +120,13 @@ fetch_minute_diff()
78120
mkdir -p "$LOCAL_DIR/$TDIGIT1/$TDIGIT2"
79121

80122
retry_fetch_file "$REMOTE_PATH/$TDIGIT3.osc.gz" "$LOCAL_PATH/$TDIGIT3.osc.gz" "gzip"
123+
if [[ -n $FILE_PANIC ]]; then
124+
file_panic
125+
fi
81126
retry_fetch_file "$REMOTE_PATH/$TDIGIT3.state.txt" "$LOCAL_PATH/$TDIGIT3.state.txt" "text"
127+
if [[ -n $FILE_PANIC ]]; then
128+
file_panic
129+
fi
82130

83131
TIMESTAMP_LINE=`grep timestamp $LOCAL_DIR/$TDIGIT1/$TDIGIT2/$TDIGIT3.state.txt`
84132
TIMESTAMP=${TIMESTAMP_LINE:10}
@@ -88,8 +136,8 @@ while [[ true ]];
88136
do
89137
{
90138
REPLICATE_ID=$(($REPLICATE_ID + 1))
91-
fetch_minute_diff $REPLICATE_ID
92-
sleep 1
139+
fetch_minute_diff
93140
echo "fetch_osc()@"`date "+%F %T"`": new_replicate_diff $REPLICATE_ID $TIMESTAMP" >>$LOCAL_DIR/fetch_osc.log
141+
sleep 1
94142
};
95143
done

0 commit comments

Comments
 (0)