Skip to content

Commit fa68e93

Browse files
committed
fix load_data script for increasing ingestion speed
1 parent 7d9b55e commit fa68e93

File tree

1 file changed

+5
-29
lines changed

1 file changed

+5
-29
lines changed

quickwit/load_data.sh

Lines changed: 5 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -20,48 +20,24 @@ ERROR_LOG="$4"
20 20
QW_CMD="$PWD/quickwit"
21 21

22 22
echo "Prepare clean index: jsonbench"
23-
$QW_CMD index create --index-config ./config/index-config.yaml --overwrite --yes
24-
25-
# Create a temporary directory for uncompressed files
26-
TEMP_DIR=$(mktemp -d /var/tmp/json_files.XXXXXX)
27-
trap "rm -rf $TEMP_DIR" EXIT # Cleanup temp directory on script exit
23+
./quickwit index create --index-config ./config/index-config.yaml --overwrite --yes
28 24

29 25
pushd $DATA_DIRECTORY
30 26
counter=0
31 27
for file in $(ls *.json.gz | head -n $MAX_FILES); do
32-
echo "Processing file: $file"
33-
34-
# Uncompress the file into the TEMP_DIR
35-
uncompressed_file="$TEMP_DIR/$(basename "${file%.gz}")"
36-
gunzip -c "$file" > "$uncompressed_file"
37-
38-
if [[ $? -ne 0 ]]; then
39-
echo "Error: Failed to uncompress $file" >> "$ERROR_LOG"
40-
continue
41-
fi
42-
43-
$QW_CMD tool local-ingest \
44-
--index jsonbench \
45-
--input-path "$uncompressed_file"
46-
47-
first_attempt=$?
48-
if [[ $first_attempt -eq 0 ]]; then
49-
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Successfully imported $file." >> "$SUCCESS_LOG"
50-
else
51-
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failed for $file. Giving up." >> "$ERROR_LOG"
52-
fi
28+
gunzip -c "$file"
53 29

54 30
counter=$((counter + 1))
55 31
if [[ $counter -ge $MAX_FILES ]]; then
56 32
break
57 33
fi
58-
done
34+
done | $QW_CMD tool local-ingest --index jsonbench
59 35
popd
60 36

61 37
# See https://github.com/quickwit-oss/quickwit/issues/4869
62-
echo "Wating 60 secs for Quickwit to commit the data"
38+
echo "Wait 1 min for Quickwit search become available"
63 39
sleep 60
64 40

65-
$QW_CMD tool gc --index jsonbench
41+
./quickwit tool gc --index jsonbench
66 42

67 43
echo -e "\nLoaded $MAX_FILES data files from $DATA_DIRECTORY to Quickwit."

0 commit comments

Comments
 (0)