Skip to content

Commit 7643b03

Browse files
committed
Allow to configure --batch-size
1 parent f404ebb commit 7643b03

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

run/fill-db

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ script_name="$(basename "$(readlink -f "${BASH_SOURCE[0]}")")"
1919

2020
# initial default values
2121
APP_NAME="RDF DB Filler"
22+
BATCH_SIZE_DEFAULT=5000
2223
CLONE_URL_OKH_ONTOLOGY="https://github.com/iop-alliance/OpenKnowHow.git"
2324
CLONE_URL_OLD_DATA="https://gitlab.opensourceecology.de/verein/projekte/okh/data.git"
2425
CLONE_URL_EXP_DATA="[email protected]:OSEGermany/OKH-data-experimental.git"
@@ -27,6 +28,7 @@ data_url="$CLONE_URL_OLD_DATA"
2728
data_branch="main"
2829
cleanup=false
2930
online=true
31+
batch_size="$BATCH_SIZE_DEFAULT"
3032

3133
function print_help() {
3234

@@ -37,6 +39,8 @@ function print_help() {
3739
echo "Options:"
3840
echo " -h, --help"
3941
echo " Print this usage help and exit"
42+
echo " --batch-size <NUMBER>"
43+
echo " The number of Turtle files to load into the DB at once [default: $BATCH_SIZE_DEFAULT]"
4044
echo " --offline"
4145
echo " Do not try to fetch git repos"
4246
echo " --experimental"
@@ -54,7 +58,7 @@ function print_help() {
5458
echo " $script_name --help"
5559
echo " $script_name --experimental"
5660
echo " $script_name --custom-data \"$CLONE_URL_OLD_DATA\" main"
57-
echo " $script_name --local-data /data"
61+
echo " $script_name --local-data /data --batch-size 1000"
5862
echo " $script_name --samples"
5963
}
6064

@@ -70,6 +74,10 @@ do
7074
print_help
7175
exit 0
7276
;;
77+
--batch-size)
78+
batch_size="$1"
79+
shift
80+
;;
7381
-o|--offline)
7482
online=false
7583
;;
@@ -187,7 +195,7 @@ find \
187195
"$data_ttls_root" \
188196
-name "*.ttl" \
189197
> "$ttl_list_file"
190-
time while mapfile -t -n 10000 batch && ((${#batch[@]}))
198+
time while mapfile -t -n "$batch_size" batch && ((${#batch[@]}))
191199
do
192200
"$jena_db_data_injector" \
193201
--loc "$db_tmp_dir" \

0 commit comments

Comments
 (0)