@@ -19,6 +19,7 @@ script_name="$(basename "$(readlink -f "${BASH_SOURCE[0]}")")"
1919
2020# initial default values
2121APP_NAME=" RDF DB Filler"
22+ BATCH_SIZE_DEFAULT=5000
2223CLONE_URL_OKH_ONTOLOGY=" https://github.com/iop-alliance/OpenKnowHow.git"
2324CLONE_URL_OLD_DATA=" https://gitlab.opensourceecology.de/verein/projekte/okh/data.git"
2425CLONE_URL_EXP_DATA=
" [email protected] :OSEGermany/OKH-data-experimental.git" @@ -27,6 +28,7 @@ data_url="$CLONE_URL_OLD_DATA"
2728data_branch=" main"
2829cleanup=false
2930online=true
31+ batch_size=" $BATCH_SIZE_DEFAULT "
3032
3133function print_help() {
3234
@@ -37,6 +39,8 @@ function print_help() {
3739 echo " Options:"
3840 echo " -h, --help"
3941 echo " Print this usage help and exit"
42+ echo " --batch-size <NUMBER>"
43+ echo " The number of Turtle files to load into the DB at once [default: $BATCH_SIZE_DEFAULT ]"
4044 echo " --offline"
4145 echo " Do not try to fetch git repos"
4246 echo " --experimental"
@@ -54,7 +58,7 @@ function print_help() {
5458 echo " $script_name --help"
5559 echo " $script_name --experimental"
5660 echo " $script_name --custom-data \" $CLONE_URL_OLD_DATA \" main"
57- echo " $script_name --local-data /data"
61+ echo " $script_name --local-data /data --batch-size 1000 "
5862 echo " $script_name --samples"
5963}
6064
7074 print_help
7175 exit 0
7276 ;;
77+ --batch-size)
78+ batch_size=" $1 "
79+ shift
80+ ;;
7381 -o|--offline)
7482 online=false
7583 ;;
@@ -187,7 +195,7 @@ find \
187195 " $data_ttls_root " \
188196 -name " *.ttl" \
189197 > " $ttl_list_file "
190- time while mapfile -t -n 10000 batch && (( ${# batch[@]} ))
198+ time while mapfile -t -n " $batch_size " batch && (( ${# batch[@]} ))
191199do
192200 " $jena_db_data_injector " \
193201 --loc " $db_tmp_dir " \
0 commit comments