#!/bin/sh
# counter_weekly.sh

# This script iterates through all published Datasets in all Dataverses and
# calls the Make Data Count API to update their citations from DataCite.
# Note: Requires curl, and jq for parsing the JSON responses returned by curl.

# Print the value selected by jq filter $2 from the JSON text in $1.
# Prints nothing when the value is null/false/missing or $1 is not valid JSON
# (jq's own error output is suppressed on purpose: callers fall back to a
# default message in that case).
_mdcField () {
  printf '%s\n' "$1" | jq -r "$2 // empty" 2>/dev/null
}

#######################################
# Recursively process one Dataverse collection: request a Make Data Count
# citation update for each dataset it lists, then descend into each child
# Dataverse.
# Arguments: $1 - ID of the Dataverse to process
# Globals:   writes DVCONTENTS, DATASET_URLS, CHILD_DV_IDS, DOI, HTTP_RESPONSE,
#            HTTP_STATUS, RESPONSE_BODY, STATUS, MESSAGE, ERROR (plain globals;
#            'local' is avoided because the script runs under /bin/sh)
# Outputs:   one status line per dataset on stdout
# Returns:   1 when the contents API returns nothing for $1
#######################################
processDV () {
  printf '%s\n' "Processing Dataverse ID#: $1"

  # Call the Dataverse API to get the contents of the Dataverse (without
  # credentials, this will only list published datasets and dataverses).
  DVCONTENTS=$(curl -s "http://localhost:8080/api/dataverses/$1/contents")
  if [ -z "$DVCONTENTS" ]; then
    printf '%s\n' "[ERROR] Dataverse ID#: $1 - empty response from the contents API"
    return 1
  fi

  # Extract both lists up front: DVCONTENTS is a global and is overwritten by
  # the recursive calls further down.
  DATASET_URLS=$(printf '%s\n' "$DVCONTENTS" \
    | jq -r '.data[] | select(.type == "dataset") | .persistentUrl')
  CHILD_DV_IDS=$(printf '%s\n' "$DVCONTENTS" \
    | jq -r '.data[] | select(.type == "dataverse") | .id')

  # Iterate over all datasets, one persistentUrl per line.
  while IFS= read -r subds; do
    # The here-document yields a single empty line when the list is empty.
    [ -n "$subds" ] || continue

    # The authority/identifier are preceded by a protocol/host,
    # i.e. https://doi.org/ - anything else cannot be addressed as "doi:".
    case $subds in
      *://doi.org/*)
        DOI=${subds#*://doi.org/}
        ;;
      *)
        printf '%s\n' "[SKIPPED] $subds - not a doi.org persistent URL"
        continue
        ;;
    esac

    # Call the Dataverse API for this dataset; -w appends the HTTP status
    # code on its own final line after the response body.
    HTTP_RESPONSE=$(curl -s -w '\n%{http_code}' -X POST \
      "http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI")

    # Last line is the status code; everything before it is the body.
    HTTP_STATUS=$(printf '%s\n' "$HTTP_RESPONSE" | tail -n 1)
    RESPONSE_BODY=$(printf '%s\n' "$HTTP_RESPONSE" | sed '$d')

    # Check the HTTP status code and report accordingly.
    case $HTTP_STATUS in
      200)
        # Successfully queued: status and message live in the nested data object.
        STATUS=$(printf '%s\n' "$RESPONSE_BODY" | jq -r '.data.status')
        MESSAGE=$(_mdcField "$RESPONSE_BODY" '.data.message')
        # If the message is missing or null, fall back to a fixed text.
        printf '%s\n' "[SUCCESS] doi:$DOI - $STATUS: ${MESSAGE:-Citation update queued}"
        ;;
      400)
        # Bad request
        ERROR=$(_mdcField "$RESPONSE_BODY" '.message')
        printf '%s\n' "[ERROR 400] doi:$DOI - Bad request${ERROR:+: $ERROR}"
        ;;
      404)
        # Not found
        ERROR=$(_mdcField "$RESPONSE_BODY" '.message')
        printf '%s\n' "[ERROR 404] doi:$DOI - Not found${ERROR:+: $ERROR}"
        ;;
      503)
        # Service unavailable (queue full); the message may be at the top
        # level or inside the data object.
        ERROR=$(_mdcField "$RESPONSE_BODY" '.message // .data.message')
        printf '%s\n' "[ERROR 503] doi:$DOI - Service unavailable: ${ERROR:-Queue is full}"
        ;;
      *)
        # Other error (includes 000 when curl could not connect)
        printf '%s\n' "[ERROR $HTTP_STATUS] doi:$DOI - Unexpected error"
        printf '%s\n' "Response: $RESPONSE_BODY"
        ;;
    esac
  done <<EOF
$DATASET_URLS
EOF

  # Now iterate over any child Dataverses and recursively process them.
  while IFS= read -r subdv; do
    [ -n "$subdv" ] || continue
    printf '%s\n' "$subdv"
    # Detach stdin so nothing in the recursive call can consume the
    # remaining lines of this loop's here-document.
    processDV "$subdv" </dev/null
  done <<EOF
$CHILD_DV_IDS
EOF
}
90+
# Start processing at the root dataverse. The root ID defaults to 1 and may be
# overridden by passing a different Dataverse ID as the first script argument.
processDV "${1:-1}"