@@ -15,30 +15,63 @@ log() {
# Run one Python workflow step and log its outcome.
#
# Arguments:
#   $1 - path of the Python script to execute
#   $2 - human-readable description used in the log messages
#   $3 - "critical" or "optional"; defaults to "critical" when omitted so
#        that two-argument callers keep the earlier stop-on-failure behaviour
# Outputs:
#   progress and error messages, emitted through log()
# Returns:
#   the status of the last log() call when the step succeeds or when an
#   optional step fails; a failing critical step terminates the whole
#   shell with exit status 1.
run_script() {
  local script_name=$1
  local description=$2
  local criticality=${3:-critical}

  log "Starting: $description"

  if python "$script_name"; then
    log "Completed: $description"
  else
    log "Error: $description failed"
    # Only steps explicitly marked optional are allowed to fail without
    # aborting; anything else is treated as critical.
    if [ "$criticality" = "critical" ]; then
      log "Critical error - stopping workflow"
      exit 1
    else
      log "Non-critical error - continuing workflow"
    fi
  fi
}
# ---------------------------------------------------------------------------
# CKAN metadata workflow driver.
# Relies on log() and run_script(), which are defined earlier in this script.
# ---------------------------------------------------------------------------

# Create logs directory if it doesn't exist
mkdir -p /app/logs

# Debug environment. Only the length of the API key is logged, never its
# value. The ${VAR:-} forms keep these lines safe if the variables are unset
# and the script is run with `set -u`.
if [ -n "${CKAN_API_KEY:-}" ]; then
  api_key_status="Set (${#CKAN_API_KEY} chars)"
else
  api_key_status="NOT SET"
fi
log "Environment check:"
log "CKAN_API_KEY: $api_key_status"
log "PYTHONPATH: ${PYTHONPATH:-}"
log "Working directory: $(pwd)"
log "Files in directory: $(ls -la)"

log "Starting CKAN metadata workflow..."

# Run diagnostic script first; its failure is reported but never fatal.
log "Running diagnostics..."
if python debug_api_access.py; then
  log "Diagnostics completed"
else
  log "Diagnostics had issues - proceeding anyway"
fi

# Run scripts in sequential order with better error handling.
# 1getSitesURL.py is skipped since we now use a static sites_urls.csv file.
log "Using pre-existing sites_urls.csv file"

# Check if the static CSV file exists; nothing downstream can work without it.
if [ ! -f "sites_urls.csv" ]; then
  log "ERROR: sites_urls.csv file not found!"
  exit 1
fi

# Show info about the CSV file
csv_lines=$(wc -l < sites_urls.csv)
log "Found sites_urls.csv with $csv_lines lines"

# Only the metadata fetch is allowed to stop the workflow; every later step
# is best-effort.
run_script "2CKANActionAPI.py" "Fetching metadata using CKAN API" "critical"
run_script "31downloadDataset.py" "Downloading existing dataset" "optional"
run_script "32merger.py" "Merging datasets" "optional"
run_script "33delete.py" "Deleting old dataset" "optional"
run_script "3updateSitesCatalog.py" "Updating sites catalog" "optional"
run_script "4uploadDataset.py" "Uploading final dataset" "optional"

# NOTE(review): this message is printed even when optional steps failed;
# confirm whether that is intended.
log "CKAN metadata workflow completed successfully!"
0 commit comments