# DataPusher+ Testing Run — GitHub Actions workflow file (captured from run #13).
# Manually-triggered workflow: stand up a CKAN 2.11 stack and exercise
# DataPusher+ against either a repo test directory or a pasted URL list.
name: DataPusher+ Testing Run
on:
  workflow_dispatch:
    inputs:
      datapusher_branch:
        description: 'DataPusher+ branch or commit'
        required: false
        default: 'main'
        type: string
      testing_directory:
        description: 'testing files directory (ignored if test_mode=urls)'
        required: true
        default: 'quick'
        type: string
      test_mode:
        description: 'dir (use tests/<dir>) or urls (use pasted links)'
        required: false
        default: 'dir'
        type: choice
        options: [dir, urls]
      test_urls:
        description: "When test_mode=urls: paste one URL per line. Optional inline format 'URL|FORMAT|NAME'."
        required: false
        default: ''
        type: string

# Workflow-level env; the `|| 'default'` guards cover non-dispatch triggers
# where inputs are empty.
env:
  FILES_DIR: ${{ github.event.inputs.testing_directory || 'quick' }}
  TEST_MODE: ${{ github.event.inputs.test_mode || 'dir' }}
  TEST_URLS: ${{ github.event.inputs.test_urls }}
  DATAPUSHER_BRANCH: ${{ github.event.inputs.datapusher_branch || 'main' }}
  CKAN_VERSION: "2.11"
  POSTGRES_PASSWORD: postgres
  CKAN_DB_PASSWORD: pass
  CKAN_SITE_URL: http://localhost:5000
  CKAN_SITE_ID: default
  CKAN_SITE_TITLE: "CKAN Test Instance"
jobs:
  setup:
    runs-on: ubuntu-latest
    # All steps run inside the CKAN dev image; root is needed for apt/chmod.
    container:
      image: ckan/ckan-dev:2.11
      options: --user root
    services:
      solr:
        image: ckan/ckan-solr:2.11-solr9
        ports: ["8983:8983"]
      postgres:
        image: ckan/ckan-postgres-dev:2.11
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: postgres
        options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
      redis:
        image: redis:3
        ports: ["6379:6379"]
    # Service hostnames (postgres/solr/redis) resolve from the job container.
    env:
      CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test
      CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test
      CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test
      CKAN_SOLR_URL: http://solr:8983/solr/ckan
      CKAN_REDIS_URL: redis://redis:6379/1
      CKAN_SITE_URL: http://localhost:5000
steps:
- name: Fix permissions and install essential tools
run: |
mkdir -p /__w/_temp
chmod -R 777 /__w/_temp
chmod -R 777 /__w/
apt-get update -y
apt-get install -y curl wget net-tools procps postgresql-client jq
echo "Essential tools installed successfully"
- uses: actions/checkout@v4
- name: Wait for PostgreSQL to be ready
run: |
echo "Waiting for PostgreSQL to be ready..."
timeout=90
while [ $timeout -gt 0 ]; do
if PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "SELECT 1;" >/dev/null 2>&1; then
echo "PostgreSQL is ready!"
break
fi
echo "Postgres not ready yet ($timeout s left)..."
sleep 3
timeout=$((timeout-3))
done
if [ $timeout -le 0 ]; then
echo "Timeout waiting for PostgreSQL"
exit 1
fi
- name: Setup database users and permissions
run: |
set -eu
echo "Creating database users (if not exist)..."
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='ckan_default'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_write'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_write WITH PASSWORD '$CKAN_DB_PASSWORD';"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_read'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_read WITH PASSWORD '$CKAN_DB_PASSWORD';"
echo "Creating databases (if not exist)..."
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='ckan_test'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE ckan_test OWNER ckan_default;"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='datastore_test'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE datastore_test OWNER ckan_default;"
echo "Granting permissions (best-effort)..."
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE ckan_test TO ckan_default;"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE datastore_test TO datastore_write;"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT CONNECT ON DATABASE datastore_test TO datastore_read;"
echo "Database setup completed"
- name: Install requirements, ckanapi and datapusher-plus
run: |
set -eu
python3 -m pip install --upgrade pip setuptools wheel
if [ -f requirements.txt ]; then
pip install -r requirements.txt
fi
if [ -f requirements-dev.txt ]; then
pip install -r requirements-dev.txt
fi
if [ -f setup.py ] || [ -f pyproject.toml ]; then
pip install -e .
fi
pip install --upgrade ckanapi
pip install datasize
apt install -y python3-virtualenv python3-dev python3-pip python3-wheel build-essential libxslt1-dev libxml2-dev zlib1g-dev git libffi-dev libpq-dev uchardet unzip
echo "Installing datapusher-plus from branch: $DATAPUSHER_BRANCH"
pip install -e "git+https://github.com/dathere/datapusher-plus.git@$DATAPUSHER_BRANCH#egg=datapusher-plus"
pip install -e 'git+https://github.com/ckan/ckanext-scheming.git#egg=ckanext-scheming'
echo "Installed ckanapi and datapusher-plus (best-effort)"
- name: Install qsv (musl static)
run: |
set -eu
echo "Attempting to download static qsv musl binary (best-effort)..."
QSV_VER="7.1.0"
QSV_ZIP="qsv-${QSV_VER}-x86_64-unknown-linux-musl.zip"
QSV_URL="https://github.com/dathere/qsv/releases/download/${QSV_VER}/${QSV_ZIP}"
mkdir -p /tmp/qsv && cd /tmp/qsv
if wget -q --spider "$QSV_URL"; then
wget -q "$QSV_URL" -O "$QSV_ZIP"
unzip -o "$QSV_ZIP"
if [ -f qsvdp ]; then
mv qsvdp /usr/local/bin/qsvdp
chmod +x /usr/local/bin/qsvdp
echo "Installed qsvdp to /usr/local/bin/qsvdp"
elif [ -f qsv ]; then
mv qsv /usr/local/bin/qsv
chmod +x /usr/local/bin/qsv
echo "Installed qsv to /usr/local/bin/qsv"
else
echo "Downloaded archive but could not find qsv binary inside"
fi
else
echo "qsv release URL not reachable; skipping qsv install"
fi
/usr/local/bin/qsvdp --version >/dev/null 2>&1 || /usr/local/bin/qsv --version >/dev/null 2>&1 || echo "qsv not installed or not runnable (this is okay for plugin presence test)."
- name: Setup CKAN configuration (/srv/app/src/ckan/test-core.ini)
run: |
set -eu
sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL:-***postgres/ckan_test}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini
if ! grep -q "^solr_url" /srv/app/src/ckan/test-core.ini; then
echo "solr_url = ${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}" >> /srv/app/src/ckan/test-core.ini
fi
if ! grep -q "^ckan.redis.url" /srv/app/src/ckan/test-core.ini; then
echo "ckan.redis.url = ${CKAN_REDIS_URL:-redis://redis:6379/1}" >> /srv/app/src/ckan/test-core.ini
fi
CKAN_SITE_URL="${CKAN_SITE_URL:-http://localhost:5000}"
CKAN_SQLALCHEMY_URL="${CKAN_SQLALCHEMY_URL:-***postgres/ckan_test}"
CKAN_DATASTORE_WRITE_URL="${CKAN_DATASTORE_WRITE_URL:-***postgres/datastore_test}"
CKAN_DATASTORE_READ_URL="${CKAN_DATASTORE_READ_URL:-***postgres/datastore_test}"
CKAN_SOLR_URL="${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}"
CKAN_REDIS_URL="${CKAN_REDIS_URL:-redis://redis:6379/1}"
REPLACE_FILE="$(mktemp)"
ADD_FILE="$(mktemp)"
MISSING_ADD_FILE="$(mktemp)"
: > "$REPLACE_FILE"
: > "$ADD_FILE"
: > "$MISSING_ADD_FILE"
printf '%s\n' \
"ckan.site_url|${CKAN_SITE_URL}" \
"sqlalchemy.url|${CKAN_SQLALCHEMY_URL}" \
"ckan.datastore.write_url|${CKAN_DATASTORE_WRITE_URL}" \
"ckan.datastore.read_url|${CKAN_DATASTORE_READ_URL}" \
"solr_url|${CKAN_SOLR_URL}" \
"ckan.redis.url|${CKAN_REDIS_URL}" \
> "$REPLACE_FILE"
cat > "$ADD_FILE" <<'EOF'
ckan.site_id = default
ckan.site_title = CKAN Test
ckan.auth.create_default_api_keys = true
ckanext.datapusher_plus.qsv_bin = /usr/local/bin/qsvdp
scheming.dataset_schemas = ckanext.datapusher_plus:dataset-druf.yaml
scheming.presets = ckanext.scheming:presets.json
scheming.dataset_fallback = false
ckanext.datapusher_plus.use_proxy = false
ckanext.datapusher_plus.download_proxy =
ckanext.datapusher_plus.ssl_verify = false
ckanext.datapusher_plus.upload_log_level = INFO
ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsxb xlsm ods geojson shp qgis zip
ckanext.datapusher_plus.pii_screening = false
ckanext.datapusher_plus.pii_found_abort = false
ckanext.datapusher_plus.pii_regex_resource_id_or_alias =
ckanext.datapusher_plus.pii_show_candidates = false
ckanext.datapusher_plus.pii_quick_screen = false
ckanext.datapusher_plus.preview_rows = 100
ckanext.datapusher_plus.download_timeout = 300
ckanext.datapusher_plus.max_content_length = 1256000000000
ckanext.datapusher_plus.chunk_size = 16384
ckanext.datapusher_plus.default_excel_sheet = 0
ckanext.datapusher_plus.sort_and_dupe_check = true
ckanext.datapusher_plus.dedup = false
ckanext.datapusher_plus.unsafe_prefix = unsafe_
ckanext.datapusher_plus.reserved_colnames = _id
ckanext.datapusher_plus.prefer_dmy = false
ckanext.datapusher_plus.ignore_file_hash = true
ckanext.datapusher_plus.auto_index_threshold = 3
ckanext.datapusher_plus.auto_index_dates = true
ckanext.datapusher_plus.auto_unique_index = true
ckanext.datapusher_plus.summary_stats_options =
ckanext.datapusher_plus.add_summary_stats_resource = false
ckanext.datapusher_plus.summary_stats_with_preview = false
ckanext.datapusher_plus.qsv_stats_string_max_length = 32767
ckanext.datapusher_plus.qsv_dates_whitelist = date,time,due,open,close,created
ckanext.datapusher_plus.qsv_freq_limit = 10
ckanext.datapusher_plus.auto_alias = true
ckanext.datapusher_plus.auto_alias_unique = false
ckanext.datapusher_plus.copy_readbuffer_size = 1048576
ckanext.datapusher_plus.type_mapping = {"String": "text", "Integer": "numeric","Float": "numeric","DateTime": "timestamp","Date": "date","NULL": "text"}
ckanext.datapusher_plus.auto_spatial_simplication = true
ckanext.datapusher_plus.spatial_simplication_relative_tolerance = 0.1
ckanext.datapusher_plus.latitude_fields = latitude,lat
ckanext.datapusher_plus.longitude_fields = longitude,long,lon
ckanext.datapusher_plus.jinja2_bytecode_cache_dir = /tmp/jinja2_butecode_cache
ckanext.datapusher_plus.auto_unzip_one_file = true
EOF
if [ -f /srv/app/src/ckan/test-core.ini ]; then
echo "Patching selective keys in /srv/app/src/ckan/test-core.ini..."
awk 'BEGIN{in=0}
/^\[DEFAULT\]/{ print; in=1; next }
/^\[.*\]/{ if(in){ print "debug = true"; in=0 } }
{ if(in){ if($1 == "debug") next; print } else { print } }
END { if(in) print "debug = true" }' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.tmp && mv /srv/app/src/ckan/test-core.ini.tmp /srv/app/src/ckan/test-core.ini
while IFS= read -r entry || [ -n "$entry" ]; do
key="$(printf '%s' "$entry" | cut -d'|' -f1)"
value="$(printf '%s' "$entry" | cut -d'|' -f2-)"
esc_value="$(printf '%s' "$value" | sed -e 's/[\/&]/\\&/g')"
if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then
sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini
else
printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE"
fi
done < "$REPLACE_FILE"
while IFS= read -r ln || [ -n "$ln" ]; do
[ -z "$ln" ] && continue
case "$ln" in
\#*)
if ! grep -Fq "$ln" /srv/app/src/ckan/test-core.ini; then
printf '%s\n' "$ln" >> "$MISSING_ADD_FILE"
fi
;;
*)
key="$(printf '%s' "$ln" | cut -d'=' -f1 | sed 's/[[:space:]]*$//')"
value="$(printf '%s' "$ln" | cut -d'=' -f2- | sed 's/^[[:space:]]*//')"
esc_value="$(printf '%s' "$value" | sed -e 's/[\/&]/\\&/g')"
if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then
sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini
else
printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE"
fi
;;
esac
done < "$ADD_FILE"
if [ -s "$MISSING_ADD_FILE" ]; then
awk -v addfile="$MISSING_ADD_FILE" '
BEGIN{ inserted=0; while ((getline line < addfile) > 0) { add[++na]=line } close(addfile) }
{ print; if(!inserted && $0=="[app:main]") { for(i=1;i<=na;i++) print add[i]; inserted=1 } }
END{ if(!inserted){ print "[app:main]"; for(i=1;i<=na;i++) print add[i] } }' \
/srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini
fi
sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL}|g" /srv/app/src/ckan/test-core.ini
else
echo "/srv/app/src/ckan/test-core.ini not found — no selective patching performed."
fi
REQUIRED_PLUGINS="datastore datapusher_plus scheming_datasets"
if grep -q "^ckan.plugins" /srv/app/src/ckan/test-core.ini; then
echo "Appending required plugins to existing ckan.plugins line"
current=$(grep "^ckan.plugins" /srv/app/src/ckan/test-core.ini | head -n1 | cut -d'=' -f2-)
for p in $REQUIRED_PLUGINS; do
echo "$current" | grep -qw "$p" || current="$current $p"
done
awk -v new="ckan.plugins = $current" 'BEGIN{done=0} {if(!done && $1=="ckan.plugins") {print new; done=1} else print $0}' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini
else
echo "ckan.plugins = $REQUIRED_PLUGINS" >> /srv/app/src/ckan/test-core.ini
echo "Added ckan.plugins line with required plugins."
fi
echo "---- /srv/app/src/ckan/test-core.ini (cat) ----"
cat /srv/app/src/ckan/test-core.ini
echo "---- end ----"
- name: Initialize CKAN database
run: |
echo "Testing connectivity with CKAN DB user..."
if ! PGPASSWORD=$CKAN_DB_PASSWORD psql -h postgres -U ckan_default -d ckan_test -c "SELECT 1;" >/dev/null 2>&1; then
echo "Cannot connect as ckan_default. Attempting to create database owner and db..."
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER IF NOT EXISTS ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE IF NOT EXISTS ckan_test OWNER ckan_default;"
fi
echo "Running ckan db init (may be idempotent)..."
if ckan -c /srv/app/src/ckan/test-core.ini db init; then
echo "CKAN DB initialized."
else
echo "ckan db init returned non-zero; continuing (may already be initialized)."
fi
echo "Setting datastore permissions..."
if ckan -c /srv/app/src/ckan/test-core.ini datastore set-permissions | PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres --set ON_ERROR_STOP=1; then
echo "Datastore permissions set."
else
echo "Datastore permission step returned non-zero; continuing."
fi
- name: Start CKAN server
run: |
set -eu
echo "Starting CKAN server in background..."
nohup ckan -c /srv/app/src/ckan/test-core.ini run --host 0.0.0.0 --port 5000 --disable-reloader > /tmp/ckan_stdout.log 2>&1 &
CKAN_PID=$!
echo "CKAN PID=$CKAN_PID"
timeout=120
while [ $timeout -gt 0 ]; do
if ! kill -0 "$CKAN_PID" >/dev/null 2>&1; then
echo "CKAN process died. Showing last lines of log:"
tail -n 200 /tmp/ckan_stdout.log
exit 1
fi
if curl -fsS "${CKAN_SITE_URL}/api/3/action/status_show" >/dev/null 2>&1; then
echo "CKAN API responding"
break
fi
echo "Waiting for CKAN API... ($timeout s left)"
sleep 3
timeout=$((timeout-3))
done
if [ $timeout -le 0 ]; then
echo "Timeout waiting for CKAN to start. Dumping logs..."
tail -n 200 /tmp/ckan_stdout.log
ss -tlnp || netstat -tlnp
exit 1
fi
echo "CKAN started successfully"
- name: Create sysadmin user admin_ckan and get apikey
run: |
set -eu
echo "Creating user admin_ckan..."
user_response=$(ckanapi action user_create --config /srv/app/src/ckan/test-core.ini \
name=admin_ckan \
email=admins@example.com \
password=test1234 \
fullname="CKAN Administrator" \
with_apitoken=true \
about="Created by GitHub Actions test" 2>/dev/null) || echo "user_create returned non-zero (user may already exist)"
echo "User creation response: $user_response"
echo "Converting admin_ckan user to sysadmin..."
ckan -c /srv/app/src/ckan/test-core.ini sysadmin add admin_ckan
echo "User admin_ckan promoted to sysadmin"
json_response=$(echo "$user_response" | sed -n '/{/,/}/p')
api_key=$(echo "$json_response" | jq -r '.token // empty')
if [ -n "$api_key" ] && [ "$api_key" != "null" ] && [ "$api_key" != "empty" ]; then
echo "CKAN_API_KEY=$api_key" >> $GITHUB_ENV
echo "API key saved: $api_key"
else
echo "No API key found in response"
fi
echo "User admin_ckan creation completed"
- name: Create API token for datapusher-plus and add to config
run: |
set -eu
echo "Creating API token for datapusher-plus service account..."
echo "Running: ckan user token add admin_ckan dpplus"
dp_token_output=$(ckan -c /srv/app/src/ckan/test-core.ini user token add admin_ckan dpplus 2>&1)
echo "Full token creation output:"
echo "$dp_token_output"
dp_token=$(echo "$dp_token_output" | tail -n 1 | tr -d '\t')
echo "Extracted token: '$dp_token'"
if [ -n "$dp_token" ] && [ "$dp_token" != "null" ]; then
echo "Created datapusher-plus API token: $dp_token"
ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$dp_token"
echo "Verifying token was added to config:"
grep "ckanext.datapusher_plus.api_token" /srv/app/src/ckan/test-core.ini || echo "Token not found in config!"
echo "DATAPUSHER_PLUS_API_TOKEN=$dp_token" >> $GITHUB_ENV
echo "API token added to CKAN configuration successfully"
else
echo "Failed to create API token for datapusher-plus; using main CKAN API key as fallback..."
ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$CKAN_API_KEY"
fi
- name: Create organization with ckanapi
run: |
set -eu
echo "Creating organization demo-organization (idempotent)..."
ckanapi action organization_create --config /srv/app/src/ckan/test-core.ini \
name=demo-organization \
title="Demo Data Publishing Organization" \
description="Demo org created by GitHub Actions for datapusher-plus testing." || echo "organization_create returned non-zero (may already exist)"
echo "Add admin_ckan as admin to the organization"
ckanapi action organization_member_create --config /srv/app/src/ckan/test-core.ini \
id=demo-organization username=admin_ckan role=admin || echo "organization_member_create returned non-zero (may already be member)"
- name: Create dataset with ckanapi
run: |
set -eu
echo "Creating dataset my-first-dataset (idempotent)..."
if ckanapi action package_create \
name=my-first-dataset \
title="My First Comprehensive Dataset" \
notes="This is a comprehensive demo dataset created via ckanapi and GitHub Actions for testing CKAN functionality and datapusher-plus integration." \
owner_org=demo-organization \
license_id=cc-by \
version=1.0.0 \
author="GitHub Actions Automation" \
author_email=noreply@example.com \
maintainer="CKAN Admin" \
maintainer_email=admin@example.com \
url=https://github.com/your-repo/your-project \
private:false \
state=active \
'tags:[{"name":"demo"},{"name":"test"},{"name":"github-actions"},{"name":"automation"},{"name":"csv-data"},{"name":"datapusher-plus"}]' \
-c /srv/app/src/ckan/test-core.ini; then
echo "Dataset created successfully!"
else
echo "Dataset might already exist, continuing..."
fi
- name: Add resource to dataset with ckanapi
run: |
set -eu
echo "Adding resource to my-first-dataset..."
if ckanapi action resource_create \
package_id=my-first-dataset \
url="https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/100kb.csv" \
name="Sample CSV Data - 100KB Test File" \
description="Test CSV resource for datapusher-plus pipeline." \
format=CSV \
mimetype="text/csv" \
-c /srv/app/src/ckan/test-core.ini; then
echo "Resource created successfully!"
else
echo "Resource creation failed (ok for pipeline); showing dataset:"
ckanapi action package_show id=my-first-dataset -c /srv/app/src/ckan/test-core.ini || true
fi
- name: Display CKAN instance inventory
run: |
set -eu
echo "=== CKAN Status (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/status_show" | python3 -m json.tool
echo ""
echo "=== All Datasets (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/package_list" | python3 -m json.tool
echo ""
echo "=== All Organizations (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/organization_list" | python3 -m json.tool
echo ""
echo "=== All Users (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/user_list" | python3 -m json.tool
- name: Test datastore functionality
run: |
set -eu
echo "Testing datastore functionality..."
echo "=== Testing datastore read access ==="
metadata_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=_table_metadata")
echo "Table metadata response: $metadata_response"
if echo "$metadata_response" | jq -e '.success == true' >/dev/null 2>&1; then
echo "✓ Datastore read access working"
else
echo "✗ Datastore read access failed"
fi
echo "=== Testing datastore write access ==="
test_response=$(curl -s -X POST \
-H "Content-Type: application/json" \
-H "Authorization: $CKAN_API_KEY" \
-d '{
"resource": {"package_id": "my-first-dataset"},
"fields": [{"id": "test_col", "type": "text"}, {"id": "value", "type": "int"}],
"records": [{"test_col": "hello", "value": 1}, {"test_col": "world", "value": 2}]
}' \
"http://localhost:5000/api/3/action/datastore_create")
echo "Test table creation response: $test_response"
if echo "$test_response" | jq -e '.success == true' >/dev/null 2>&1; then
echo "✓ Datastore write access working"
test_resource_id=$(echo "$test_response" | jq -r '.result.resource_id')
echo "=== Testing datastore query ==="
query_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$test_resource_id")
echo "Query response: $query_response"
echo "=== Cleaning up test table ==="
curl -s -X POST \
-H "Content-Type: application/json" \
-H "Authorization: $CKAN_API_KEY" \
-d "{\"resource_id\": \"$test_resource_id\"}" \
"http://localhost:5000/api/3/action/datastore_delete" >/dev/null
echo "✓ Datastore functionality test completed successfully"
else
echo "✗ Datastore write access failed"
fi
- name: Start CKAN background job worker
run: |
set -eu
echo "Starting CKAN background job worker (CRITICAL for DataPusher Plus)..."
nohup ckan -c /srv/app/src/ckan/test-core.ini jobs worker > /tmp/ckan_worker.log 2>&1 &
WORKER_PID=$!
echo "CKAN Worker PID=$WORKER_PID"
echo "CKAN_WORKER_PID=$WORKER_PID" >> $GITHUB_ENV
sleep 5
if kill -0 "$WORKER_PID" >/dev/null 2>&1; then
echo "Background job worker started successfully"
head -n 20 /tmp/ckan_worker.log || echo "No worker logs yet"
else
echo "Worker failed to start"; cat /tmp/ckan_worker.log || true
fi
- name: Test DataPusher Plus functionality - Remote URLs
if: env.TEST_MODE == 'urls'
continue-on-error: true
run: |
set -u
echo "=== Testing DataPusher Plus Functionality - Remote URLs ==="
echo "timestamp,file_name,upload_status,resource_id,datapusher_status,datastore_active,rows_imported,processing_time,error_message" > /tmp/test_results.csv
echo "Creating test dataset for URL mode..."
if ! ckanapi action package_show id=datapusher-plus-test-urls -c /srv/app/src/ckan/test-core.ini >/dev/null 2>&1; then
ckanapi action package_create \
name=datapusher-plus-test-urls \
title="DataPusher Plus Remote URLs Test Dataset" \
owner_org=demo-organization \
-c /srv/app/src/ckan/test-core.ini
fi
total_files=0; passed_files=0; failed_files=0
echo "$TEST_URLS" | while IFS= read -r line || [ -n "$line" ]; do
[ -z "$line" ] && continue
if echo "$line" | grep -q '|'; then
url=$(echo "$line" | cut -d'|' -f1)
format=$(echo "$line" | cut -d'|' -f2)
name=$(echo "$line" | cut -d'|' -f3)
else
url="$line"; format="CSV"; name=$(basename "$url")
fi
[ -z "$format" ] && format="CSV"
[ -z "$name" ] && name=$(basename "$url")
total_files=$((total_files + 1))
start_time=$(date +%s)
upload_status="FAILED"; resource_id=""; datapusher_status="N/A"
datastore_active="false"; rows_imported="0"; error_message=""
if resource_response=$(ckanapi action resource_create \
package_id=datapusher-plus-test-urls \
url="$url" \
name="Remote URL Test: $name" \
format="$format" \
-c /srv/app/src/ckan/test-core.ini 2>&1); then
upload_status="SUCCESS"
resource_id=$(echo "$resource_response" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
[ -n "$resource_id" ] || resource_id=$(echo "$resource_response" | sed -n 's/.*"id"[[:space:]]*:[[:space:]]*"\([a-f0-9-]*\)".*/\1/p')
if [ -n "$resource_id" ] && [ "$resource_id" != "null" ]; then
max_attempts=90
for attempt in $(seq 1 $max_attempts); do
sleep 2
if dp_status_response=$(curl -s -H "Authorization: $CKAN_API_KEY" \
"http://localhost:5000/api/3/action/datapusher_status?resource_id=$resource_id" 2>/dev/null); then
if echo "$dp_status_response" | grep -q '"success"[[:space:]]*:[[:space:]]*true'; then
datapusher_status=$(echo "$dp_status_response" | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
[ -n "$datapusher_status" ] || datapusher_status="unknown"
datapusher_status=$(echo "$datapusher_status" | tr -d '\n\r\t ' | cut -c1-10)
[ "$datapusher_status" = "complete" ] && break
if [ "$datapusher_status" = "error" ]; then
error_info=$(echo "$dp_status_response" | grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' | head -1)
[ -n "$error_info" ] || error_info="DataPusher processing error"
error_message="DataPusher error: $error_info"; break
fi
fi
fi
done
if final_resource=$(curl -s "http://localhost:5000/api/3/action/resource_show?id=$resource_id" 2>/dev/null); then
if echo "$final_resource" | grep -q '"datastore_active"[[:space:]]*:[[:space:]]*true'; then
datastore_active="true"
if datastore_data=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$resource_id&limit=1" 2>/dev/null); then
rows_imported=$(echo "$datastore_data" | grep -o '"total"[[:space:]]*:[[:space:]]*[0-9]*' | sed 's/.*"total"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/')
[ -n "$rows_imported" ] || rows_imported="0"
fi
fi
fi
else
error_message="No valid resource ID extracted for $name"
fi
else
error_message="Resource creation failed: $(echo "$resource_response" | head -1)"
fi
end_time=$(date +%s); processing_time=$((end_time - start_time))
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo "$timestamp,$name,$upload_status,$resource_id,$datapusher_status,$datastore_active,$rows_imported,$processing_time,\"$error_message\"" >> /tmp/test_results.csv
if [ "$upload_status" = "SUCCESS" ] && [ "$datapusher_status" = "complete" ] && [ "$datastore_active" = "true" ]; then
passed_files=$((passed_files + 1))
else
failed_files=$((failed_files + 1))
fi
sleep 1
done
echo "URL mode summary: total=$total_files passed=$passed_files failed=$failed_files"
- name: Test DataPusher Plus functionality - Local Files (HTTP Served)
if: ${{ env.TEST_MODE == 'dir' }}
shell: bash
continue-on-error: true
run: |
set -u
echo "=== Testing DataPusher Plus Functionality - Local Repository Files ==="
echo "timestamp,file_name,upload_status,resource_id,datapusher_status,datastore_active,rows_imported,processing_time,error_message" > /tmp/test_results.csv
echo "file_name,reason_skipped" > /tmp/skipped_files.csv
TEST_FILES_DIR="${GITHUB_WORKSPACE}/tests/$FILES_DIR"
if [ ! -d "$TEST_FILES_DIR" ]; then
echo "ERROR: Test files directory not found: $TEST_FILES_DIR"
# Fail-graceful: still let summary run
touch /tmp/worker_analysis.csv
exit 0
fi
echo "Starting HTTP server to serve test files..."
cd "$TEST_FILES_DIR"
python3 -m http.server 8080 > /tmp/http_server.log 2>&1 &
HTTP_SERVER_PID=$!
sleep 3
if ! curl -s "http://localhost:8080/" > /dev/null; then
echo "ERROR: HTTP server failed to start (graceful)."
# Still produce empty results so summary/artifacts exist
kill $HTTP_SERVER_PID 2>/dev/null || true
touch /tmp/test_results.csv /tmp/worker_analysis.csv
exit 0
fi
: > /tmp/test_files.txt
find "$TEST_FILES_DIR" -type f -name "*" | while read filepath; do
filename=$(basename "$filepath")
name=$(echo "$filename" | sed 's/\.[^.]*$//')
extension=$(echo "$filename" | sed 's/.*\.//' | tr '[:upper:]' '[:lower:]')
case "$extension" in
csv) echo "$name|http://localhost:8080/$filename|CSV|text/csv|CSV file: $filename" >> /tmp/test_files.txt ;;
tsv) echo "$name|http://localhost:8080/$filename|TSV|text/tab-separated-values|TSV file: $filename" >> /tmp/test_files.txt ;;
xlsx|xls) echo "$name|http://localhost:8080/$filename|XLSX|application/vnd.openxmlformats-officedocument.spreadsheetml.sheet|Excel file: $filename" >> /tmp/test_files.txt ;;
json) echo "$name|http://localhost:8080/$filename|JSON|application/json|JSON file: $filename" >> /tmp/test_files.txt ;;
geojson) echo "$name|http://localhost:8080/$filename|GEOJSON|application/geo+json|GeoJSON file: $filename" >> /tmp/test_files.txt ;;
txt) echo "$name|http://localhost:8080/$filename|TXT|text/plain|Text file: $filename" >> /tmp/test_files.txt ;;
*) echo "$filename,Unsupported file format: .$extension" >> /tmp/skipped_files.csv ;;
esac
done
if [ ! -s /tmp/test_files.txt ]; then
echo "No supported files discovered (graceful)."
kill $HTTP_SERVER_PID 2>/dev/null || true
touch /tmp/worker_analysis.csv
exit 0
fi
if [ -f /tmp/skipped_files.csv ] && [ $(wc -l < /tmp/skipped_files.csv) -gt 1 ]; then
echo "Some files will be skipped."
fi
if ckanapi action package_create \
name=datapusher-plus-test-local-http \
title="DataPusher Plus Local Files Test Dataset (HTTP Served)" \
owner_org=demo-organization \
-c /srv/app/src/ckan/test-core.ini >/dev/null 2>&1; then
echo "Test dataset created"
else
echo "Test dataset might already exist"
fi
total_files=0; passed_files=0; failed_files=0; skipped_files=0
while IFS='|' read -r file_name file_url file_format file_mimetype file_desc || [ -n "$file_name" ]; do
[ -z "$file_name" ] && continue
case "$file_name" in '#'*) continue ;; esac
if ! curl -s --head "$file_url" > /dev/null; then
filename_from_url=$(basename "$file_url")
echo "$filename_from_url,File not accessible via HTTP" >> /tmp/skipped_files.csv
skipped_files=$((skipped_files + 1))
continue
fi
total_files=$((total_files + 1))
start_time=$(date +%s)
upload_status="FAILED"; resource_id=""; datapusher_status="N/A"
datastore_active="false"; rows_imported="0"; error_message=""
if resource_response=$(ckanapi action resource_create \
package_id=datapusher-plus-test-local-http \
url="$file_url" \
name="Local HTTP Test: $file_name" \
description="$file_desc" \
format="$file_format" \
mimetype="$file_mimetype" \
-c /srv/app/src/ckan/test-core.ini 2>&1); then
upload_status="SUCCESS"
resource_id=$(echo "$resource_response" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
[ -n "$resource_id" ] || resource_id=$(echo "$resource_response" | sed -n 's/.*"id"[[:space:]]*:[[:space:]]*"\([a-f0-9-]*\)".*/\1/p')
if [ -n "$resource_id" ] && [ "$resource_id" != "null" ]; then
max_attempts=90
for attempt in $(seq 1 $max_attempts); do
sleep 2
if dp_status_response=$(curl -s -H "Authorization: $CKAN_API_KEY" \
"http://localhost:5000/api/3/action/datapusher_status?resource_id=$resource_id" 2>/dev/null); then
if echo "$dp_status_response" | grep -q '"success"[[:space:]]*:[[:space:]]*true'; then
datapusher_status=$(echo "$dp_status_response" | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
[ -n "$datapusher_status" ] || datapusher_status="unknown"
datapusher_status=$(echo "$datapusher_status" | tr -d '\n\r\t ' | cut -c1-10)
[ "$datapusher_status" = "complete" ] && break
if [ "$datapusher_status" = "error" ]; then
error_info=$(echo "$dp_status_response" | grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' | head -1)
[ -n "$error_info" ] || error_info="DataPusher processing error"
error_message="DataPusher error: $error_info"; break
fi
fi
fi
done
if final_resource=$(curl -s "http://localhost:5000/api/3/action/resource_show?id=$resource_id" 2>/dev/null); then
if echo "$final_resource" | grep -q '"datastore_active"[[:space:]]*:[[:space:]]*true'; then
datastore_active="true"
if datastore_data=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$resource_id&limit=1" 2>/dev/null); then
rows_imported=$(echo "$datastore_data" | grep -o '"total"[[:space:]]*:[[:space:]]*[0-9]*' | sed 's/.*"total"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/')
[ -n "$rows_imported" ] || rows_imported="0"
fi
fi
fi
else
error_message="No valid resource ID extracted for $file_name"
fi
else
error_message="Resource creation failed: $(echo "$resource_response" | head -1)"
fi
end_time=$(date +%s); processing_time=$((end_time - start_time))
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo "$timestamp,$file_name,$upload_status,$resource_id,$datapusher_status,$datastore_active,$rows_imported,$processing_time,\"$error_message\"" >> /tmp/test_results.csv
if [ "$upload_status" = "SUCCESS" ] && [ "$datapusher_status" = "complete" ] && [ "$datastore_active" = "true" ]; then
passed_files=$((passed_files + 1))
else
failed_files=$((failed_files + 1))
fi
sleep 1
done < /tmp/test_files.txt
kill $HTTP_SERVER_PID 2>/dev/null || true
wait $HTTP_SERVER_PID 2>/dev/null || true
      - name: Generate Combined Test Results and Worker Analysis
        if: always()
        shell: bash
        run: |
          # Bash for safe arithmetic; still guard math/greps carefully
          set -euo pipefail
          echo "=== Generating Combined Test Results and Worker Analysis ==="
          echo "=== Processing DataPusher Plus Worker Logs ==="
          # If no worker log exists, emit a header-only analysis CSV so the
          # downstream report still finds a well-formed file.
          if [[ ! -f /tmp/ckan_worker.log ]]; then
            echo "No worker log file found at /tmp/ckan_worker.log"
            echo "timestamp,job_id,file_name,status,qsv_version,file_format,encoding,normalized,valid_csv,sorted,db_safe_headers,analysis,records,total_time,download_time,analysis_time,copying_time,indexing_time,formulae_time,metadata_time,rows_copied,columns_indexed,error_type,error_message,data_quality_score,processing_efficiency" > /tmp/worker_analysis.csv
          else
            echo "Worker log file size: $(du -h /tmp/ckan_worker.log | cut -f1)"
            # Best-effort: an analyzer failure must not fail the whole job.
            python3 "${GITHUB_WORKSPACE}/tests/log_analyzer.py" analyze /tmp/ckan_worker.log /tmp/worker_analysis.csv || true
          fi
          # If neither file exists, still finish gracefully
          if [[ ! -f /tmp/test_results.csv && ! -f /tmp/worker_analysis.csv ]]; then
            {
              echo "# DataPusher Plus Test Results"
              echo
              echo "No test data available to analyze"
            } >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi
# ---- Safe counters (all integers) ----
total_tests=0; skipped_count=0; total_files_in_dir=0
passed=0; failed=0; error_count=0
tested_success_rate=0; overall_success_rate=0
if [[ -f /tmp/test_results.csv ]]; then
total_lines=$(wc -l < /tmp/test_results.csv | tr -d '[:space:]')
if [[ "${total_lines:-0}" -gt 0 ]]; then
(( total_tests = total_lines - 1 ))
fi
if [[ -f /tmp/skipped_files.csv ]]; then
skipped_lines=$(wc -l < /tmp/skipped_files.csv | tr -d '[:space:]')
if [[ "${skipped_lines:-0}" -gt 0 ]]; then
(( skipped_count = skipped_lines - 1 ))
fi
fi
(( total_files_in_dir = total_tests + skipped_count ))
if [[ "$total_tests" -gt 0 ]]; then
# Count passes: SUCCESS + complete + datastore_active=true
passed=$(grep -E -c ',SUCCESS,.*,complete,true,' /tmp/test_results.csv 2>/dev/null || echo 0)
failed=$(tail -n +2 /tmp/test_results.csv 2>/dev/null | grep -Ev ',SUCCESS,.*,complete,true,' | wc -l | tr -d '[:space:]')
error_count=$(grep -E -c ',error,' /tmp/test_results.csv 2>/dev/null || echo 0)
(( tested_success_rate = (passed * 100) / total_tests ))
if [[ "$total_files_in_dir" -gt 0 ]]; then
(( overall_success_rate = (passed * 100) / total_files_in_dir ))
fi
fi
fi
worker_analysis_available=false
[[ -f /tmp/worker_analysis.csv ]] && worker_analysis_available=true
{
echo "# DataPusher Plus Test Results"
echo
echo "## Summary"
echo
echo "| Metric | Value |"
echo "|--------|-------|"
echo "| Total Files in Directory | $total_files_in_dir |"
echo "| Files Tested | $total_tests |"
echo "| Files Skipped | $skipped_count |"
echo "| Passed | $passed |"
echo "| Failed | $failed |"
echo "| Errors | $error_count |"
echo "| Success Rate (Tested Files) | ${tested_success_rate}% |"
echo "| Success Rate (All Files) | ${overall_success_rate}% |"
echo
if [[ "$skipped_count" -gt 0 && -f /tmp/skipped_files.csv ]]; then
echo "## Skipped Files"
echo
echo "| File Name | Reason Skipped |"
echo "|-----------|----------------|"
tail -n +2 /tmp/skipped_files.csv | while IFS=',' read -r file_name reason; do
echo "| $file_name | $reason |"
done
echo
fi
if $worker_analysis_available; then
total_jobs=$(tail -n +2 /tmp/worker_analysis.csv 2>/dev/null | wc -l | tr -d '[:space:]'); total_jobs=${total_jobs:-0}
if [[ "$total_jobs" -gt 0 ]]; then
echo "## Complete Job Analysis"
echo
echo "| # | File Name | Status | Records | Columns | Time (s) | Valid CSV | Headers Safe | Error Type | Quality Score |"
echo "|---|-----------|--------|---------|---------|----------|-----------|--------------|------------|---------------|"
counter=1
tail -n +2 /tmp/worker_analysis.csv | while IFS=',' read -r timestamp job_id file_name status qsv_version file_format encoding normalized valid_csv sorted db_safe_headers analysis records total_time download_time analysis_time copying_time indexing_time formulae_time metadata_time rows_copied columns_indexed error_type error_message data_quality_score processing_efficiency; do
full_file_name="${file_name//.../}"
: "${records:=0}"; : "${columns_indexed:=0}"; : "${total_time:=0}"; : "${data_quality_score:=-}"; : "${error_type:=-}"
case "$status" in
SUCCESS) status_display="✅ SUCCESS" ;;
ERROR) status_display="❌ ERROR" ;;
INCOMPLETE) status_display="⏸️ INCOMPLETE" ;;
*) status_display="❓ $status" ;;
esac
echo "| $counter | $full_file_name | $status_display | $records | $columns_indexed | $total_time | $valid_csv | $db_safe_headers | $error_type | $data_quality_score |"
((counter++))
done
echo
success_jobs=$(grep -c ',SUCCESS,' /tmp/worker_analysis.csv 2>/dev/null || echo 0)
error_jobs=$(grep -c ',ERROR,' /tmp/worker_analysis.csv 2>/dev/null || echo 0)
echo "## File Analysis"
echo
if [[ "$success_jobs" -gt 0 ]]; then
echo "### File Formats Processed"
echo
formats=$(tail -n +2 /tmp/worker_analysis.csv 2>/dev/null | grep ',SUCCESS,' | cut -d',' -f6 | sort | uniq -c)
if [[ -n "$formats" ]]; then
echo "| Format | Files | Percentage |"
echo "|--------|-------|------------|"
while read -r count format; do
count="${count//[[:space:]]/}"; [[ -n "$count" ]] || count=0
(( percentage = (count * 100) / success_jobs ))
case "$format" in
CSV) icon="📊" ;;
XLSX|XLS) icon="📈" ;;
JSON) icon="🔧" ;;
TXT) icon="📝" ;;
*) icon="📄" ;;
esac
echo "| $icon $format | $count | $percentage% |"
done <<< "$formats"
else
echo "No format data available"
fi
echo
echo "### Encoding Distribution"
echo
encodings=$(tail -n +2 /tmp/worker_analysis.csv 2>/dev/null | grep ',SUCCESS,' | cut -d',' -f7 | sort | uniq -c)
if [[ -n "$encodings" ]]; then
echo "| Encoding | Files | Status |"
echo "|----------|-------|--------|"
while read -r count encoding; do
if [[ -n "$encoding" ]]; then
if [[ "$encoding" == "UTF-8" || "$encoding" == "UTF" ]]; then status_icon="✅"; else status_icon="⚠️"; fi
echo "| $status_icon $encoding | $count | Compatible |"
else
echo "| ❓ Unknown | $count | Needs Review |"
fi
done <<< "$encodings"
else
echo "No encoding data available"
fi
echo
fi
echo "## Error Analysis"
echo
if [[ "$error_jobs" -gt 0 ]]; then
echo "| File | Error Type | Error Message |"
echo "|------|------------|---------------|"
tail -n +2 /tmp/worker_analysis.csv | grep ',ERROR,' | cut -d',' -f3,23,24 | \
while IFS=',' read -r file error_type error_msg; do
clean_error="${error_msg%\"}"; clean_error="${clean_error#\"}"
clean_file="${file//.../}"
echo "| $clean_file | $error_type | $clean_error |"
done
echo
else
echo "✅ No errors found in worker logs"
echo
fi
fi
fi
if [[ "$total_tests" -eq 0 && "$skipped_count" -gt 0 ]]; then
echo "## No Testable Files"
echo
echo "All files in the test directory were skipped."
elif [[ "$total_tests" -eq 0 ]]; then
echo "## No Files Found"
echo
echo "No files found in test directory to test."
elif [[ "$passed" -eq "$total_tests" ]]; then
echo "## All Tested Files Passed 🎉"
elif [[ "$passed" -gt 0 ]]; then
echo "## Result: Partial Success"
else
echo "## Result: All Tested Files Failed ❌"
echo
echo "### All Failed Files:"
tail -n +2 /tmp/test_results.csv | \
while IFS=',' read -r timestamp file_name upload_status resource_id datapusher_status datastore_active rows_imported processing_time error_message; do
clean_error="${error_message%\"}"; clean_error="${clean_error#\"}"
echo "- **$file_name**: $clean_error"
done
fi
echo
echo "---"
echo
echo "**Analysis completed:** $(date '+%A, %B %d, %Y at %I:%M %p %Z')"
} > /tmp/combined_summary.md
cat /tmp/combined_summary.md >> "$GITHUB_STEP_SUMMARY"
echo "Combined analysis summary generated."
- name: Upload test results as artifact
uses: actions/upload-artifact@v4
if: always()
with:
name: datapusher-plus-test-results
path: |
/tmp/test_results.csv
/tmp/ckan_stdout.log
/tmp/ckan_worker.log
/tmp/worker_analysis.csv
retention-days: 3
      - name: Cleanup
        if: always()
        run: |
          echo "Stopping any running CKAN processes..."
          # pkill exits non-zero when nothing matches; never fail the step.
          pkill -f "ckan.*run" || true
          echo "Cleanup completed"