# DataPusher+ Testing Run #3
# NOTE(review): the two lines below are GitHub web-page residue from the scrape,
# kept as comments so they cannot break YAML parsing.
# "This file contains hidden or bidirectional Unicode text that may be interpreted
#  or compiled differently than what appears below."
# Manually-triggered workflow that stands up a full CKAN 2.11 stack and
# exercises the DataPusher+ ingestion pipeline against test files or URLs.
name: DataPusher+ Testing Run

on:
  workflow_dispatch:
    inputs:
      datapusher_branch:
        description: 'DataPusher+ branch or commit'
        required: false
        default: 'main'
        type: string
      testing_directory:
        description: 'testing files directory (ignored if test_mode=urls)'
        required: true
        default: 'quick'
        type: string
      test_mode:
        description: 'dir (use tests/<dir>) or urls (use pasted links)'
        required: false
        default: 'dir'
        type: choice
        options: [dir, urls]
      test_urls:
        description: "When test_mode=urls: paste one URL per line. Optional inline format 'URL|FORMAT|NAME'."
        required: false
        default: ''
        type: string

# Workflow-level env: dispatch inputs (with defaults for non-dispatch reruns)
# plus the credentials/site settings shared by every step.
env:
  FILES_DIR: ${{ github.event.inputs.testing_directory || 'quick' }}
  TEST_MODE: ${{ github.event.inputs.test_mode || 'dir' }}
  TEST_URLS: ${{ github.event.inputs.test_urls }}
  DATAPUSHER_BRANCH: ${{ github.event.inputs.datapusher_branch || 'main' }}
  CKAN_VERSION: "2.11"
  POSTGRES_PASSWORD: postgres
  CKAN_DB_PASSWORD: pass
  CKAN_SITE_URL: http://localhost:5000
  CKAN_SITE_ID: default
  CKAN_SITE_TITLE: "CKAN Test Instance"
jobs:
  setup:
    runs-on: ubuntu-latest
    # Run inside the CKAN dev image as root so we can apt-get and write /srv/app.
    container:
      image: ckan/ckan-dev:2.11
      options: --user root
    # Side-car services reachable by hostname (solr/postgres/redis) from the container.
    services:
      solr:
        image: ckan/ckan-solr:2.11-solr9
        ports: ["8983:8983"]
      postgres:
        image: ckan/ckan-postgres-dev:2.11
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: postgres
        options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
      redis:
        image: redis:3
        ports: ["6379:6379"]
    # Job-level env: connection URLs used both by the steps and to patch test-core.ini.
    env:
      CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test
      CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test
      CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test
      CKAN_SOLR_URL: http://solr:8983/solr/ckan
      CKAN_REDIS_URL: redis://redis:6379/1
      CKAN_SITE_URL: http://localhost:5000
| steps: | |
| - name: Fix permissions and install essential tools | |
| run: | | |
| mkdir -p /__w/_temp | |
| chmod -R 777 /__w/_temp | |
| chmod -R 777 /__w/ | |
| apt-get update -y | |
| apt-get install -y curl wget net-tools procps postgresql-client jq | |
| echo "Essential tools installed successfully" | |
| - uses: actions/checkout@v4 | |
| - name: Wait for PostgreSQL to be ready | |
| run: | | |
| echo "Waiting for PostgreSQL to be ready..." | |
| timeout=90 | |
| while [ $timeout -gt 0 ]; do | |
| if PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "SELECT 1;" >/dev/null 2>&1; then | |
| echo "PostgreSQL is ready!" | |
| break | |
| fi | |
| echo "Postgres not ready yet ($timeout s left)..." | |
| sleep 3 | |
| timeout=$((timeout-3)) | |
| done | |
| if [ $timeout -le 0 ]; then | |
| echo "Timeout waiting for PostgreSQL" | |
| exit 1 | |
| fi | |
| - name: Setup database users and permissions | |
| run: | | |
| set -eu | |
| echo "Creating database users (if not exist)..." | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='ckan_default'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_write'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_write WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_read'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_read WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| echo "Creating databases (if not exist)..." | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='ckan_test'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE ckan_test OWNER ckan_default;" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='datastore_test'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE datastore_test OWNER ckan_default;" | |
| echo "Granting permissions (best-effort)..." | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE ckan_test TO ckan_default;" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE datastore_test TO datastore_write;" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT CONNECT ON DATABASE datastore_test TO datastore_read;" | |
| echo "Database setup completed" | |
| - name: Install requirements, ckanapi and datapusher-plus | |
| run: | | |
| set -eu | |
| python3 -m pip install --upgrade pip setuptools wheel | |
| if [ -f requirements.txt ]; then | |
| pip install -r requirements.txt | |
| fi | |
| if [ -f requirements-dev.txt ]; then | |
| pip install -r requirements-dev.txt | |
| fi | |
| if [ -f setup.py ] || [ -f pyproject.toml ]; then | |
| pip install -e . | |
| fi | |
| pip install --upgrade ckanapi | |
| pip install datasize | |
| apt install -y python3-virtualenv python3-dev python3-pip python3-wheel build-essential libxslt1-dev libxml2-dev zlib1g-dev git libffi-dev libpq-dev uchardet unzip | |
| echo "Installing datapusher-plus from branch: $DATAPUSHER_BRANCH" | |
| pip install -e "git+https://github.com/dathere/datapusher-plus.git@$DATAPUSHER_BRANCH#egg=datapusher-plus" | |
| pip install -e 'git+https://github.com/ckan/ckanext-scheming.git#egg=ckanext-scheming' | |
| echo "Installed ckanapi and datapusher-plus (best-effort)" | |
| - name: Install qsv (musl static) | |
| run: | | |
| set -eu | |
| echo "Attempting to download static qsv musl binary (best-effort)..." | |
| QSV_VER="7.1.0" | |
| QSV_ZIP="qsv-${QSV_VER}-x86_64-unknown-linux-musl.zip" | |
| QSV_URL="https://github.com/dathere/qsv/releases/download/${QSV_VER}/${QSV_ZIP}" | |
| mkdir -p /tmp/qsv && cd /tmp/qsv | |
| if wget -q --spider "$QSV_URL"; then | |
| wget -q "$QSV_URL" -O "$QSV_ZIP" | |
| unzip -o "$QSV_ZIP" | |
| if [ -f qsvdp ]; then | |
| mv qsvdp /usr/local/bin/qsvdp | |
| chmod +x /usr/local/bin/qsvdp | |
| echo "Installed qsvdp to /usr/local/bin/qsvdp" | |
| elif [ -f qsv ]; then | |
| mv qsv /usr/local/bin/qsv | |
| chmod +x /usr/local/bin/qsv | |
| echo "Installed qsv to /usr/local/bin/qsv" | |
| else | |
| echo "Downloaded archive but could not find qsv binary inside" | |
| fi | |
| else | |
| echo "qsv release URL not reachable; skipping qsv install" | |
| fi | |
| /usr/local/bin/qsvdp --version >/dev/null 2>&1 || /usr/local/bin/qsv --version >/dev/null 2>&1 || echo "qsv not installed or not runnable (this is okay for plugin presence test)." | |
| - name: Setup CKAN configuration (/srv/app/src/ckan/test-core.ini) | |
| run: | | |
| set -eu | |
| sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL:-***postgres/ckan_test}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini | |
| if ! grep -q "^solr_url" /srv/app/src/ckan/test-core.ini; then | |
| echo "solr_url = ${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}" >> /srv/app/src/ckan/test-core.ini | |
| fi | |
| if ! grep -q "^ckan.redis.url" /srv/app/src/ckan/test-core.ini; then | |
| echo "ckan.redis.url = ${CKAN_REDIS_URL:-redis://redis:6379/1}" >> /srv/app/src/ckan/test-core.ini | |
| fi | |
| CKAN_SITE_URL="${CKAN_SITE_URL:-http://localhost:5000}" | |
| CKAN_SQLALCHEMY_URL="${CKAN_SQLALCHEMY_URL:-***postgres/ckan_test}" | |
| CKAN_DATASTORE_WRITE_URL="${CKAN_DATASTORE_WRITE_URL:-***postgres/datastore_test}" | |
| CKAN_DATASTORE_READ_URL="${CKAN_DATASTORE_READ_URL:-***postgres/datastore_test}" | |
| CKAN_SOLR_URL="${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}" | |
| CKAN_REDIS_URL="${CKAN_REDIS_URL:-redis://redis:6379/1}" | |
| REPLACE_FILE="$(mktemp)" | |
| ADD_FILE="$(mktemp)" | |
| MISSING_ADD_FILE="$(mktemp)" | |
| : > "$REPLACE_FILE" | |
| : > "$ADD_FILE" | |
| : > "$MISSING_ADD_FILE" | |
| printf '%s\n' \ | |
| "ckan.site_url|${CKAN_SITE_URL}" \ | |
| "sqlalchemy.url|${CKAN_SQLALCHEMY_URL}" \ | |
| "ckan.datastore.write_url|${CKAN_DATASTORE_WRITE_URL}" \ | |
| "ckan.datastore.read_url|${CKAN_DATASTORE_READ_URL}" \ | |
| "solr_url|${CKAN_SOLR_URL}" \ | |
| "ckan.redis.url|${CKAN_REDIS_URL}" \ | |
| > "$REPLACE_FILE" | |
| cat > "$ADD_FILE" <<'EOF' | |
| ckan.site_id = default | |
| ckan.site_title = CKAN Test | |
| ckan.auth.create_default_api_keys = true | |
| ckanext.datapusher_plus.qsv_bin = /usr/local/bin/qsvdp | |
| scheming.dataset_schemas = ckanext.datapusher_plus:dataset-druf.yaml | |
| scheming.presets = ckanext.scheming:presets.json | |
| scheming.dataset_fallback = false | |
| ckanext.datapusher_plus.use_proxy = false | |
| ckanext.datapusher_plus.download_proxy = | |
| ckanext.datapusher_plus.ssl_verify = false | |
| ckanext.datapusher_plus.upload_log_level = INFO | |
| ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsxb xlsm ods geojson shp qgis zip | |
| ckanext.datapusher_plus.pii_screening = false | |
| ckanext.datapusher_plus.pii_found_abort = false | |
| ckanext.datapusher_plus.pii_regex_resource_id_or_alias = | |
| ckanext.datapusher_plus.pii_show_candidates = false | |
| ckanext.datapusher_plus.pii_quick_screen = false | |
| ckanext.datapusher_plus.preview_rows = 100 | |
| ckanext.datapusher_plus.download_timeout = 300 | |
| ckanext.datapusher_plus.max_content_length = 1256000000000 | |
| ckanext.datapusher_plus.chunk_size = 16384 | |
| ckanext.datapusher_plus.default_excel_sheet = 0 | |
| ckanext.datapusher_plus.sort_and_dupe_check = true | |
| ckanext.datapusher_plus.dedup = false | |
| ckanext.datapusher_plus.unsafe_prefix = unsafe_ | |
| ckanext.datapusher_plus.reserved_colnames = _id | |
| ckanext.datapusher_plus.prefer_dmy = false | |
| ckanext.datapusher_plus.ignore_file_hash = true | |
| ckanext.datapusher_plus.auto_index_threshold = 3 | |
| ckanext.datapusher_plus.auto_index_dates = true | |
| ckanext.datapusher_plus.auto_unique_index = true | |
| ckanext.datapusher_plus.summary_stats_options = | |
| ckanext.datapusher_plus.add_summary_stats_resource = false | |
| ckanext.datapusher_plus.summary_stats_with_preview = false | |
| ckanext.datapusher_plus.qsv_stats_string_max_length = 32767 | |
| ckanext.datapusher_plus.qsv_dates_whitelist = date,time,due,open,close,created | |
| ckanext.datapusher_plus.qsv_freq_limit = 10 | |
| ckanext.datapusher_plus.auto_alias = true | |
| ckanext.datapusher_plus.auto_alias_unique = false | |
| ckanext.datapusher_plus.copy_readbuffer_size = 1048576 | |
| ckanext.datapusher_plus.type_mapping = {"String": "text", "Integer": "numeric","Float": "numeric","DateTime": "timestamp","Date": "date","NULL": "text"} | |
| ckanext.datapusher_plus.auto_spatial_simplication = true | |
| ckanext.datapusher_plus.spatial_simplication_relative_tolerance = 0.1 | |
| ckanext.datapusher_plus.latitude_fields = latitude,lat | |
| ckanext.datapusher_plus.longitude_fields = longitude,long,lon | |
| ckanext.datapusher_plus.jinja2_bytecode_cache_dir = /tmp/jinja2_butecode_cache | |
| ckanext.datapusher_plus.auto_unzip_one_file = true | |
| EOF | |
| if [ -f /srv/app/src/ckan/test-core.ini ]; then | |
| echo "Patching selective keys in /srv/app/src/ckan/test-core.ini (only the keys you listed)..." | |
| awk 'BEGIN{in=0} | |
| /^\[DEFAULT\]/{ print; in=1; next } | |
| /^\[.*\]/{ if(in){ print "debug = true"; in=0 } } | |
| { | |
| if(in){ | |
| if($1 == "debug") next | |
| } else { | |
| } | |
| } | |
| END { if(in) print "debug = true" }' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.tmp && mv /srv/app/src/ckan/test-core.ini.tmp /srv/app/src/ckan/test-core.ini | |
| while IFS= read -r entry || [ -n "$entry" ]; do | |
| key="$(printf '%s' "$entry" | cut -d'|' -f1)" | |
| value="$(printf '%s' "$entry" | cut -d'|' -f2-)" | |
| esc_value="$(printf '%s' "$value" | sed -e 's/[\/&]/\\&/g')" | |
| if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then | |
| sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini | |
| else | |
| printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE" | |
| fi | |
| done < "$REPLACE_FILE" | |
| while IFS= read -r ln || [ -n "$ln" ]; do | |
| [ -z "$ln" ] && continue | |
| case "$ln" in | |
| \#*) | |
| if ! grep -Fq "$ln" /srv/app/src/ckan/test-core.ini; then | |
| printf '%s\n' "$ln" >> "$MISSING_ADD_FILE" | |
| fi | |
| ;; | |
| *) | |
| key="$(printf '%s' "$ln" | cut -d'=' -f1 | sed 's/[[:space:]]*$//')" | |
| value="$(printf '%s' "$ln" | cut -d'=' -f2- | sed 's/^[[:space:]]*//')" | |
| esc_value="$(printf '%s' "$value" | sed -e 's/[\/&]/\\&/g')" | |
| if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then | |
| sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini | |
| else | |
| printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE" | |
| fi | |
| ;; | |
| esac | |
| done < "$ADD_FILE" | |
| if [ -s "$MISSING_ADD_FILE" ]; then | |
| awk -v addfile="$MISSING_ADD_FILE" ' | |
| BEGIN{ | |
| inserted=0 | |
| while ((getline line < addfile) > 0) { add[++na]=line } | |
| close(addfile) | |
| } | |
| { | |
| if(!inserted && $0=="[app:main]") { | |
| for(i=1;i<=na;i++) print add[i] | |
| inserted=1 | |
| } | |
| } | |
| END{ | |
| if(!inserted){ | |
| print "[app:main]" | |
| for(i=1;i<=na;i++) print add[i] | |
| } | |
| }' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini | |
| fi | |
| sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL}|g" /srv/app/src/ckan/test-core.ini | |
| else | |
| echo "/srv/app/src/ckan/test-core.ini not found — no selective patching performed." | |
| fi | |
| REQUIRED_PLUGINS="datastore datapusher_plus scheming_datasets" | |
| if grep -q "^ckan.plugins" /srv/app/src/ckan/test-core.ini; then | |
| echo "Appending required plugins to existing ckan.plugins line" | |
| current=$(grep "^ckan.plugins" /srv/app/src/ckan/test-core.ini | head -n1 | cut -d'=' -f2-) | |
| for p in $REQUIRED_PLUGINS; do | |
| echo "$current" | grep -qw "$p" || current="$current $p" | |
| done | |
| awk -v new="ckan.plugins = $current" 'BEGIN{done=0} {if(!done && $1=="ckan.plugins") {print new; done=1} else print $0}' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini | |
| else | |
| echo "ckan.plugins = $REQUIRED_PLUGINS" >> /srv/app/src/ckan/test-core.ini | |
| echo "Added ckan.plugins line with required plugins." | |
| fi | |
| echo "---- /srv/app/src/ckan/test-core.ini (cat) ----" | |
| cat /srv/app/src/ckan/test-core.ini | |
| echo "---- end ----" | |
| - name: Initialize CKAN database | |
| run: | | |
| echo "Testing connectivity with CKAN DB user..." | |
| if ! PGPASSWORD=$CKAN_DB_PASSWORD psql -h postgres -U ckan_default -d ckan_test -c "SELECT 1;" >/dev/null 2>&1; then | |
| echo "Cannot connect as ckan_default. Attempting to create database owner and db..." | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER IF NOT EXISTS ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE IF NOT EXISTS ckan_test OWNER ckan_default;" | |
| fi | |
| echo "Running ckan db init (may be idempotent)..." | |
| if ckan -c /srv/app/src/ckan/test-core.ini db init; then | |
| echo "CKAN DB initialized." | |
| else | |
| echo "ckan db init returned non-zero; continuing (may already be initialized)." | |
| fi | |
| echo "Setting datastore permissions..." | |
| if ckan -c /srv/app/src/ckan/test-core.ini datastore set-permissions | PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres --set ON_ERROR_STOP=1; then | |
| echo "Datastore permissions set." | |
| else | |
| echo "Datastore permission step returned non-zero; continuing." | |
| fi | |
| - name: Start CKAN server | |
| run: | | |
| set -eu | |
| echo "Starting CKAN server in background..." | |
| nohup ckan -c /srv/app/src/ckan/test-core.ini run --host 0.0.0.0 --port 5000 --disable-reloader > /tmp/ckan_stdout.log 2>&1 & | |
| CKAN_PID=$! | |
| echo "CKAN PID=$CKAN_PID" | |
| timeout=120 | |
| while [ $timeout -gt 0 ]; do | |
| if ! kill -0 "$CKAN_PID" >/dev/null 2>&1; then | |
| echo "CKAN process died. Showing last lines of log:" | |
| tail -n 200 /tmp/ckan_stdout.log | |
| exit 1 | |
| fi | |
| if curl -fsS "${CKAN_SITE_URL}/api/3/action/status_show" >/dev/null 2>&1; then | |
| echo "CKAN API responding" | |
| break | |
| fi | |
| echo "Waiting for CKAN API... ($timeout s left)" | |
| sleep 3 | |
| timeout=$((timeout-3)) | |
| done | |
| if [ $timeout -le 0 ]; then | |
| echo "Timeout waiting for CKAN to start. Dumping logs..." | |
| tail -n 200 /tmp/ckan_stdout.log | |
| ss -tlnp || netstat -tlnp | |
| exit 1 | |
| fi | |
| echo "CKAN started successfully" | |
| - name: Create sysadmin user admin_ckan and get apikey | |
| run: | | |
| set -eu | |
| echo "Creating user admin_ckan..." | |
| user_response=$(ckanapi action user_create --config /srv/app/src/ckan/test-core.ini \ | |
| name=admin_ckan \ | |
| email=admins@example.com \ | |
| password=test1234 \ | |
| fullname="CKAN Administrator" \ | |
| with_apitoken=true \ | |
| about="Created by GitHub Actions test" 2>/dev/null) || echo "user_create returned non-zero (user may already exist)" | |
| echo "User creation response: $user_response" | |
| echo "Converting admin_ckan user to sysadmin..." | |
| ckan -c /srv/app/src/ckan/test-core.ini sysadmin add admin_ckan | |
| echo "User admin_ckan promoted to sysadmin" | |
| json_response=$(echo "$user_response" | sed -n '/{/,/}/p') | |
| api_key=$(echo "$json_response" | jq -r '.token // empty') | |
| if [ -n "$api_key" ] && [ "$api_key" != "null" ] && [ "$api_key" != "empty" ]; then | |
| echo "CKAN_API_KEY=$api_key" >> $GITHUB_ENV | |
| echo "API key saved: $api_key" | |
| else | |
| echo "No API key found in response" | |
| fi | |
| echo "User admin_ckan creation completed" | |
| - name: Create API token for datapusher-plus and add to config | |
| run: | | |
| set -eu | |
| echo "Creating API token for datapusher-plus service account..." | |
| echo "Running: ckan user token add admin_ckan dpplus" | |
| dp_token_output=$(ckan -c /srv/app/src/ckan/test-core.ini user token add admin_ckan dpplus 2>&1) | |
| echo "Full token creation output:" | |
| echo "$dp_token_output" | |
| dp_token=$(echo "$dp_token_output" | tail -n 1 | tr -d '\t') | |
| echo "Extracted token: '$dp_token'" | |
| if [ -n "$dp_token" ] && [ "$dp_token" != "null" ]; then | |
| echo "Created datapusher-plus API token: $dp_token" | |
| ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$dp_token" | |
| echo "Verifying token was added to config:" | |
| grep "ckanext.datapusher_plus.api_token" /srv/app/src/ckan/test-core.ini || echo "Token not found in config!" | |
| echo "DATAPUSHER_PLUS_API_TOKEN=$dp_token" >> $GITHUB_ENV | |
| echo "API token added to CKAN configuration successfully" | |
| else | |
| echo "Failed to create API token for datapusher-plus" | |
| echo "Using main CKAN API key as fallback..." | |
| ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$CKAN_API_KEY" | |
| fi | |
| - name: Create organization with ckanapi | |
| run: | | |
| set -eu | |
| echo "Creating organization demo-organization (idempotent)..." | |
| ckanapi action organization_create --config /srv/app/src/ckan/test-core.ini \ | |
| name=demo-organization \ | |
| title="Demo Data Publishing Organization" \ | |
| description="Demo org created by GitHub Actions for datapusher-plus testing." || echo "organization_create returned non-zero (may already exist)" | |
| echo "Add admin_ckan as admin to the organization" | |
| ckanapi action organization_member_create --config /srv/app/src/ckan/test-core.ini \ | |
| id=demo-organization username=admin_ckan role=admin || echo "organization_member_create returned non-zero (may already be member)" | |
| - name: Create dataset with ckanapi | |
| run: | | |
| set -eu | |
| echo "Creating dataset my-first-dataset (idempotent)..." | |
| if ckanapi action package_create \ | |
| name=my-first-dataset \ | |
| title="My First Comprehensive Dataset" \ | |
| notes="This is a comprehensive demo dataset created via ckanapi and GitHub Actions for testing CKAN functionality and datapusher-plus integration." \ | |
| owner_org=demo-organization \ | |
| license_id=cc-by \ | |
| version=1.0.0 \ | |
| author="GitHub Actions Automation" \ | |
| author_email=noreply@example.com \ | |
| maintainer="CKAN Admin" \ | |
| maintainer_email=admin@example.com \ | |
| url=https://github.com/your-repo/your-project \ | |
| private:false \ | |
| state=active \ | |
| 'tags:[{"name":"demo"},{"name":"test"},{"name":"github-actions"},{"name":"automation"},{"name":"csv-data"},{"name":"datapusher-plus"}]' \ | |
| -c /srv/app/src/ckan/test-core.ini; then | |
| echo "Dataset created successfully!" | |
| else | |
| echo "Dataset might already exist, continuing..." | |
| fi | |
| - name: Add resource to dataset with ckanapi | |
| run: | | |
| set -eu | |
| echo "Adding resource to my-first-dataset..." | |
| if ckanapi action resource_create \ | |
| package_id=my-first-dataset \ | |
| url="https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/100kb.csv" \ | |
| name="Sample CSV Data - 100KB Test File" \ | |
| description="Test CSV resource for datapusher-plus pipeline." \ | |
| format=CSV \ | |
| mimetype="text/csv" \ | |
| -c /srv/app/src/ckan/test-core.ini; then | |
| echo "Resource created successfully!" | |
| else | |
| echo "Resource creation failed" | |
| ckanapi action package_show id=my-first-dataset -c /srv/app/src/ckan/test-core.ini | |
| exit 1 | |
| fi | |
| - name: Display CKAN instance inventory | |
| run: | | |
| set -eu | |
| echo "=== CKAN Status (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/status_show" | python3 -m json.tool | |
| echo "" | |
| echo "=== All Datasets (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/package_list" | python3 -m json.tool | |
| echo "" | |
| echo "=== All Organizations (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/organization_list" | python3 -m json.tool | |
| echo "" | |
| echo "=== All Users (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/user_list" | python3 -m json.tool | |
| - name: Test datastore functionality | |
| run: | | |
| set -eu | |
| echo "Testing datastore functionality..." | |
| echo "=== Testing datastore read access ===" | |
| metadata_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=_table_metadata") | |
| echo "Table metadata response: $metadata_response" | |
| if echo "$metadata_response" | jq -e '.success == true' >/dev/null 2>&1; then | |
| echo "✓ Datastore read access working" | |
| else | |
| echo "✗ Datastore read access failed" | |
| exit 1 | |
| fi | |
| echo "=== Testing datastore write access ===" | |
| test_response=$(curl -s -X POST \ | |
| -H "Content-Type: application/json" \ | |
| -H "Authorization: $CKAN_API_KEY" \ | |
| -d '{ | |
| "resource": {"package_id": "my-first-dataset"}, | |
| "fields": [{"id": "test_col", "type": "text"}, {"id": "value", "type": "int"}], | |
| "records": [{"test_col": "hello", "value": 1}, {"test_col": "world", "value": 2}] | |
| }' \ | |
| "http://localhost:5000/api/3/action/datastore_create") | |
| echo "Test table creation response: $test_response" | |
| if echo "$test_response" | jq -e '.success == true' >/dev/null 2>&1; then | |
| echo "✓ Datastore write access working" | |
| test_resource_id=$(echo "$test_response" | jq -r '.result.resource_id') | |
| echo "=== Testing datastore query ===" | |
| query_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$test_resource_id") | |
| echo "Query response: $query_response" | |
| echo "=== Cleaning up test table ===" | |
| curl -s -X POST \ | |
| -H "Content-Type: application/json" \ | |
| -H "Authorization: $CKAN_API_KEY" \ | |
| -d "{\"resource_id\": \"$test_resource_id\"}" \ | |
| "http://localhost:5000/api/3/action/datastore_delete" >/dev/null | |
| echo "✓ Datastore functionality test completed successfully" | |
| else | |
| echo "✗ Datastore write access failed" | |
| fi | |
| - name: Start CKAN background job worker | |
| run: | | |
| set -eu | |
| echo "Starting CKAN background job worker (CRITICAL for DataPusher Plus)..." | |
| nohup ckan -c /srv/app/src/ckan/test-core.ini jobs worker > /tmp/ckan_worker.log 2>&1 & | |
| WORKER_PID=$! | |
| echo "CKAN Worker PID=$WORKER_PID" | |
| echo "CKAN_WORKER_PID=$WORKER_PID" >> $GITHUB_ENV | |
| sleep 5 | |
| if kill -0 "$WORKER_PID" >/dev/null 2>&1; then | |
| echo "Background job worker started successfully" | |
| echo "Worker logs:" | |
| head -n 20 /tmp/ckan_worker.log || echo "No worker logs yet" | |
| else | |
| echo "Worker failed to start" | |
| cat /tmp/ckan_worker.log | |
| exit 1 | |
| fi | |
| - name: Test DataPusher Plus functionality - Remote URLs | |
| if: env.TEST_MODE == 'urls' | |
| run: | | |
| set -eu | |
| echo "=== Testing DataPusher Plus Functionality - Remote URLs ===" | |
| echo "timestamp,file_name,upload_status,resource_id,datapusher_status,datastore_active,rows_imported,processing_time,error_message" > /tmp/test_results.csv | |
| # Create dataset for URL mode | |
| echo "Creating test dataset for URL mode..." | |
| if ! ckanapi action package_show id=datapusher-plus-test-urls -c /srv/app/src/ckan/test-core.ini >/dev/null 2>&1; then | |
| ckanapi action package_create \ | |
| name=datapusher-plus-test-urls \ | |
| title="DataPusher Plus Remote URLs Test Dataset" \ | |
| owner_org=demo-organization \ | |
| -c /srv/app/src/ckan/test-core.ini | |
| echo "Test dataset created" | |
| else | |
| echo "Test dataset already exists" | |
| fi | |
| # Initialize counters | |
| total_files=0 | |
| passed_files=0 | |
| failed_files=0 | |
| # Process each URL from input | |
| echo "$TEST_URLS" | while IFS= read -r line || [ -n "$line" ]; do | |
| [ -z "$line" ] && continue | |
| # Parse line format: URL|FORMAT|NAME or just URL | |
| if echo "$line" | grep -q '|'; then | |
| url=$(echo "$line" | cut -d'|' -f1) | |
| format=$(echo "$line" | cut -d'|' -f2) | |
| name=$(echo "$line" | cut -d'|' -f3) | |
| else | |
| url="$line" | |
| format="CSV" | |
| name=$(basename "$url") | |
| fi | |
| [ -z "$format" ] && format="CSV" | |
| [ -z "$name" ] && name=$(basename "$url") | |
| total_files=$((total_files + 1)) | |
| echo "" | |
| echo "==========================================" | |
| echo "Testing Remote URL #${total_files}: $name" | |
| echo "URL: $url" | |
| echo "Format: $format" | |
| echo "==========================================" | |
| # Initialize tracking variables | |
| start_time=$(date +%s) | |
| upload_status="FAILED" | |
| resource_id="" | |
| datapusher_status="N/A" | |
| datastore_active="false" | |
| rows_imported="0" | |
| error_message="" | |
| # Create resource | |
| echo "Creating resource for $name..." | |
| if resource_response=$(ckanapi action resource_create \ | |
| package_id=datapusher-plus-test-urls \ | |
| url="$url" \ | |
| name="Remote URL Test: $name" \ | |
| format="$format" \ | |
| -c /srv/app/src/ckan/test-core.ini 2>&1); then | |
| echo "Resource created successfully for $name" | |
| upload_status="SUCCESS" | |
| # Extract resource ID | |
| resource_id=$(echo "$resource_response" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') | |
| if [ -z "$resource_id" ]; then | |
| resource_id=$(echo "$resource_response" | sed -n 's/.*"id"[[:space:]]*:[[:space:]]*"\([a-f0-9-]*\)".*/\1/p') | |
| fi | |
| echo "Resource ID: $resource_id" | |
| if [ -n "$resource_id" ] && [ "$resource_id" != "null" ]; then | |
| # Monitor DataPusher Plus processing | |
| echo "Monitoring DataPusher Plus processing for $name..." | |
| max_attempts=90 | |
| for attempt in $(seq 1 $max_attempts); do | |
| sleep 2 | |
| if dp_status_response=$(curl -s -H "Authorization: $CKAN_API_KEY" \ | |
| "http://localhost:5000/api/3/action/datapusher_status?resource_id=$resource_id" 2>/dev/null); then | |
| if echo "$dp_status_response" | grep -q '"success"[[:space:]]*:[[:space:]]*true'; then | |
| datapusher_status=$(echo "$dp_status_response" | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') | |
| if [ -z "$datapusher_status" ]; then | |
| datapusher_status="unknown" | |
| fi | |
| datapusher_status=$(echo "$datapusher_status" | tr -d '\n\r\t ' | cut -c1-10) | |
| echo " Attempt $attempt/$max_attempts: DataPusher status = $datapusher_status" | |
| if [ "$datapusher_status" = "complete" ]; then | |
| echo " ✓ DataPusher processing completed for $name!" | |
| break | |
| elif [ "$datapusher_status" = "error" ]; then | |
| error_info=$(echo "$dp_status_response" | grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' | head -1) | |
| if [ -z "$error_info" ]; then | |
| error_info="DataPusher processing error" | |
| fi | |
| error_message="DataPusher error: $error_info" | |
| echo " ✗ DataPusher processing failed for $name: $error_message" | |
| break | |
| fi | |
| fi | |
| fi | |
| if [ $((attempt % 15)) -eq 0 ]; then | |
| echo " Still processing $name... (${attempt}/${max_attempts})" | |
| fi | |
| done | |
| # Check final resource status | |
| echo "Checking final status for $name..." | |
| if final_resource=$(curl -s "http://localhost:5000/api/3/action/resource_show?id=$resource_id" 2>/dev/null); then | |
| if echo "$final_resource" | grep -q '"datastore_active"[[:space:]]*:[[:space:]]*true'; then | |
| datastore_active="true" | |
| echo " ✓ DataStore activated for $name" | |
| if datastore_data=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$resource_id&limit=1" 2>/dev/null); then | |
| rows_imported=$(echo "$datastore_data" | grep -o '"total"[[:space:]]*:[[:space:]]*[0-9]*' | sed 's/.*"total"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/') | |
| if [ -z "$rows_imported" ]; then | |
| rows_imported="0" | |
| fi | |
| echo " ✓ Rows imported for $name: $rows_imported" | |
| fi | |
| else | |
| datastore_active="false" | |
| echo " ✗ DataStore not activated for $name" | |
| fi | |
| fi | |
| else | |
| error_message="No valid resource ID extracted for $name" | |
| echo " ✗ $error_message" | |
| fi | |
| else | |
| echo " ✗ Resource creation failed for $name" | |
| error_message="Resource creation failed: $(echo "$resource_response" | head -1)" | |
| fi | |
| # Calculate processing time | |
| end_time=$(date +%s) | |
| processing_time=$((end_time - start_time)) | |
| # Log results | |
| timestamp=$(date '+%Y-%m-%d %H:%M:%S') | |
| echo "$timestamp,$name,$upload_status,$resource_id,$datapusher_status,$datastore_active,$rows_imported,$processing_time,\"$error_message\"" >> /tmp/test_results.csv | |
| # Update counters | |
| if [ "$upload_status" = "SUCCESS" ] && [ "$datapusher_status" = "complete" ] && [ "$datastore_active" = "true" ]; then | |
| passed_files=$((passed_files + 1)) | |
| echo " 🎉 PASS: $name processed successfully" | |
| else | |
| failed_files=$((failed_files + 1)) | |
| echo " ❌ FAIL: $name had issues" | |
| fi | |
| echo " Processing time: ${processing_time}s" | |
| sleep 3 | |
| done | |
| echo "" | |
| echo "==========================================" | |
| echo "=== URL MODE TEST RESULTS SUMMARY ===" | |
| echo "==========================================" | |
| echo "Total URLs tested: $total_files" | |
| echo "Passed: $passed_files" | |
| echo "Failed: $failed_files" | |
| if [ $total_files -gt 0 ]; then | |
| echo "Success rate: $(( passed_files * 100 / total_files ))%" | |
| fi | |
| - name: Test DataPusher Plus functionality - Local Files (HTTP Served) | |
| if: env.TEST_MODE == 'dir' | |
| run: | | |
| set -eu | |
| echo "=== Testing DataPusher Plus Functionality - Local Repository Files ===" | |
| echo "timestamp,file_name,upload_status,resource_id,datapusher_status,datastore_active,rows_imported,processing_time,error_message" > /tmp/test_results.csv | |
| echo "file_name,reason_skipped" > /tmp/skipped_files.csv | |
| TEST_FILES_DIR="${GITHUB_WORKSPACE}/tests/$FILES_DIR" | |
| if [ ! -d "$TEST_FILES_DIR" ]; then | |
| echo "ERROR: Test files directory not found: $TEST_FILES_DIR" | |
| echo "Please ensure the tests/$FILES_DIR directory exists in your repository" | |
| exit 1 | |
| fi | |
| echo "Using test files from: $TEST_FILES_DIR" | |
| echo "Available test files:" | |
| find "$TEST_FILES_DIR" -type f | head -10 | |
| echo "" | |
| echo "Complete inventory of files in tests/$FILES_DIR:" | |
| find "$TEST_FILES_DIR" -type f -name "*" | while read filepath; do | |
| filename=$(basename "$filepath") | |
| filesize=$(du -h "$filepath" | cut -f1) | |
| echo " $filename ($filesize)" | |
| done | |
| echo "" | |
| echo "Starting HTTP server to serve test files..." | |
| cd "$TEST_FILES_DIR" | |
| python3 -m http.server 8080 > /tmp/http_server.log 2>&1 & | |
| HTTP_SERVER_PID=$! | |
| echo "HTTP Server PID: $HTTP_SERVER_PID" | |
| sleep 3 | |
| # Verify the file server answers; --fail makes HTTP error statuses (4xx/5xx) | |
| # count as failure too - plain -s exits 0 on any response, masking a broken server. | |
| if ! curl -sf "http://localhost:8080/" > /dev/null; then | |
| echo "ERROR: HTTP server failed to start" | |
| cat /tmp/http_server.log | |
| exit 1 | |
| fi | |
| echo "HTTP server running at http://localhost:8080/" | |
| echo "" | |
| echo "Discovering ALL files in $TEST_FILES_DIR for testing..." | |
| : > /tmp/test_files.txt | |
| # Build the test manifest, one "NAME|URL|FORMAT|MIMETYPE|DESCRIPTION" line per file. | |
| # IFS= and -r keep leading whitespace and backslashes in paths intact; a bare | |
| # "read filepath" mangles such names before they reach basename. The redundant | |
| # -name "*" (matches everything) is dropped. | |
| # NOTE(review): files in subdirectories get a URL built from basename only, which | |
| # will 404 against the http.server root - confirm test directories stay flat. | |
| find "$TEST_FILES_DIR" -type f | while IFS= read -r filepath; do | |
| filename=$(basename "$filepath") | |
| name=$(echo "$filename" | sed 's/\.[^.]*$//') | |
| extension=$(echo "$filename" | sed 's/.*\.//' | tr '[:upper:]' '[:lower:]') | |
| # Map extension -> CKAN format + mimetype; anything else is logged as skipped. | |
| case "$extension" in | |
| csv) | |
| echo "$name|http://localhost:8080/$filename|CSV|text/csv|CSV file: $filename" >> /tmp/test_files.txt | |
| ;; | |
| tsv) | |
| echo "$name|http://localhost:8080/$filename|TSV|text/tab-separated-values|TSV file: $filename" >> /tmp/test_files.txt | |
| ;; | |
| xlsx|xls) | |
| echo "$name|http://localhost:8080/$filename|XLSX|application/vnd.openxmlformats-officedocument.spreadsheetml.sheet|Excel file: $filename" >> /tmp/test_files.txt | |
| ;; | |
| json) | |
| echo "$name|http://localhost:8080/$filename|JSON|application/json|JSON file: $filename" >> /tmp/test_files.txt | |
| ;; | |
| geojson) | |
| echo "$name|http://localhost:8080/$filename|GEOJSON|application/geo+json|GeoJSON file: $filename" >> /tmp/test_files.txt | |
| ;; | |
| txt) | |
| echo "$name|http://localhost:8080/$filename|TXT|text/plain|Text file: $filename" >> /tmp/test_files.txt | |
| ;; | |
| *) | |
| echo "$filename,Unsupported file format: .$extension" >> /tmp/skipped_files.csv | |
| echo "SKIP: Unsupported format .$extension for file: $filename" | |
| ;; | |
| esac | |
| done | |
| # Abort early (and stop the file server) if discovery produced no usable files. | |
| if [ ! -s /tmp/test_files.txt ]; then | |
| echo "ERROR: No supported test files found in $TEST_FILES_DIR" | |
| echo "Supported formats: .csv, .tsv, .xlsx, .xls, .json, .geojson, .txt" | |
| kill $HTTP_SERVER_PID 2>/dev/null || true | |
| exit 1 | |
| fi | |
| echo "Test files to process:" | |
| cat /tmp/test_files.txt | |
| echo "" | |
| # The CSV always has a header line, so >1 line means at least one real skip. | |
| if [ -f /tmp/skipped_files.csv ] && [ $(wc -l < /tmp/skipped_files.csv) -gt 1 ]; then | |
| echo "Files that will be skipped:" | |
| tail -n +2 /tmp/skipped_files.csv | |
| echo "" | |
| fi | |
| # Create (or reuse) the CKAN dataset that every test resource attaches to. | |
| # Output is discarded because a rerun legitimately fails with "already exists". | |
| echo "Creating test dataset for DataPusher Plus..." | |
| if ckanapi action package_create \ | |
| name=datapusher-plus-test-local-http \ | |
| title="DataPusher Plus Local Files Test Dataset (HTTP Served)" \ | |
| owner_org=demo-organization \ | |
| -c /srv/app/src/ckan/test-core.ini >/dev/null 2>&1; then | |
| echo "Test dataset created" | |
| else | |
| echo "Test dataset might already exist, continuing..." | |
| fi | |
| # Per-run counters; skipped_files is recomputed from the CSV after the loop. | |
| total_files=0 | |
| passed_files=0 | |
| failed_files=0 | |
| skipped_files=0 | |
| # Iterate the manifest; "|| [ -n ... ]" still processes a final line that lacks | |
| # a trailing newline. Blank lines and '#'-prefixed comment lines are ignored. | |
| while IFS='|' read -r file_name file_url file_format file_mimetype file_desc || [ -n "$file_name" ]; do | |
| [ -z "$file_name" ] && continue | |
| case "$file_name" in | |
| '#'*) continue ;; | |
| esac | |
| # --fail is required here: without it curl exits 0 even on an HTTP 404, so | |
| # unreachable files were never routed to this skip branch. | |
| if ! curl -sf --head "$file_url" > /dev/null; then | |
| echo "SKIP: File not accessible via HTTP: $file_url" | |
| filename_from_url=$(basename "$file_url") | |
| echo "$filename_from_url,File not accessible via HTTP" >> /tmp/skipped_files.csv | |
| skipped_files=$((skipped_files + 1)) | |
| continue | |
| fi | |
| total_files=$((total_files + 1)) | |
| echo "" | |
| echo "==========================================" | |
| echo "Testing File #${total_files}: $file_name" | |
| echo "URL: $file_url" | |
| echo "Format: $file_format" | |
| echo "Description: $file_desc" | |
| echo "File size via HTTP: $(curl -sI "$file_url" | grep -i content-length | cut -d' ' -f2 | tr -d '\r')" | |
| echo "==========================================" | |
| start_time=$(date +%s) | |
| upload_status="FAILED" | |
| resource_id="" | |
| datapusher_status="N/A" | |
| datastore_active="false" | |
| rows_imported="0" | |
| error_message="" | |
| echo "Creating resource with URL for $file_name..." | |
| if resource_response=$(ckanapi action resource_create \ | |
| package_id=datapusher-plus-test-local-http \ | |
| url="$file_url" \ | |
| name="Local HTTP Test: $file_name" \ | |
| description="$file_desc" \ | |
| format="$file_format" \ | |
| mimetype="$file_mimetype" \ | |
| -c /srv/app/src/ckan/test-core.ini 2>&1); then | |
| echo "Resource created successfully for $file_name" | |
| upload_status="SUCCESS" | |
| resource_id=$(echo "$resource_response" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') | |
| if [ -z "$resource_id" ]; then | |
| resource_id=$(echo "$resource_response" | sed -n 's/.*"id"[[:space:]]*:[[:space:]]*"\([a-f0-9-]*\)".*/\1/p') | |
| fi | |
| echo "Resource ID: $resource_id" | |
| if [ -n "$resource_id" ] && [ "$resource_id" != "null" ]; then | |
| echo "Monitoring DataPusher Plus processing for $file_name..." | |
| max_attempts=90 | |
| for attempt in $(seq 1 $max_attempts); do | |
| sleep 2 | |
| if dp_status_response=$(curl -s -H "Authorization: $CKAN_API_KEY" \ | |
| "http://localhost:5000/api/3/action/datapusher_status?resource_id=$resource_id" 2>/dev/null); then | |
| if echo "$dp_status_response" | grep -q '"success"[[:space:]]*:[[:space:]]*true'; then | |
| datapusher_status=$(echo "$dp_status_response" | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') | |
| if [ -z "$datapusher_status" ]; then | |
| datapusher_status="unknown" | |
| fi | |
| datapusher_status=$(echo "$datapusher_status" | tr -d '\n\r\t ' | cut -c1-10) | |
| echo " Attempt $attempt/$max_attempts: DataPusher status = $datapusher_status" | |
| if [ "$datapusher_status" = "complete" ]; then | |
| echo " ✓ DataPusher processing completed for $file_name!" | |
| break | |
| elif [ "$datapusher_status" = "error" ]; then | |
| error_info=$(echo "$dp_status_response" | grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' | head -1) | |
| if [ -z "$error_info" ]; then | |
| error_info="DataPusher processing error" | |
| fi | |
| error_message="DataPusher error: $error_info" | |
| echo " ✗ DataPusher processing failed for $file_name: $error_message" | |
| break | |
| fi | |
| else | |
| if [ $attempt -eq $max_attempts ]; then | |
| error_message="DataPusher status API returned success=false" | |
| echo " ✗ DataPusher status API error for $file_name" | |
| fi | |
| fi | |
| else | |
| if [ $attempt -eq $max_attempts ]; then | |
| error_message="Failed to get DataPusher status" | |
| echo " ✗ Cannot reach DataPusher status API for $file_name" | |
| fi | |
| fi | |
| if [ $((attempt % 15)) -eq 0 ]; then | |
| echo " Still processing $file_name... (${attempt}/${max_attempts})" | |
| fi | |
| done | |
| echo "Checking final status for $file_name..." | |
| if final_resource=$(curl -s "http://localhost:5000/api/3/action/resource_show?id=$resource_id" 2>/dev/null); then | |
| if echo "$final_resource" | grep -q '"datastore_active"[[:space:]]*:[[:space:]]*true'; then | |
| datastore_active="true" | |
| echo " ✓ DataStore activated for $file_name" | |
| if datastore_data=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$resource_id&limit=1" 2>/dev/null); then | |
| rows_imported=$(echo "$datastore_data" | grep -o '"total"[[:space:]]*:[[:space:]]*[0-9]*' | sed 's/.*"total"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/') | |
| if [ -z "$rows_imported" ]; then | |
| rows_imported="0" | |
| fi | |
| echo " ✓ Rows imported for $file_name: $rows_imported" | |
| fi | |
| else | |
| datastore_active="false" | |
| echo " ✗ DataStore not activated for $file_name" | |
| fi | |
| else | |
| echo " ✗ Cannot check final resource status for $file_name" | |
| fi | |
| else | |
| error_message="No valid resource ID extracted for $file_name" | |
| echo " ✗ $error_message" | |
| fi | |
| else | |
| echo " ✗ Resource creation failed for $file_name" | |
| error_message="Resource creation failed: $(echo "$resource_response" | head -1)" | |
| fi | |
| end_time=$(date +%s) | |
| processing_time=$((end_time - start_time)) | |
| timestamp=$(date '+%Y-%m-%d %H:%M:%S') | |
| echo "$timestamp,$file_name,$upload_status,$resource_id,$datapusher_status,$datastore_active,$rows_imported,$processing_time,\"$error_message\"" >> /tmp/test_results.csv | |
| if [ "$upload_status" = "SUCCESS" ] && [ "$datapusher_status" = "complete" ] && [ "$datastore_active" = "true" ]; then | |
| passed_files=$((passed_files + 1)) | |
| echo " 🎉 PASS: $file_name processed successfully" | |
| else | |
| failed_files=$((failed_files + 1)) | |
| echo " ❌ FAIL: $file_name had issues" | |
| fi | |
| echo " Processing time: ${processing_time}s" | |
| echo " Waiting 3 seconds before next file..." | |
| sleep 3 | |
| done < /tmp/test_files.txt | |
| # Tear down the file server started earlier; "|| true" keeps set -e from | |
| # aborting if the process already exited. | |
| echo "Stopping HTTP server..." | |
| kill $HTTP_SERVER_PID 2>/dev/null || true | |
| wait $HTTP_SERVER_PID 2>/dev/null || true | |
| # Recount skips from the CSV: the discovery loop ran in a pipeline subshell, so | |
| # increments made there never reached this shell's skipped_files variable. | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| skipped_count=$(tail -n +2 /tmp/skipped_files.csv | wc -l) | |
| skipped_files=$skipped_count | |
| fi | |
| echo "" | |
| echo "==========================================" | |
| echo "=== FINAL TEST RESULTS SUMMARY ===" | |
| echo "==========================================" | |
| echo "Total files in directory: $((total_files + skipped_files))" | |
| echo "Files tested: $total_files" | |
| echo "Files skipped: $skipped_files" | |
| echo "Passed: $passed_files" | |
| echo "Failed: $failed_files" | |
| if [ $total_files -gt 0 ]; then | |
| echo "Success rate (of tested files): $(( passed_files * 100 / total_files ))%" | |
| else | |
| echo "No files were tested" | |
| fi | |
| echo "" | |
| echo "=== Detailed Results ===" | |
| echo "Results saved to: /tmp/test_results.csv" | |
| cat /tmp/test_results.csv | |
| echo "" | |
| if [ $skipped_files -gt 0 ]; then | |
| echo "=== Skipped Files ===" | |
| echo "Skipped files saved to: /tmp/skipped_files.csv" | |
| cat /tmp/skipped_files.csv | |
| echo "" | |
| fi | |
| if [ $total_files -eq 0 ] && [ $skipped_files -gt 0 ]; then | |
| echo "" | |
| echo "⚠ OVERALL RESULT: NO TESTABLE FILES" | |
| echo "All files in directory were skipped - check file formats and accessibility" | |
| elif [ $total_files -eq 0 ]; then | |
| echo "" | |
| echo "⚠ OVERALL RESULT: NO FILES TESTED" | |
| echo "No test files found in $TEST_FILES_DIR" | |
| elif [ $failed_files -eq 0 ] && [ $passed_files -gt 0 ]; then | |
| echo "" | |
| echo "🎉 OVERALL RESULT: ALL TESTED FILES PASSED" | |
| echo "DataPusher Plus is working correctly with all testable local files" | |
| elif [ $passed_files -gt 0 ]; then | |
| echo "" | |
| echo "⚠ OVERALL RESULT: PARTIAL SUCCESS" | |
| echo "DataPusher Plus works with some local files but has issues with others" | |
| else | |
| echo "" | |
| echo "❌ OVERALL RESULT: ALL TESTED FILES FAILED" | |
| echo "DataPusher Plus is not working correctly with local files" | |
| fi | |
| echo "" | |
| echo "Test completed at: $(date)" | |
| - name: Generate Combined Test Results and Worker Analysis | |
| if: always() | |
| run: | | |
| set -eu | |
| echo "=== Generating Combined Test Results and Worker Analysis ===" | |
| echo "=== Processing DataPusher Plus Worker Logs ===" | |
| if [ ! -f /tmp/ckan_worker.log ]; then | |
| echo "No worker log file found at /tmp/ckan_worker.log" | |
| echo "timestamp,job_id,file_name,status,qsv_version,file_format,encoding,normalized,valid_csv,sorted,db_safe_headers,analysis,records,total_time,download_time,analysis_time,copying_time,indexing_time,formulae_time,metadata_time,rows_copied,columns_indexed,error_type,error_message,data_quality_score,processing_efficiency" > /tmp/worker_analysis.csv | |
| else | |
| echo "Worker log file size: $(du -h /tmp/ckan_worker.log | cut -f1)" | |
| echo "Running enhanced Python log analyzer..." | |
| python3 ${GITHUB_WORKSPACE}/tests/log_analyzer.py analyze /tmp/ckan_worker.log /tmp/worker_analysis.csv | |
| fi | |
| if [ ! -f /tmp/test_results.csv ] && [ ! -f /tmp/worker_analysis.csv ]; then | |
| echo "No test results or worker analysis files found" | |
| echo "# DataPusher Plus Test Results" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| echo "No test data available to analyze" >> $GITHUB_STEP_SUMMARY | |
| exit 0 | |
| fi | |
| total_tests=0 | |
| skipped_count=0 | |
| total_files_in_dir=0 | |
| passed=0 | |
| failed=0 | |
| error_count=0 | |
| tested_success_rate=0 | |
| overall_success_rate=0 | |
| # Derive pass/fail/skip counts from the result CSVs (each has one header line). | |
| if [ -f /tmp/test_results.csv ]; then | |
| total_lines=$(wc -l < /tmp/test_results.csv) | |
| total_tests=$((total_lines - 1)) | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| skipped_lines=$(wc -l < /tmp/skipped_files.csv) | |
| skipped_count=$((skipped_lines - 1)) | |
| fi | |
| total_files_in_dir=$((total_tests + skipped_count)) | |
| if [ $total_tests -gt 0 ]; then | |
| # BUG FIX: grep -c prints "0" even when it exits 1 on no match, so the old | |
| # "|| echo 0" fallback yielded "0<newline>0" and broke the arithmetic below | |
| # (error_count was patched with tr -d, passed was not). "|| true" keeps the | |
| # single printed count; ${var:-0} guards the file-unreadable corner case. | |
| passed=$(grep -c ",SUCCESS,.*,complete,true," /tmp/test_results.csv 2>/dev/null || true) | |
| passed=${passed:-0} | |
| failed=$(tail -n +2 /tmp/test_results.csv | grep -v ",SUCCESS,.*,complete,true," | wc -l) | |
| error_count=$(grep -c ",error," /tmp/test_results.csv 2>/dev/null || true) | |
| error_count=${error_count:-0} | |
| tested_success_rate=$(( passed * 100 / total_tests )) | |
| if [ $total_files_in_dir -gt 0 ]; then | |
| overall_success_rate=$(( passed * 100 / total_files_in_dir )) | |
| fi | |
| fi | |
| fi | |
| worker_analysis_available=false | |
| if [ -f /tmp/worker_analysis.csv ]; then | |
| worker_analysis_available=true | |
| fi | |
| { | |
| echo "# DataPusher Plus Test Results" | |
| echo "" | |
| echo "## Summary" | |
| echo "" | |
| echo "| Metric | Value |" | |
| echo "|--------|-------|" | |
| echo "| Total Files in Directory | $total_files_in_dir |" | |
| echo "| Files Tested | $total_tests |" | |
| echo "| Files Skipped | $skipped_count |" | |
| echo "| Passed | $passed |" | |
| echo "| Failed | $failed |" | |
| echo "| Errors | $error_count |" | |
| echo "| Success Rate (Tested Files) | ${tested_success_rate}% |" | |
| echo "| Success Rate (All Files) | ${overall_success_rate}% |" | |
| echo "" | |
| if [ $skipped_count -gt 0 ]; then | |
| echo "## Skipped Files" | |
| echo "" | |
| echo "| File Name | Reason Skipped |" | |
| echo "|-----------|----------------|" | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| tail -n +2 /tmp/skipped_files.csv | while IFS=',' read -r file_name reason; do | |
| echo "| $file_name | $reason |" | |
| done | |
| fi | |
| echo "" | |
| fi | |
| if [ "$worker_analysis_available" = true ]; then | |
| total_jobs=$(tail -n +2 /tmp/worker_analysis.csv | wc -l) | |
| if [ $total_jobs -gt 0 ]; then | |
| echo "## Complete Job Analysis" | |
| echo "" | |
| echo "| # | File Name | Status | Records | Columns | Time (s) | Valid CSV | Headers Safe | Error Type | Quality Score |" | |
| echo "|---|-----------|--------|---------|---------|----------|-----------|--------------|------------|---------------|" | |
| counter=1 | |
| tail -n +2 /tmp/worker_analysis.csv | while IFS=',' read timestamp job_id file_name status qsv_version file_format encoding normalized valid_csv sorted db_safe_headers analysis records total_time download_time analysis_time copying_time indexing_time formulae_time metadata_time rows_copied columns_indexed error_type error_message data_quality_score processing_efficiency; do | |
| full_file_name=$(echo "$file_name" | sed 's/\.\.\.//') | |
| full_error_type="$error_type" | |
| full_headers="$db_safe_headers" | |
| [ -z "$records" ] && records="0" | |
| [ -z "$columns_indexed" ] && columns_indexed="0" | |
| [ -z "$total_time" ] && total_time="0" | |
| [ -z "$data_quality_score" ] && data_quality_score="-" | |
| [ -z "$full_error_type" ] && full_error_type="-" | |
| case "$status" in | |
| "SUCCESS") status_display="✅ SUCCESS" ;; | |
| "ERROR") status_display="❌ ERROR" ;; | |
| "INCOMPLETE") status_display="⏸️ INCOMPLETE" ;; | |
| *) status_display="❓ $status" ;; | |
| esac | |
| echo "| $counter | $full_file_name | $status_display | $records | $columns_indexed | $total_time | $valid_csv | $full_headers | $full_error_type | $data_quality_score |" | |
| counter=$((counter + 1)) | |
| done | |
| echo "" | |
| # BUG FIX: grep -c prints the count (including "0") even when it exits non-zero | |
| # on no match, so "|| echo 0" produced a multi-line "0<newline>0" value that | |
| # broke the numeric [ ... -gt 0 ] tests below. "|| true" is sufficient. | |
| success_jobs=$(grep -c ",SUCCESS," /tmp/worker_analysis.csv || true) | |
| error_jobs=$(grep -c ",ERROR," /tmp/worker_analysis.csv || true) | |
| echo "## File Analysis" | |
| echo "" | |
| if [ $success_jobs -gt 0 ]; then | |
| echo "### File Formats Processed" | |
| echo "" | |
| formats=$(tail -n +2 /tmp/worker_analysis.csv | grep ",SUCCESS," | cut -d',' -f6 | sort | uniq -c) | |
| if [ -n "$formats" ]; then | |
| echo "| Format | Files | Percentage |" | |
| echo "|--------|-------|------------|" | |
| echo "$formats" | while read count format; do | |
| percentage=$((count * 100 / success_jobs)) | |
| case "$format" in | |
| "CSV") format_icon="📊" ;; | |
| "XLSX"|"XLS") format_icon="📈" ;; | |
| "JSON") format_icon="🔧" ;; | |
| "TXT") format_icon="📝" ;; | |
| *) format_icon="📄" ;; | |
| esac | |
| echo "| $format_icon $format | $count | $percentage% |" | |
| done | |
| else | |
| echo "❌ No format data available" | |
| fi | |
| echo "" | |
| echo "### Encoding Distribution" | |
| echo "" | |
| encodings=$(tail -n +2 /tmp/worker_analysis.csv | grep ",SUCCESS," | cut -d',' -f7 | sort | uniq -c) | |
| if [ -n "$encodings" ]; then | |
| echo "| Encoding | Files | Status |" | |
| echo "|----------|-------|--------|" | |
| echo "$encodings" | while read count encoding; do | |
| if [ -n "$encoding" ]; then | |
| if [ "$encoding" = "UTF-8" ] || [ "$encoding" = "UTF" ]; then | |
| status_icon="✅" | |
| else | |
| status_icon="⚠️" | |
| fi | |
| echo "| $status_icon $encoding | $count | Compatible |" | |
| else | |
| echo "| ❓ Unknown | $count | Needs Review |" | |
| fi | |
| done | |
| else | |
| echo "❌ No encoding data available" | |
| fi | |
| echo "" | |
| fi | |
| echo "## Error Analysis" | |
| echo "" | |
| if [ $error_jobs -gt 0 ]; then | |
| echo "### Failed Files Details" | |
| echo "" | |
| echo "" | |
| echo "| File | Error Type | Error Message |" | |
| echo "|------|------------|---------------|" | |
| tail -n +2 /tmp/worker_analysis.csv | grep ",ERROR," | cut -d',' -f3,23,24 | while IFS=',' read file error_type error_msg; do | |
| clean_error=$(echo "$error_msg" | sed 's/^"//;s/"$//') | |
| clean_file=$(echo "$file" | sed 's/\.\.\.//') | |
| echo "| $clean_file | $error_type | $clean_error |" | |
| done | |
| echo "" | |
| else | |
| echo "✅ **No errors found in worker logs** - All processed jobs completed successfully!" | |
| echo "" | |
| fi | |
| echo "## Performance Anomalies" | |
| echo "" | |
| anomalies_output=$(python3 ${GITHUB_WORKSPACE}/tests/log_analyzer.py anomalies /tmp/worker_analysis.csv 2>/dev/null || echo "") | |
| if [ -z "$anomalies_output" ]; then | |
| echo "✅ **No performance anomalies detected** - All jobs processed within expected timeframes" | |
| else | |
| echo "⚠️ **Performance issues detected:**" | |
| echo "" | |
| echo "$anomalies_output" | sed 's/ANOMALY: /🐌 **Slow Processing**: /' | |
| fi | |
| echo "" | |
| fi | |
| fi | |
| if [ $total_tests -eq 0 ] && [ $skipped_count -gt 0 ]; then | |
| echo "## No Testable Files ⚠️" | |
| echo "" | |
| echo "All files in the test directory were skipped." | |
| echo "" | |
| echo "**Common reasons for skipped files:**" | |
| echo "- Unsupported file formats (only .csv, .tsv, .xlsx, .json, .geojson, .txt supported)" | |
| echo "- Files not accessible via HTTP server" | |
| echo "- Hidden files or system files" | |
| echo "" | |
| echo "**Recommendation:** Add supported data files to test directory." | |
| elif [ $total_tests -eq 0 ]; then | |
| echo "## No Files Found ❌" | |
| echo "" | |
| echo "No files found in test directory to test." | |
| elif [ $passed -eq $total_tests ]; then | |
| echo "## All Tested Files Passed! 🎉" | |
| echo "" | |
| echo "DataPusher Plus is working correctly with all testable files." | |
| if [ $skipped_count -gt 0 ]; then | |
| echo "" | |
| echo "**Note:** $skipped_count file(s) were skipped. See the Skipped Files section above for details." | |
| fi | |
| elif [ $passed -gt 0 ]; then | |
| echo "## Result: Partial Success" | |
| echo "" | |
| echo "DataPusher Plus works with some files but has issues with others." | |
| echo "" | |
| else | |
| echo "## Result: All Tested Files Failed ❌" | |
| echo "" | |
| echo "DataPusher Plus is not working correctly with any tested files." | |
| echo "" | |
| echo "### All Failed Files:" | |
| if [ -f /tmp/test_results.csv ]; then | |
| tail -n +2 /tmp/test_results.csv | while IFS=',' read -r timestamp file_name upload_status resource_id datapusher_status datastore_active rows_imported processing_time error_message; do | |
| clean_error=$(echo "$error_message" | sed 's/^"//;s/"$//') | |
| echo "- **$file_name**: $clean_error" | |
| done | |
| fi | |
| if [ $skipped_count -gt 0 ]; then | |
| echo "" | |
| echo "### Files Not Even Attempted:" | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| tail -n +2 /tmp/skipped_files.csv | while IFS=',' read -r file_name reason; do | |
| echo "- **$file_name**: $reason" | |
| done | |
| fi | |
| fi | |
| fi | |
| echo "" | |
| echo "---" | |
| echo "" | |
| echo "**Analysis completed:** $(date '+%A, %B %d, %Y at %I:%M %p %Z')" | |
| } > /tmp/combined_summary.md | |
| cat /tmp/combined_summary.md >> $GITHUB_STEP_SUMMARY | |
| echo "Combined analysis summary generated and added to workflow summary" | |
| echo "" | |
| echo "Preview of generated summary:" | |
| echo "==================================" | |
| cat /tmp/combined_summary.md | |
| # Always publish the raw CSVs and logs, even on failure, so runs can be | |
| # inspected offline after the job's containers are gone. | |
| - name: Upload test results as artifact | |
| uses: actions/upload-artifact@v4 | |
| if: always() | |
| with: | |
| name: datapusher-plus-test-results | |
| path: | | |
| /tmp/test_results.csv | |
| /tmp/ckan_stdout.log | |
| /tmp/ckan_worker.log | |
| /tmp/worker_analysis.csv | |
| retention-days: 3 | |
| # Best-effort teardown: pkill exits non-zero when no process matches, so the | |
| # "|| true" keeps this step from ever failing the job. | |
| - name: Cleanup | |
| if: always() | |
| run: | | |
| echo "Stopping any running CKAN processes..." | |
| pkill -f "ckan.*run" || true | |
| echo "Cleanup completed" | |