Automated DataPusher+ Testing Run #2
| name: Automated DataPusher+ Testing Run | |
| on: | |
| workflow_dispatch: | |
| env: | |
| FILES_DIR: "custom" | |
| DATAPUSHER_BRANCH: "main" | |
| CKAN_VERSION: "2.11" | |
| POSTGRES_PASSWORD: postgres | |
| CKAN_DB_PASSWORD: pass | |
| CKAN_SITE_URL: http://localhost:5000 | |
| CKAN_SITE_ID: default | |
| CKAN_SITE_TITLE: "CKAN Test Instance" | |
| QSV_VER: "7.1.0" | |
| jobs: | |
| setup: | |
| runs-on: ubuntu-latest | |
| container: | |
| image: ckan/ckan-dev:2.11 | |
| options: --user root | |
| services: | |
| solr: | |
| image: ckan/ckan-solr:2.11-solr9 | |
| ports: ["8983:8983"] | |
| postgres: | |
| image: ckan/ckan-postgres-dev:2.11 | |
| env: | |
| POSTGRES_USER: postgres | |
| POSTGRES_PASSWORD: postgres | |
| POSTGRES_DB: postgres | |
| options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 | |
| redis: | |
| image: redis:3 | |
| ports: ["6379:6379"] | |
| # Job-specific environment (these will be available inside the container) | |
| env: | |
| CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test | |
| CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test | |
| CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test | |
| CKAN_SOLR_URL: http://solr:8983/solr/ckan | |
| CKAN_REDIS_URL: redis://redis:6379/1 | |
| CKAN_SITE_URL: http://localhost:5000 | |
| steps: | |
| - name: Fix permissions and install essential tools | |
| run: | | |
| mkdir -p /__w/_temp | |
| chmod -R 777 /__w/_temp | |
| chmod -R 777 /__w/ | |
| apt-get update -y | |
| apt-get install -y curl wget net-tools procps postgresql-client jq | |
| echo "Essential tools installed successfully" | |
| - uses: actions/checkout@v4 | |
| - name: Wait for PostgreSQL to be ready | |
| run: | | |
| echo "Waiting for PostgreSQL to be ready..." | |
| timeout=90 | |
| while [ $timeout -gt 0 ]; do | |
| if PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "SELECT 1;" >/dev/null 2>&1; then | |
| echo "PostgreSQL is ready!" | |
| break | |
| fi | |
| echo "Postgres not ready yet ($timeout s left)..." | |
| sleep 3 | |
| timeout=$((timeout-3)) | |
| done | |
| if [ $timeout -le 0 ]; then | |
| echo "Timeout waiting for PostgreSQL" | |
| exit 1 | |
| fi | |
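| # Equivalent readiness probe using the client tools installed above (a sketch): | |
| #   pg_isready -h postgres -U postgres -t 5 | |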
| - name: Setup database users and permissions | |
| run: | | |
| set -eu | |
| echo "Creating database users (if not exist)..." | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='ckan_default'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_write'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_write WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_read'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_read WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| echo "Creating databases (if not exist)..." | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='ckan_test'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE ckan_test OWNER ckan_default;" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='datastore_test'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE datastore_test OWNER ckan_default;" | |
| echo "Granting permissions (best-effort)..." | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE ckan_test TO ckan_default;" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE datastore_test TO datastore_write;" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT CONNECT ON DATABASE datastore_test TO datastore_read;" | |
| echo "Database setup completed" | |
| - name: Install requirements, ckanapi and datapusher-plus | |
| run: | | |
| set -eu | |
| # Use pip from the container (image usually has Python/pip) | |
| python3 -m pip install --upgrade pip setuptools wheel | |
| if [ -f requirements.txt ]; then | |
| pip install -r requirements.txt | |
| fi | |
| if [ -f requirements-dev.txt ]; then | |
| pip install -r requirements-dev.txt | |
| fi | |
| # install current repo editable if present | |
| if [ -f setup.py ] || [ -f pyproject.toml ]; then | |
| pip install -e . | |
| fi | |
| # Ensure ckanapi and datapusher-plus are available | |
| pip install --upgrade ckanapi | |
| pip install datasize | |
| apt-get install -y python3-virtualenv python3-dev python3-pip python3-wheel build-essential libxslt1-dev libxml2-dev zlib1g-dev git libffi-dev libpq-dev uchardet unzip | |
| # Install datapusher-plus package (the pip package name is typically datapusher-plus) | |
| echo "Installing datapusher-plus from branch: $DATAPUSHER_BRANCH" | |
| pip install -e "git+https://github.com/dathere/datapusher-plus.git@$DATAPUSHER_BRANCH#egg=datapusher-plus" | |
| pip install -e 'git+https://github.com/ckan/ckanext-scheming.git#egg=ckanext-scheming' | |
| echo "Installed ckanapi and datapusher-plus (best-effort)" | |
| - name: Install qsv (musl static) | |
| run: | | |
| set -eu | |
| echo "Attempting to download static qsv musl binary (best-effort)..." | |
| QSV_ZIP="qsv-${QSV_VER}-x86_64-unknown-linux-musl.zip" | |
| QSV_URL="https://github.com/dathere/qsv/releases/download/${QSV_VER}/${QSV_ZIP}" | |
| mkdir -p /tmp/qsv && cd /tmp/qsv | |
| if wget -q --spider "$QSV_URL"; then | |
| wget -q "$QSV_URL" -O "$QSV_ZIP" | |
| unzip -o "$QSV_ZIP" | |
| # try to find 'qsv' or 'qsvdp' binary | |
| if [ -f qsvdp ]; then | |
| mv qsvdp /usr/local/bin/qsvdp | |
| chmod +x /usr/local/bin/qsvdp | |
| echo "Installed qsvdp to /usr/local/bin/qsvdp" | |
| elif [ -f qsv ]; then | |
| mv qsv /usr/local/bin/qsv | |
| chmod +x /usr/local/bin/qsv | |
| echo "Installed qsv to /usr/local/bin/qsv" | |
| else | |
| echo "Downloaded archive but could not find qsv binary inside" | |
| fi | |
| else | |
| echo "qsv release URL not reachable; skipping qsv install" | |
| fi | |
| /usr/local/bin/qsvdp --version >/dev/null 2>&1 || /usr/local/bin/qsv --version >/dev/null 2>&1 || echo "qsv not installed or not runnable (this is okay for plugin presence test)." | |
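| # Smoke-test sketch, assuming the binary landed in /usr/local/bin and the qsvdp build includes 'count': | |
| #   printf 'a,b\n1,2\n' > /tmp/smoke.csv && /usr/local/bin/qsvdp count /tmp/smoke.csv   # expect: 1 | |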
| - name: Setup CKAN configuration (/srv/app/src/ckan/test-core.ini) | |
| run: | | |
| set -eu | |
| # Defensive URL substitutions (keep these) | |
| sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL:-***postgres/ckan_test}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini | |
| if ! grep -q "^solr_url" /srv/app/src/ckan/test-core.ini; then | |
| echo "solr_url = ${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}" >> /srv/app/src/ckan/test-core.ini | |
| fi | |
| if ! grep -q "^ckan.redis.url" /srv/app/src/ckan/test-core.ini; then | |
| echo "ckan.redis.url = ${CKAN_REDIS_URL:-redis://redis:6379/1}" >> /srv/app/src/ckan/test-core.ini | |
| fi | |
| # Desired values (use env vars when present, otherwise fall back) | |
| CKAN_SITE_URL="${CKAN_SITE_URL:-http://localhost:5000}" | |
| CKAN_SQLALCHEMY_URL="${CKAN_SQLALCHEMY_URL:-postgresql://ckan_default:pass@postgres/ckan_test}" | |
| CKAN_DATASTORE_WRITE_URL="${CKAN_DATASTORE_WRITE_URL:-postgresql://datastore_write:pass@postgres/datastore_test}" | |
| CKAN_DATASTORE_READ_URL="${CKAN_DATASTORE_READ_URL:-postgresql://datastore_read:pass@postgres/datastore_test}" | |
| CKAN_SOLR_URL="${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}" | |
| CKAN_REDIS_URL="${CKAN_REDIS_URL:-redis://redis:6379/1}" | |
| # create temp files to hold lists (POSIX sh-safe) | |
| REPLACE_FILE="$(mktemp)" | |
| ADD_FILE="$(mktemp)" | |
| MISSING_ADD_FILE="$(mktemp)" | |
| : > "$REPLACE_FILE" | |
| : > "$ADD_FILE" | |
| : > "$MISSING_ADD_FILE" | |
| # REPLACE_ENTRIES (key|value) - write expanded lines to REPLACE_FILE | |
| printf '%s\n' \ | |
| "ckan.site_url|${CKAN_SITE_URL}" \ | |
| "sqlalchemy.url|${CKAN_SQLALCHEMY_URL}" \ | |
| "ckan.datastore.write_url|${CKAN_DATASTORE_WRITE_URL}" \ | |
| "ckan.datastore.read_url|${CKAN_DATASTORE_READ_URL}" \ | |
| "solr_url|${CKAN_SOLR_URL}" \ | |
| "ckan.redis.url|${CKAN_REDIS_URL}" \ | |
| > "$REPLACE_FILE" | |
| # ADD_LINES content (one entry per line). Comments start with '#' | |
| cat > "$ADD_FILE" <<'EOF' | |
| ckan.site_id = default | |
| ckan.site_title = CKAN Test | |
| ckan.auth.create_default_api_keys = true | |
| ckanext.datapusher_plus.qsv_bin = /usr/local/bin/qsvdp | |
| scheming.dataset_schemas = ckanext.datapusher_plus:dataset-druf.yaml | |
| scheming.presets = ckanext.scheming:presets.json | |
| scheming.dataset_fallback = false | |
| ckanext.datapusher_plus.use_proxy = false | |
| ckanext.datapusher_plus.download_proxy = | |
| ckanext.datapusher_plus.ssl_verify = false | |
| # supports INFO, DEBUG, TRACE - use DEBUG or TRACE when debugging scheming Formulas | |
| ckanext.datapusher_plus.upload_log_level = INFO | |
| ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsb xlsm ods geojson shp qgis zip | |
| ckanext.datapusher_plus.pii_screening = false | |
| ckanext.datapusher_plus.pii_found_abort = false | |
| ckanext.datapusher_plus.pii_regex_resource_id_or_alias = | |
| ckanext.datapusher_plus.pii_show_candidates = false | |
| ckanext.datapusher_plus.pii_quick_screen = false | |
| ckanext.datapusher_plus.preview_rows = 100 | |
| ckanext.datapusher_plus.download_timeout = 300 | |
| ckanext.datapusher_plus.max_content_length = 1256000000000 | |
| ckanext.datapusher_plus.chunk_size = 16384 | |
| ckanext.datapusher_plus.default_excel_sheet = 0 | |
| ckanext.datapusher_plus.sort_and_dupe_check = true | |
| ckanext.datapusher_plus.dedup = false | |
| ckanext.datapusher_plus.unsafe_prefix = unsafe_ | |
| ckanext.datapusher_plus.reserved_colnames = _id | |
| ckanext.datapusher_plus.prefer_dmy = false | |
| ckanext.datapusher_plus.ignore_file_hash = true | |
| ckanext.datapusher_plus.auto_index_threshold = 3 | |
| ckanext.datapusher_plus.auto_index_dates = true | |
| ckanext.datapusher_plus.auto_unique_index = true | |
| ckanext.datapusher_plus.summary_stats_options = | |
| ckanext.datapusher_plus.add_summary_stats_resource = false | |
| ckanext.datapusher_plus.summary_stats_with_preview = false | |
| ckanext.datapusher_plus.qsv_stats_string_max_length = 32767 | |
| ckanext.datapusher_plus.qsv_dates_whitelist = date,time,due,open,close,created | |
| ckanext.datapusher_plus.qsv_freq_limit = 10 | |
| ckanext.datapusher_plus.auto_alias = true | |
| ckanext.datapusher_plus.auto_alias_unique = false | |
| ckanext.datapusher_plus.copy_readbuffer_size = 1048576 | |
| ckanext.datapusher_plus.type_mapping = {"String": "text", "Integer": "numeric","Float": "numeric","DateTime": "timestamp","Date": "date","NULL": "text"} | |
| ckanext.datapusher_plus.auto_spatial_simplication = true | |
| ckanext.datapusher_plus.spatial_simplication_relative_tolerance = 0.1 | |
| ckanext.datapusher_plus.latitude_fields = latitude,lat | |
| ckanext.datapusher_plus.longitude_fields = longitude,long,lon | |
| ckanext.datapusher_plus.jinja2_bytecode_cache_dir = /tmp/jinja2_bytecode_cache | |
| ckanext.datapusher_plus.auto_unzip_one_file = true | |
| EOF | |
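| # For reference, the patched test-core.ini should end up with lines of this shape (illustrative): | |
| #   [app:main] | |
| #   ckan.plugins = ... datastore datapusher_plus scheming_datasets | |
| #   ckanext.datapusher_plus.qsv_bin = /usr/local/bin/qsvdp | |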
| if [ -f /srv/app/src/ckan/test-core.ini ]; then | |
| echo "Patching selective keys in /srv/app/src/ckan/test-core.ini (only the keys you listed)..." | |
| # Ensure single debug = true under [DEFAULT]: remove existing debug lines in DEFAULT then add one | |
| awk 'BEGIN{in=0} | |
| /^\[DEFAULT\]/{ print; in=1; next } | |
| /^\[.*\]/{ if(in){ print "debug = true"; in=0 } } | |
| { | |
| if(in){ | |
| if($1 == "debug") next | |
| } else { | |
| } | |
| } | |
| END { if(in) print "debug = true" }' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.tmp && mv /srv/app/src/ckan/test-core.ini.tmp /srv/app/src/ckan/test-core.ini | |
| # Process REPLACE_FILE: replace if present, otherwise write to missing file | |
| while IFS= read -r entry || [ -n "$entry" ]; do | |
| key="$(printf '%s' "$entry" | cut -d'|' -f1)" | |
| value="$(printf '%s' "$entry" | cut -d'|' -f2-)" | |
| # escape backslash, ampersand and the '|' delimiter for the sed replacement below | |
| esc_value="$(printf '%s' "$value" | sed -e 's/[\\&|]/\\&/g')" | |
| if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then | |
| sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini | |
| else | |
| printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE" | |
| fi | |
| done < "$REPLACE_FILE" | |
| # Process ADD_FILE: replace if present, otherwise collect to missing file | |
| while IFS= read -r ln || [ -n "$ln" ]; do | |
| # comment lines - check if exact comment exists | |
| case "$ln" in | |
| \#*) | |
| if ! grep -Fq "$ln" /srv/app/src/ckan/test-core.ini; then | |
| printf '%s\n' "$ln" >> "$MISSING_ADD_FILE" | |
| fi | |
| ;; | |
| *) | |
| key="$(printf '%s' "$ln" | cut -d'=' -f1 | sed 's/[[:space:]]*$//')" | |
| value="$(printf '%s' "$ln" | cut -d'=' -f2- | sed 's/^[[:space:]]*//')" | |
| esc_value="$(printf '%s' "$value" | sed -e 's/[\\&|]/\\&/g')" | |
| if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then | |
| sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini | |
| else | |
| printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE" | |
| fi | |
| ;; | |
| esac | |
| done < "$ADD_FILE" | |
| # If there are missing lines, insert them after the first [app:main] header, or append the section | |
| if [ -s "$MISSING_ADD_FILE" ]; then | |
| awk -v addfile="$MISSING_ADD_FILE" ' | |
| BEGIN{ | |
| inserted=0 | |
| while ((getline line < addfile) > 0) { add[++na]=line } | |
| close(addfile) | |
| } | |
| { | |
| print | |
| if(!inserted && $0=="[app:main]") { | |
| for(i=1;i<=na;i++) print add[i] | |
| inserted=1 | |
| } | |
| } | |
| END{ | |
| if(!inserted){ | |
| print "[app:main]" | |
| for(i=1;i<=na;i++) print add[i] | |
| } | |
| }' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini | |
| fi | |
| # Final defensive catch: ensure sqlalchemy and datastore URLs reflect env (again) | |
| sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL}|g" /srv/app/src/ckan/test-core.ini | |
| sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL}|g" /srv/app/src/ckan/test-core.ini | |
| else | |
| echo "/srv/app/src/ckan/test-core.ini not found — no selective patching performed." | |
| fi | |
| # Append datapusher plugin(s) to ckan.plugins if present; otherwise add a plugins line | |
| REQUIRED_PLUGINS="datastore datapusher_plus scheming_datasets" | |
| if grep -q "^ckan.plugins" /srv/app/src/ckan/test-core.ini; then | |
| echo "Appending required plugins to existing ckan.plugins line" | |
| current=$(grep "^ckan.plugins" /srv/app/src/ckan/test-core.ini | head -n1 | cut -d'=' -f2-) | |
| for p in $REQUIRED_PLUGINS; do | |
| echo "$current" | grep -qw "$p" || current="$current $p" | |
| done | |
| awk -v new="ckan.plugins = $current" 'BEGIN{done=0} {if(!done && $1=="ckan.plugins") {print new; done=1} else print $0}' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini | |
| else | |
| echo "ckan.plugins = $REQUIRED_PLUGINS" >> /srv/app/src/ckan/test-core.ini | |
| echo "Added ckan.plugins line with required plugins." | |
| fi | |
| echo "---- /srv/app/src/ckan/test-core.ini (cat) ----" | |
| cat /srv/app/src/ckan/test-core.ini | |
| echo "---- end ----" | |
| - name: Initialize CKAN database | |
| run: | | |
| echo "Testing connectivity with CKAN DB user..." | |
| if ! PGPASSWORD=$CKAN_DB_PASSWORD psql -h postgres -U ckan_default -d ckan_test -c "SELECT 1;" >/dev/null 2>&1; then | |
| echo "Cannot connect as ckan_default. Attempting to create database owner and db..." | |
| # PostgreSQL has no CREATE USER/DATABASE ... IF NOT EXISTS; reuse the existence-check pattern from setup | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='ckan_default'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';" | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='ckan_test'" | grep -q 1 || \ | |
| PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE ckan_test OWNER ckan_default;" | |
| fi | |
| echo "Running ckan db init (may be idempotent)..." | |
| if ckan -c /srv/app/src/ckan/test-core.ini db init; then | |
| echo "CKAN DB initialized." | |
| else | |
| echo "ckan db init returned non-zero; continuing (may already be initialized)." | |
| fi | |
| echo "Setting datastore permissions..." | |
| if ckan -c /srv/app/src/ckan/test-core.ini datastore set-permissions | PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres --set ON_ERROR_STOP=1; then | |
| echo "Datastore permissions set." | |
| else | |
| echo "Datastore permission step returned non-zero; continuing." | |
| fi | |
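| # 'datastore set-permissions' only prints SQL (GRANTs for the datastore read/write roles), | |
| # which is why it is piped into psql above instead of being executed by the ckan CLI itself | |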
| - name: Start CKAN server | |
| run: | | |
| set -eu | |
| echo "Starting CKAN server in background..." | |
| # Use nohup to keep it running in background | |
| nohup ckan -c /srv/app/src/ckan/test-core.ini run --host 0.0.0.0 --port 5000 --disable-reloader > /tmp/ckan_stdout.log 2>&1 & | |
| CKAN_PID=$! | |
| echo "CKAN PID=$CKAN_PID" | |
| # wait for port / API | |
| timeout=120 | |
| while [ $timeout -gt 0 ]; do | |
| if ! kill -0 "$CKAN_PID" >/dev/null 2>&1; then | |
| echo "CKAN process died. Showing last lines of log:" | |
| tail -n 200 /tmp/ckan_stdout.log | |
| exit 1 | |
| fi | |
| if curl -fsS "${CKAN_SITE_URL}/api/3/action/status_show" >/dev/null 2>&1; then | |
| echo "CKAN API responding" | |
| break | |
| fi | |
| echo "Waiting for CKAN API... ($timeout s left)" | |
| sleep 3 | |
| timeout=$((timeout-3)) | |
| done | |
| if [ $timeout -le 0 ]; then | |
| echo "Timeout waiting for CKAN to start. Dumping logs..." | |
| tail -n 200 /tmp/ckan_stdout.log | |
| ss -tlnp || netstat -tlnp | |
| exit 1 | |
| fi | |
| echo "CKAN started successfully" | |
| - name: Create sysadmin user admin_ckan and get apikey | |
| run: | | |
| set -eu | |
| echo "Creating user admin_ckan..." | |
| user_response=$(ckanapi action user_create --config /srv/app/src/ckan/test-core.ini \ | |
| name=admin_ckan \ | |
| [email protected] \ | |
| password=test1234 \ | |
| fullname="CKAN Administrator" \ | |
| with_apitoken=true \ | |
| about="Created by GitHub Actions test" 2>/dev/null) || echo "user_create returned non-zero (user may already exist)" | |
| echo "User creation response: $user_response" | |
| echo "Converting admin_ckan user to sysadmin..." | |
| ckan -c /srv/app/src/ckan/test-core.ini sysadmin add admin_ckan | |
| echo "User admin_ckan promoted to sysadmin" | |
| # Extract only the JSON part (everything from { to }) | |
| json_response=$(echo "$user_response" | sed -n '/{/,/}/p') | |
| # Extract API key from the JSON | |
| api_key=$(echo "$json_response" | jq -r '.token // empty') | |
| if [ -n "$api_key" ] && [ "$api_key" != "null" ] && [ "$api_key" != "empty" ]; then | |
| echo "CKAN_API_KEY=$api_key" >> $GITHUB_ENV | |
| echo "API key saved: $api_key" | |
| else | |
| echo "No API key found in response" | |
| fi | |
| echo "User admin_ckan creation completed" | |
| - name: Create API token for datapusher-plus and add to config | |
| run: | | |
| set -eu | |
| echo "Creating API token for datapusher-plus service account..." | |
| # Create API token for admin_ckan user specifically for datapusher-plus | |
| echo "Running: ckan user token add admin_ckan dpplus" | |
| dp_token_output=$(ckan -c /srv/app/src/ckan/test-core.ini user token add admin_ckan dpplus 2>&1) | |
| echo "Full token creation output:" | |
| echo "$dp_token_output" | |
| dp_token=$(echo "$dp_token_output" | tail -n 1 | tr -d '\t') | |
| echo "Extracted token: '$dp_token'" | |
| if [ -n "$dp_token" ] && [ "$dp_token" != "null" ]; then | |
| echo "Created datapusher-plus API token: $dp_token" | |
| # Add the token to the CKAN configuration file | |
| ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$dp_token" | |
| # Verify it was added | |
| echo "Verifying token was added to config:" | |
| grep "ckanext.datapusher_plus.api_token" /srv/app/src/ckan/test-core.ini || echo "Token not found in config!" | |
| # Also set in environment for potential use in other steps | |
| echo "DATAPUSHER_PLUS_API_TOKEN=$dp_token" >> $GITHUB_ENV | |
| echo "API token added to CKAN configuration successfully" | |
| else | |
| echo "Failed to create API token for datapusher-plus" | |
| echo "Using main CKAN API key as fallback..." | |
| ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$CKAN_API_KEY" | |
| fi | |
| - name: Create organization with ckanapi | |
| run: | | |
| set -eu | |
| echo "Creating organization demo-organization (idempotent)..." | |
| ckanapi action organization_create --config /srv/app/src/ckan/test-core.ini \ | |
| name=demo-organization \ | |
| title="Demo Data Publishing Organization" \ | |
| description="Demo org created by GitHub Actions for datapusher-plus testing." || echo "organization_create returned non-zero (may already exist)" | |
| echo "Add admin_ckan as admin to the organization" | |
| ckanapi action organization_member_create --config /srv/app/src/ckan/test-core.ini \ | |
| id=demo-organization username=admin_ckan role=admin || echo "organization_member_create returned non-zero (may already be member)" | |
| - name: Create dataset with ckanapi | |
| run: | | |
| set -eu | |
| echo "Creating dataset my-first-dataset (idempotent)..." | |
| if ckanapi action package_create \ | |
| name=my-first-dataset \ | |
| title="My First Comprehensive Dataset" \ | |
| notes="This is a comprehensive demo dataset created via ckanapi and GitHub Actions for testing CKAN functionality and datapusher-plus integration." \ | |
| owner_org=demo-organization \ | |
| license_id=cc-by \ | |
| version=1.0.0 \ | |
| author="GitHub Actions Automation" \ | |
| [email protected] \ | |
| maintainer="CKAN Admin" \ | |
| [email protected] \ | |
| url=https://github.com/your-repo/your-project \ | |
| private:false \ | |
| state=active \ | |
| 'tags:[{"name":"demo"},{"name":"test"},{"name":"github-actions"},{"name":"automation"},{"name":"csv-data"},{"name":"datapusher-plus"}]' \ | |
| -c /srv/app/src/ckan/test-core.ini; then | |
| echo "Dataset created successfully!" | |
| else | |
| echo "Dataset might already exist, continuing..." | |
| fi | |
| - name: Add resource to dataset with ckanapi | |
| run: | | |
| set -eu | |
| echo "Adding resource to my-first-dataset..." | |
| if ckanapi action resource_create \ | |
| package_id=my-first-dataset \ | |
| url="https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/100kb.csv" \ | |
| name="Sample CSV Data - 100KB Test File" \ | |
| description="Test CSV resource for datapusher-plus pipeline." \ | |
| format=CSV \ | |
| mimetype="text/csv" \ | |
| -c /srv/app/src/ckan/test-core.ini; then | |
| echo "Resource created successfully!" | |
| else | |
| echo "Resource creation failed" | |
| ckanapi action package_show id=my-first-dataset -c /srv/app/src/ckan/test-core.ini | |
| exit 1 | |
| fi | |
| - name: Display CKAN instance inventory | |
| run: | | |
| set -eu | |
| echo "=== CKAN Status (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/status_show" | python3 -m json.tool | |
| echo "" | |
| echo "=== All Datasets (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/package_list" | python3 -m json.tool | |
| echo "" | |
| echo "=== All Organizations (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/organization_list" | python3 -m json.tool | |
| echo "" | |
| echo "=== All Users (HTTP API) ===" | |
| curl -s "http://localhost:5000/api/3/action/user_list" | python3 -m json.tool | |
| - name: Test datastore functionality | |
| run: | | |
| set -eu | |
| echo "Testing datastore functionality..." | |
| # Test 1: Check if datastore is accessible by querying table metadata | |
| echo "=== Testing datastore read access ===" | |
| metadata_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=_table_metadata") | |
| echo "Table metadata response: $metadata_response" | |
| if echo "$metadata_response" | jq -e '.success == true' >/dev/null 2>&1; then | |
| echo "✓ Datastore read access working" | |
| else | |
| echo "✗ Datastore read access failed" | |
| exit 1 | |
| fi | |
| # Test 2: Create a test datastore table | |
| echo "=== Testing datastore write access ===" | |
| test_response=$(curl -s -X POST \ | |
| -H "Content-Type: application/json" \ | |
| -H "Authorization: $CKAN_API_KEY" \ | |
| -d '{ | |
| "resource": {"package_id": "my-first-dataset"}, | |
| "fields": [{"id": "test_col", "type": "text"}, {"id": "value", "type": "int"}], | |
| "records": [{"test_col": "hello", "value": 1}, {"test_col": "world", "value": 2}] | |
| }' \ | |
| "http://localhost:5000/api/3/action/datastore_create") | |
| echo "Test table creation response: $test_response" | |
| if echo "$test_response" | jq -e '.success == true' >/dev/null 2>&1; then | |
| echo "✓ Datastore write access working" | |
| # Extract resource_id for cleanup | |
| test_resource_id=$(echo "$test_response" | jq -r '.result.resource_id') | |
| # Test 3: Query the test table | |
| echo "=== Testing datastore query ===" | |
| query_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$test_resource_id") | |
| echo "Query response: $query_response" | |
| # Cleanup: Delete test table | |
| echo "=== Cleaning up test table ===" | |
| curl -s -X POST \ | |
| -H "Content-Type: application/json" \ | |
| -H "Authorization: $CKAN_API_KEY" \ | |
| -d "{\"resource_id\": \"$test_resource_id\"}" \ | |
| "http://localhost:5000/api/3/action/datastore_delete" >/dev/null | |
| echo "✓ Datastore functionality test completed successfully" | |
| else | |
| echo "✗ Datastore write access failed" | |
| fi | |
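| # The datastore_search checks above rely on the response shape (illustrative): | |
| #   {"success": true, "result": {"total": 2, "records": [{"_id": 1, "test_col": "hello", "value": 1}, ...]}} | |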
| - name: Start CKAN background job worker | |
| run: | | |
| set -eu | |
| echo "Starting CKAN background job worker (CRITICAL for DataPusher Plus)..." | |
| nohup ckan -c /srv/app/src/ckan/test-core.ini jobs worker > /tmp/ckan_worker.log 2>&1 & | |
| WORKER_PID=$! | |
| echo "CKAN Worker PID=$WORKER_PID" | |
| echo "CKAN_WORKER_PID=$WORKER_PID" >> $GITHUB_ENV | |
| # Give worker a moment to start up | |
| sleep 5 | |
| # Verify worker is running | |
| if kill -0 "$WORKER_PID" >/dev/null 2>&1; then | |
| echo "Background job worker started successfully" | |
| echo "Worker logs:" | |
| head -n 20 /tmp/ckan_worker.log || echo "No worker logs yet" | |
| else | |
| echo "Worker failed to start" | |
| cat /tmp/ckan_worker.log | |
| exit 1 | |
| fi | |
| - name: Test DataPusher Plus functionality - Remote Files (CSV Input) | |
| run: | | |
| set -eu | |
| echo "=== Testing DataPusher Plus Functionality - Remote Files from CSV ===" | |
| # Initialize results tracking | |
| echo "timestamp,file_name,upload_status,resource_id,datapusher_status,datastore_active,rows_imported,processing_time,error_message" > /tmp/test_results.csv | |
| # Initialize skipped files tracking | |
| echo "file_name,reason_skipped" > /tmp/skipped_files.csv | |
| # Set path for CSV input file | |
| CSV_INPUT_FILE="${GITHUB_WORKSPACE}/tests/$FILES_DIR/base_files.csv" | |
| # Check if CSV input file exists | |
| if [ ! -f "$CSV_INPUT_FILE" ]; then | |
| echo "ERROR: CSV input file not found: $CSV_INPUT_FILE" | |
| echo "Please ensure the tests/$FILES_DIR/base_files.csv file exists in your repository" | |
| echo "Expected CSV format: file_name,file_url,file_format,file_mimetype,file_description" | |
| exit 1 | |
| fi | |
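| # Illustrative base_files.csv rows matching the expected header (hypothetical URLs): | |
| #   file_name,file_url,file_format,file_mimetype,file_description | |
| #   sample_100kb,https://example.com/files/100kb.csv,CSV,text/csv,Small CSV smoke-test file | |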
| echo "Using CSV input file: $CSV_INPUT_FILE" | |
| echo "CSV file size: $(du -h "$CSV_INPUT_FILE" | cut -f1)" | |
| echo "" | |
| # Validate CSV structure | |
| echo "Validating CSV structure..." | |
| header=$(head -n 1 "$CSV_INPUT_FILE") | |
| echo "CSV Header: $header" | |
| # Check if header contains required columns | |
| if ! echo "$header" | grep -qi "file_url"; then | |
| echo "ERROR: CSV must contain 'file_url' column" | |
| echo "Expected format: file_name,file_url,file_format,file_mimetype,file_description" | |
| exit 1 | |
| fi | |
| # Count total entries in CSV | |
| total_entries=$(tail -n +2 "$CSV_INPUT_FILE" | grep -v '^[[:space:]]*$' | wc -l) | |
| echo "Total entries in CSV: $total_entries" | |
| echo "" | |
| # Display first few entries for verification | |
| echo "First 5 entries from CSV:" | |
| head -n 6 "$CSV_INPUT_FILE" | |
| echo "" | |
| # Create test dataset once | |
| echo "Creating test dataset for DataPusher Plus..." | |
| if ckanapi action package_create \ | |
| name=datapusher-plus-test-remote \ | |
| title="DataPusher Plus Remote Files Test Dataset" \ | |
| owner_org=demo-organization \ | |
| -c /srv/app/src/ckan/test-core.ini >/dev/null 2>&1; then | |
| echo "Test dataset created" | |
| else | |
| echo "Test dataset might already exist, continuing..." | |
| fi | |
| # Initialize counters | |
| total_files=0 | |
| passed_files=0 | |
| failed_files=0 | |
| skipped_files=0 | |
| # Process each line from CSV (skip header) | |
| # Feed the loop from a temp file (not a pipe) so counter updates survive past 'done' | |
| tail -n +2 "$CSV_INPUT_FILE" > /tmp/base_files_entries.csv | |
| while IFS=',' read -r file_name file_url file_format file_mimetype file_desc || [ -n "$file_name" ]; do | |
| # Skip empty lines and comments | |
| [ -z "$file_name" ] && continue | |
| case "$file_name" in | |
| '#'*) continue ;; | |
| ''|*[[:space:]]*) | |
| # Skip lines with only whitespace | |
| [ -z "$(echo "$file_name" | tr -d '[:space:]')" ] && continue | |
| ;; | |
| esac | |
| # Trim whitespace from all fields | |
| file_name=$(echo "$file_name" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') | |
| file_url=$(echo "$file_url" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | tr -d '"') | |
| file_format=$(echo "$file_format" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') | |
| file_mimetype=$(echo "$file_mimetype" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') | |
| file_desc=$(echo "$file_desc" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') | |
| # Validate required fields | |
| if [ -z "$file_url" ]; then | |
| echo "SKIP: Missing URL for file: $file_name" | |
| echo "$file_name,Missing file_url in CSV" >> /tmp/skipped_files.csv | |
| skipped_files=$((skipped_files + 1)) | |
| continue | |
| fi | |
| # Set defaults if fields are empty | |
| [ -z "$file_name" ] && file_name=$(basename "$file_url") | |
| [ -z "$file_format" ] && file_format="UNKNOWN" | |
| [ -z "$file_mimetype" ] && file_mimetype="application/octet-stream" | |
| [ -z "$file_desc" ] && file_desc="Remote file: $file_name" | |
| # Test if URL is accessible | |
| echo "Testing accessibility of: $file_url" | |
| if ! curl -sf --head --max-time 10 "$file_url" > /dev/null 2>&1; then | |
| echo "SKIP: File not accessible via HTTP: $file_url" | |
| echo "$file_name,File not accessible or timed out" >> /tmp/skipped_files.csv | |
| skipped_files=$((skipped_files + 1)) | |
| continue | |
| fi | |
| total_files=$((total_files + 1)) | |
| echo "" | |
| echo "==========================================" | |
| echo "Testing File #${total_files}: $file_name" | |
| echo "URL: $file_url" | |
| echo "Format: $file_format" | |
| echo "Description: $file_desc" | |
| # Try to get file size | |
| file_size=$(curl -sI "$file_url" | grep -i content-length | cut -d' ' -f2 | tr -d '\r' || echo "unknown") | |
| echo "File size: $file_size bytes" | |
| echo "==========================================" | |
| # Initialize tracking variables for this file | |
| start_time=$(date +%s) | |
| upload_status="FAILED" | |
| resource_id="" | |
| datapusher_status="N/A" | |
| datastore_active="false" | |
| rows_imported="0" | |
| error_message="" | |
| # Create resource with URL for this test file | |
| echo "Creating resource with URL for $file_name..." | |
| if resource_response=$(ckanapi action resource_create \ | |
| package_id=datapusher-plus-test-remote \ | |
| url="$file_url" \ | |
| name="Remote Test: $file_name" \ | |
| description="$file_desc" \ | |
| format="$file_format" \ | |
| mimetype="$file_mimetype" \ | |
| -c /srv/app/src/ckan/test-core.ini 2>&1); then | |
| echo "Resource created successfully for $file_name" | |
| upload_status="SUCCESS" | |
| # Extract resource ID | |
| resource_id=$(echo "$resource_response" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') | |
| if [ -z "$resource_id" ]; then | |
| resource_id=$(echo "$resource_response" | sed -n 's/.*"id"[[:space:]]*:[[:space:]]*"\([a-f0-9-]*\)".*/\1/p') | |
| fi | |
| echo "Resource ID: $resource_id" | |
| if [ -n "$resource_id" ] && [ "$resource_id" != "null" ]; then | |
| # Monitor DataPusher Plus processing | |
| echo "Monitoring DataPusher Plus processing for $file_name..." | |
| max_attempts=90 # 3 minutes max per file | |
| for attempt in $(seq 1 $max_attempts); do | |
| sleep 2 | |
| # Check DataPusher status | |
| if dp_status_response=$(curl -s -H "Authorization: $CKAN_API_KEY" \ | |
| "http://localhost:5000/api/3/action/datapusher_status?resource_id=$resource_id" 2>/dev/null); then | |
| if echo "$dp_status_response" | grep -q '"success"[[:space:]]*:[[:space:]]*true'; then | |
| datapusher_status=$(echo "$dp_status_response" | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') | |
| if [ -z "$datapusher_status" ]; then | |
| datapusher_status="unknown" | |
| fi | |
| # Clean up status string | |
| datapusher_status=$(echo "$datapusher_status" | tr -d '\n\r\t ' | cut -c1-10) | |
| echo " Attempt $attempt/$max_attempts: DataPusher status = $datapusher_status" | |
| if [ "$datapusher_status" = "complete" ]; then | |
| echo " ✓ DataPusher processing completed for $file_name!" | |
| break | |
| elif [ "$datapusher_status" = "error" ]; then | |
| error_info=$(echo "$dp_status_response" | grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' | head -1) | |
| if [ -z "$error_info" ]; then | |
| error_info="DataPusher processing error" | |
| fi | |
| error_message="DataPusher error: $error_info" | |
| echo " ✗ DataPusher processing failed for $file_name: $error_message" | |
| break | |
| fi | |
| else | |
| # API returned success=false | |
| if [ $attempt -eq $max_attempts ]; then | |
| error_message="DataPusher status API returned success=false" | |
| echo " ✗ DataPusher status API error for $file_name" | |
| fi | |
| fi | |
| else | |
| # Curl failed | |
| if [ $attempt -eq $max_attempts ]; then | |
| error_message="Failed to get DataPusher status" | |
| echo " ✗ Cannot reach DataPusher status API for $file_name" | |
| fi | |
| fi | |
| # Progress indicator | |
| if [ $((attempt % 15)) -eq 0 ]; then | |
| echo " Still processing $file_name... (${attempt}/${max_attempts})" | |
| fi | |
| done | |
| # Check final resource status | |
| echo "Checking final status for $file_name..." | |
| if final_resource=$(curl -s "http://localhost:5000/api/3/action/resource_show?id=$resource_id" 2>/dev/null); then | |
| if echo "$final_resource" | grep -q '"datastore_active"[[:space:]]*:[[:space:]]*true'; then | |
| datastore_active="true" | |
| echo " ✓ DataStore activated for $file_name" | |
| # Get row count | |
| if datastore_data=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$resource_id&limit=1" 2>/dev/null); then | |
| rows_imported=$(echo "$datastore_data" | grep -o '"total"[[:space:]]*:[[:space:]]*[0-9]*' | sed 's/.*"total"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/') | |
| if [ -z "$rows_imported" ]; then | |
| rows_imported="0" | |
| fi | |
| echo " ✓ Rows imported for $file_name: $rows_imported" | |
| fi | |
| else | |
| datastore_active="false" | |
| echo " ✗ DataStore not activated for $file_name" | |
| fi | |
| else | |
| echo " ✗ Cannot check final resource status for $file_name" | |
| fi | |
| else | |
| error_message="No valid resource ID extracted for $file_name" | |
| echo " ✗ $error_message" | |
| fi | |
| else | |
| echo " ✗ Resource creation failed for $file_name" | |
| error_message="Resource creation failed: $(echo "$resource_response" | head -1)" | |
| fi | |
| # Calculate processing time | |
| end_time=$(date +%s) | |
| processing_time=$((end_time - start_time)) | |
| # Log results for this file | |
| timestamp=$(date '+%Y-%m-%d %H:%M:%S') | |
| echo "$timestamp,$file_name,$upload_status,$resource_id,$datapusher_status,$datastore_active,$rows_imported,$processing_time,\"$error_message\"" >> /tmp/test_results.csv | |
| # Update counters | |
| if [ "$upload_status" = "SUCCESS" ] && [ "$datapusher_status" = "complete" ] && [ "$datastore_active" = "true" ]; then | |
| passed_files=$((passed_files + 1)) | |
| echo " 🎉 PASS: $file_name processed successfully" | |
| else | |
| failed_files=$((failed_files + 1)) | |
| echo " ❌ FAIL: $file_name had issues" | |
| fi | |
| echo " Processing time: ${processing_time}s" | |
| # Brief pause between files to avoid overwhelming the system | |
| echo " Waiting 3 seconds before next file..." | |
| sleep 3 | |
| done < /tmp/base_files_entries.csv | |
| # Count skipped files from CSV | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| skipped_count=$(tail -n +2 /tmp/skipped_files.csv | wc -l) | |
| skipped_files=$skipped_count | |
| fi | |
| echo "" | |
| echo "==========================================" | |
| echo "=== FINAL TEST RESULTS SUMMARY ===" | |
| echo "==========================================" | |
| echo "Total files in CSV: $total_entries" | |
| echo "Files tested: $total_files" | |
| echo "Files skipped: $skipped_files" | |
| echo "Passed: $passed_files" | |
| echo "Failed: $failed_files" | |
| if [ $total_files -gt 0 ]; then | |
| echo "Success rate (of tested files): $(( passed_files * 100 / total_files ))%" | |
| else | |
| echo "No files were tested" | |
| fi | |
| echo "" | |
| echo "=== Detailed Results ===" | |
| echo "Results saved to: /tmp/test_results.csv" | |
| cat /tmp/test_results.csv | |
| echo "" | |
| if [ $skipped_files -gt 0 ]; then | |
| echo "=== Skipped Files ===" | |
| echo "Skipped files saved to: /tmp/skipped_files.csv" | |
| cat /tmp/skipped_files.csv | |
| echo "" | |
| fi | |
| # Determine overall result | |
| if [ $total_files -eq 0 ] && [ $skipped_files -gt 0 ]; then | |
| echo "" | |
| echo "⚠ OVERALL RESULT: NO TESTABLE FILES" | |
| echo "All files in CSV were skipped - check URLs and accessibility" | |
| elif [ $total_files -eq 0 ]; then | |
| echo "" | |
| echo "⚠ OVERALL RESULT: NO FILES TESTED" | |
| echo "No valid entries found in CSV file" | |
| elif [ $failed_files -eq 0 ] && [ $passed_files -gt 0 ]; then | |
| echo "" | |
| echo "🎉 OVERALL RESULT: ALL TESTED FILES PASSED" | |
| echo "DataPusher Plus is working correctly with all testable remote files" | |
| elif [ $passed_files -gt 0 ]; then | |
| echo "" | |
| echo "⚠ OVERALL RESULT: PARTIAL SUCCESS" | |
| echo "DataPusher Plus works with some remote files but has issues with others" | |
| else | |
| echo "" | |
| echo "❌ OVERALL RESULT: ALL TESTED FILES FAILED" | |
| echo "DataPusher Plus is not working correctly with remote files" | |
| fi | |
| echo "" | |
| echo "Test completed at: $(date)" | |
| - name: Generate Combined Test Results and Worker Analysis | |
| if: always() | |
| run: | | |
| set -eu | |
| echo "=== Generating Combined Test Results and Worker Analysis ===" | |
| # First, process worker logs if they exist | |
| echo "=== Processing DataPusher Plus Worker Logs ===" | |
| # Check if worker log exists | |
| if [ ! -f /tmp/ckan_worker.log ]; then | |
| echo "No worker log file found at /tmp/ckan_worker.log" | |
| # Create comprehensive header structure with all new fields including enhanced analytics | |
| echo "timestamp,job_id,file_name,status,qsv_version,file_format,encoding,normalized,valid_csv,sorted,db_safe_headers,analysis,records,total_time,download_time,analysis_time,copying_time,indexing_time,formulae_time,metadata_time,rows_copied,columns_indexed,error_type,error_message,data_quality_score,processing_efficiency" > /tmp/worker_analysis.csv | |
| else | |
| echo "Worker log file size: $(du -h /tmp/ckan_worker.log | cut -f1)" | |
| echo "Running enhanced Python log analyzer..." | |
| # Run the Python script to analyze logs | |
| python3 ${GITHUB_WORKSPACE}/tests/log_analyzer.py analyze /tmp/ckan_worker.log /tmp/worker_analysis.csv | |
| fi | |
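| # Column map for the cut/grep calls below (1-based): 3=file_name, 4=status, 6=file_format, | |
| # 7=encoding, 23=error_type, 24=error_message | |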
| # Now check if both results files exist | |
| if [ ! -f /tmp/test_results.csv ] && [ ! -f /tmp/worker_analysis.csv ]; then | |
| echo "No test results or worker analysis files found" | |
| echo "# DataPusher Plus Test Results" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| echo "No test data available to analyze" >> $GITHUB_STEP_SUMMARY | |
| exit 0 | |
| fi | |
| # Initialize counters for test results | |
| total_tests=0 | |
| skipped_count=0 | |
| total_files_in_dir=0 | |
| passed=0 | |
| failed=0 | |
| error_count=0 | |
| tested_success_rate=0 | |
| overall_success_rate=0 | |
| # Process test results if available | |
| if [ -f /tmp/test_results.csv ]; then | |
| # Count total results | |
| total_lines=$(wc -l < /tmp/test_results.csv) | |
| total_tests=$((total_lines - 1)) # Subtract header line | |
| # Count skipped files | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| skipped_lines=$(wc -l < /tmp/skipped_files.csv) | |
| skipped_count=$((skipped_lines - 1)) # Subtract header line | |
| fi | |
| total_files_in_dir=$((total_tests + skipped_count)) | |
| if [ $total_tests -gt 0 ]; then | |
| # Count results by status | |
| passed=$(tail -n +2 /tmp/test_results.csv | grep -c ",SUCCESS,.*,complete,true," || true) | |
| failed=$(tail -n +2 /tmp/test_results.csv | grep -vc ",SUCCESS,.*,complete,true," || true) | |
| # grep -c prints 0 itself on no match; '|| true' only absorbs its non-zero exit under 'set -e' | |
| error_count=$(tail -n +2 /tmp/test_results.csv | grep -c ",error," || true) | |
| # Calculate success rates | |
| tested_success_rate=$(( passed * 100 / total_tests )) | |
| if [ $total_files_in_dir -gt 0 ]; then | |
| overall_success_rate=$(( passed * 100 / total_files_in_dir )) | |
| fi | |
| fi | |
| fi | |
| # Check if worker analysis is available | |
| worker_analysis_available=false | |
| if [ -f /tmp/worker_analysis.csv ]; then | |
| worker_analysis_available=true | |
| fi | |
| # Start building the combined summary | |
| { | |
| echo "# DataPusher Plus Test Results" | |
| echo "" | |
| echo "## Summary" | |
| echo "" | |
| echo "| Metric | Value |" | |
| echo "|--------|-------|" | |
| echo "| Total Files in Directory | $total_files_in_dir |" | |
| echo "| Files Tested | $total_tests |" | |
| echo "| Files Skipped | $skipped_count |" | |
| echo "| Passed | $passed |" | |
| echo "| Failed | $failed |" | |
| echo "| Errors | $error_count |" | |
| echo "| Success Rate (Tested Files) | ${tested_success_rate}% |" | |
| echo "| Success Rate (All Files) | ${overall_success_rate}% |" | |
| echo "" | |
| # Show skipped files section if any exist | |
| if [ $skipped_count -gt 0 ]; then | |
| echo "## Skipped Files" | |
| echo "" | |
| echo "| File Name | Reason Skipped |" | |
| echo "|-----------|----------------|" | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| tail -n +2 /tmp/skipped_files.csv | while IFS=',' read -r file_name reason; do | |
| echo "| $file_name | $reason |" | |
| done | |
| fi | |
| echo "" | |
| fi | |
| # Show worker analysis table if available | |
| if [ "$worker_analysis_available" = true ]; then | |
| total_jobs=$(tail -n +2 /tmp/worker_analysis.csv | wc -l) | |
| if [ $total_jobs -gt 0 ]; then | |
| echo "## Complete Job Analysis" | |
| echo "" | |
| echo "| # | File Name | Status | Records | Columns | Time (s) | Valid CSV | Headers Safe | Error Type | Quality Score |" | |
| echo "|---|-----------|--------|---------|---------|----------|-----------|--------------|------------|---------------|" | |
| counter=1 | |
| tail -n +2 /tmp/worker_analysis.csv | while IFS=',' read -r timestamp job_id file_name status qsv_version file_format encoding normalized valid_csv sorted db_safe_headers analysis records total_time download_time analysis_time copying_time indexing_time formulae_time metadata_time rows_copied columns_indexed error_type error_message data_quality_score processing_efficiency; do | |
| # Don't truncate values - allow full content with horizontal scroll | |
| full_file_name=$(echo "$file_name" | sed 's/\.\.\.//') | |
| full_error_type="$error_type" | |
| full_headers="$db_safe_headers" | |
| # Handle empty values | |
| [ -z "$records" ] && records="0" | |
| [ -z "$columns_indexed" ] && columns_indexed="0" | |
| [ -z "$total_time" ] && total_time="0" | |
| [ -z "$data_quality_score" ] && data_quality_score="-" | |
| [ -z "$full_error_type" ] && full_error_type="-" | |
| # Add status emoji | |
| case "$status" in | |
| "SUCCESS") status_display="✅ SUCCESS" ;; | |
| "ERROR") status_display="❌ ERROR" ;; | |
| "INCOMPLETE") status_display="⏸️ INCOMPLETE" ;; | |
| *) status_display="❓ $status" ;; | |
| esac | |
| echo "| $counter | $full_file_name | $status_display | $records | $columns_indexed | $total_time | $valid_csv | $full_headers | $full_error_type | $data_quality_score |" | |
| counter=$((counter + 1)) | |
| done | |
| echo "" | |
| # Add worker analysis sections | |
| success_jobs=$(tail -n +2 /tmp/worker_analysis.csv | grep -c ",SUCCESS," || true) | |
| error_jobs=$(tail -n +2 /tmp/worker_analysis.csv | grep -c ",ERROR," || true) | |
| # File Analysis | |
| echo "## File Analysis" | |
| echo "" | |
| if [ $success_jobs -gt 0 ]; then | |
| # File formats processed | |
| echo "### File Formats Processed" | |
| echo "" | |
| formats=$(tail -n +2 /tmp/worker_analysis.csv | grep ",SUCCESS," | cut -d',' -f6 | sort | uniq -c) | |
| if [ -n "$formats" ]; then | |
| echo "| Format | Files | Percentage |" | |
| echo "|--------|-------|------------|" | |
| echo "$formats" | while read count format; do | |
| percentage=$((count * 100 / success_jobs)) | |
| # Add format icon | |
| case "$format" in | |
| "CSV") format_icon="📊" ;; | |
| "XLSX"|"XLS") format_icon="📈" ;; | |
| "JSON") format_icon="🔧" ;; | |
| "TXT") format_icon="📝" ;; | |
| *) format_icon="📄" ;; | |
| esac | |
| echo "| $format_icon $format | $count | $percentage% |" | |
| done | |
| else | |
| echo "❌ No format data available" | |
| fi | |
| echo "" | |
| # Encoding types | |
| echo "### Encoding Distribution" | |
| echo "" | |
| encodings=$(tail -n +2 /tmp/worker_analysis.csv | grep ",SUCCESS," | cut -d',' -f7 | sort | uniq -c) | |
| if [ -n "$encodings" ]; then | |
| echo "| Encoding | Files | Status |" | |
| echo "|----------|-------|--------|" | |
| echo "$encodings" | while read count encoding; do | |
| if [ -n "$encoding" ]; then | |
| if [ "$encoding" = "UTF-8" ] || [ "$encoding" = "UTF" ]; then | |
| status_icon="✅" | |
| else | |
| status_icon="⚠️" | |
| fi | |
| echo "| $status_icon $encoding | $count | Compatible |" | |
| else | |
| echo "| ❓ Unknown | $count | Needs Review |" | |
| fi | |
| done | |
| else | |
| echo "❌ No encoding data available" | |
| fi | |
| echo "" | |
| fi | |
| # Error Analysis | |
| echo "## Error Analysis" | |
| echo "" | |
| if [ $error_jobs -gt 0 ]; then | |
| echo "### Failed Files Details" | |
| echo "" | |
| echo "" | |
| echo "| File | Error Type | Error Message |" | |
| echo "|------|------------|---------------|" | |
| tail -n +2 /tmp/worker_analysis.csv | grep ",ERROR," | cut -d',' -f3,23,24 | while IFS=',' read file error_type error_msg; do | |
| clean_error=$(echo "$error_msg" | sed 's/^"//;s/"$//') | |
| clean_file=$(echo "$file" | sed 's/\.\.\.//') | |
| echo "| $clean_file | $error_type | $clean_error |" | |
| done | |
| echo "" | |
| else | |
| echo "✅ **No errors found in worker logs** - All processed jobs completed successfully!" | |
| echo "" | |
| fi | |
| # Performance Anomalies | |
| echo "## Performance Anomalies" | |
| echo "" | |
| anomalies_output=$(python3 ${GITHUB_WORKSPACE}/tests/log_analyzer.py anomalies /tmp/worker_analysis.csv 2>/dev/null || echo "") | |
| if [ -z "$anomalies_output" ]; then | |
| echo "✅ **No performance anomalies detected** - All jobs processed within expected timeframes" | |
| else | |
| echo "⚠️ **Performance issues detected:**" | |
| echo "" | |
| echo "$anomalies_output" | sed 's/ANOMALY: /🐌 **Slow Processing**: /' | |
| fi | |
| echo "" | |
| fi | |
| fi | |
| # Add comprehensive analysis based on results (keep original logic) | |
| if [ $total_tests -eq 0 ] && [ $skipped_count -gt 0 ]; then | |
| echo "## No Testable Files ⚠️" | |
| echo "" | |
| echo "All files in the test directory were skipped." | |
| echo "" | |
| echo "**Common reasons for skipped files:**" | |
| echo "- Unsupported file formats (only .csv, .tsv, .xlsx, .json, .geojson, .txt supported)" | |
| echo "- Files not accessible via HTTP server" | |
| echo "- Hidden files or system files" | |
| echo "" | |
| echo "**Recommendation:** Add supported data files to test directory." | |
| elif [ $total_tests -eq 0 ]; then | |
| echo "## No Files Found ❌" | |
| echo "" | |
| echo "No files found in test directory to test." | |
| elif [ $passed -eq $total_tests ]; then | |
| echo "## All Tested Files Passed! 🎉" | |
| echo "" | |
| echo "DataPusher Plus is working correctly with all testable files." | |
| if [ $skipped_count -gt 0 ]; then | |
| echo "" | |
| echo "**Note:** $skipped_count file(s) were skipped. See the Skipped Files section above for details." | |
| fi | |
| elif [ $passed -gt 0 ]; then | |
| echo "## Result: Partial Success" | |
| echo "" | |
| echo "DataPusher Plus works with some files but has issues with others." | |
| echo "" | |
| else | |
| echo "## Result: All Tested Files Failed ❌" | |
| echo "" | |
| echo "DataPusher Plus is not working correctly with any tested files." | |
| echo "" | |
| echo "### All Failed Files:" | |
| if [ -f /tmp/test_results.csv ]; then | |
| tail -n +2 /tmp/test_results.csv | while IFS=',' read -r timestamp file_name upload_status resource_id datapusher_status datastore_active rows_imported processing_time error_message; do | |
| clean_error=$(echo "$error_message" | sed 's/^"//;s/"$//') | |
| echo "- **$file_name**: $clean_error" | |
| done | |
| fi | |
| if [ $skipped_count -gt 0 ]; then | |
| echo "" | |
| echo "### Files Not Even Attempted:" | |
| if [ -f /tmp/skipped_files.csv ]; then | |
| tail -n +2 /tmp/skipped_files.csv | while IFS=',' read -r file_name reason; do | |
| echo "- **$file_name**: $reason" | |
| done | |
| fi | |
| fi | |
| fi | |
| echo "" | |
| echo "---" | |
| echo "" | |
| echo "**Analysis completed:** $(date '+%A, %B %d, %Y at %I:%M %p %Z')" | |
| } > /tmp/combined_summary.md | |
| # Write to GitHub Actions step summary | |
| cat /tmp/combined_summary.md >> $GITHUB_STEP_SUMMARY | |
| echo "Combined analysis summary generated and added to workflow summary" | |
| echo "" | |
| echo "Preview of generated summary:" | |
| echo "==================================" | |
| cat /tmp/combined_summary.md | |
| - name: Upload test results as artifact | |
| uses: actions/upload-artifact@v4 | |
| if: always() | |
| with: | |
| name: datapusher-plus-test-results | |
| path: | | |
| /tmp/test_results.csv | |
| /tmp/ckan_stdout.log | |
| /tmp/ckan_worker.log | |
| /tmp/worker_analysis.csv | |
| retention-days: 3 | |
| - name: Cleanup | |
| if: always() | |
| run: | | |
| echo "Stopping any running CKAN processes..." | |
| pkill -f "ckan.*run" || true | |
| echo "Cleanup completed" |