Automated DataPusher+ Testing Run #2

Workflow file for this run

name: Automated DataPusher+ Testing Run
on:
workflow_dispatch:
env:
FILES_DIR: "custom"
DATAPUSHER_BRANCH: "main"
CKAN_VERSION: "2.11"
POSTGRES_PASSWORD: postgres
CKAN_DB_PASSWORD: pass
CKAN_SITE_URL: http://localhost:5000
CKAN_SITE_ID: default
CKAN_SITE_TITLE: "CKAN Test Instance"
QSV_VER: "7.1.0"
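# QSV_VER is interpolated into the qsv release download URL in the
# "Install qsv" step below; FILES_DIR selects which tests/<dir>/base_files.csv
# drives the remote-files test step.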
jobs:
setup:
runs-on: ubuntu-latest
container:
image: ckan/ckan-dev:2.11
options: --user root
services:
solr:
image: ckan/ckan-solr:2.11-solr9
ports: ["8983:8983"]
postgres:
image: ckan/ckan-postgres-dev:2.11
env:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: postgres
options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
redis:
image: redis:3
ports: ["6379:6379"]
# Job-specific environment (these will be available inside the container)
env:
CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test
CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test
CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test
CKAN_SOLR_URL: http://solr:8983/solr/ckan
CKAN_REDIS_URL: redis://redis:6379/1
CKAN_SITE_URL: http://localhost:5000
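# Service containers are reachable from the job container by their service
# labels (postgres, solr, redis), hence the hostnames in the URLs above.
# Quick connectivity sketch (illustrative):
#   psql "postgresql://ckan_default:pass@postgres/ckan_test" -c "SELECT 1;"
#   curl -s http://solr:8983/solr/ckan/admin/ping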
steps:
- name: Fix permissions and install essential tools
run: |
mkdir -p /__w/_temp
chmod -R 777 /__w/_temp
chmod -R 777 /__w/
apt-get update -y
apt-get install -y curl wget net-tools procps postgresql-client jq
echo "Essential tools installed successfully"
- uses: actions/checkout@v4
- name: Wait for PostgreSQL to be ready
run: |
echo "Waiting for PostgreSQL to be ready..."
timeout=90
while [ $timeout -gt 0 ]; do
if PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "SELECT 1;" >/dev/null 2>&1; then
echo "PostgreSQL is ready!"
break
fi
echo "Postgres not ready yet ($timeout s left)..."
sleep 3
timeout=$((timeout-3))
done
if [ $timeout -le 0 ]; then
echo "Timeout waiting for PostgreSQL"
exit 1
fi
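# pg_isready (from postgresql-client, installed above) is a lighter probe
# than a full query, e.g.:
#   pg_isready -h postgres -U postgres -t 5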
- name: Setup database users and permissions
run: |
set -eu
echo "Creating database users (if not exist)..."
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='ckan_default'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_write'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_write WITH PASSWORD '$CKAN_DB_PASSWORD';"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='datastore_read'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER datastore_read WITH PASSWORD '$CKAN_DB_PASSWORD';"
echo "Creating databases (if not exist)..."
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='ckan_test'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE ckan_test OWNER ckan_default;"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='datastore_test'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE datastore_test OWNER ckan_default;"
echo "Granting permissions (best-effort)..."
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE ckan_test TO ckan_default;"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE datastore_test TO datastore_write;"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "GRANT CONNECT ON DATABASE datastore_test TO datastore_read;"
echo "Database setup completed"
- name: Install requirements, ckanapi and datapusher-plus
run: |
set -eu
# Use pip from the container (image usually has Python/pip)
python3 -m pip install --upgrade pip setuptools wheel
if [ -f requirements.txt ]; then
pip install -r requirements.txt
fi
if [ -f requirements-dev.txt ]; then
pip install -r requirements-dev.txt
fi
# install current repo editable if present
if [ -f setup.py ] || [ -f pyproject.toml ]; then
pip install -e .
fi
# Ensure ckanapi and datapusher-plus are available
pip install --upgrade ckanapi
pip install datasize
apt-get install -y python3-virtualenv python3-dev python3-pip python3-wheel build-essential libxslt1-dev libxml2-dev zlib1g-dev git libffi-dev libpq-dev uchardet unzip
# Install datapusher-plus package (the pip package name is typically datapusher-plus)
echo "Installing datapusher-plus from branch: $DATAPUSHER_BRANCH"
pip install -e "git+https://github.com/dathere/datapusher-plus.git@$DATAPUSHER_BRANCH#egg=datapusher-plus"
pip install -e 'git+https://github.com/ckan/ckanext-scheming.git#egg=ckanext-scheming'
echo "Installed ckanapi and datapusher-plus (best-effort)"
- name: Install qsv (musl static)
run: |
set -eu
echo "Attempting to download static qsv musl binary (best-effort)..."
QSV_ZIP="qsv-${QSV_VER}-x86_64-unknown-linux-musl.zip"
QSV_URL="https://github.com/dathere/qsv/releases/download/${QSV_VER}/${QSV_ZIP}"
mkdir -p /tmp/qsv && cd /tmp/qsv
if wget -q --spider "$QSV_URL"; then
wget -q "$QSV_URL" -O "$QSV_ZIP"
unzip -o "$QSV_ZIP"
# try to find 'qsv' or 'qsvdp' binary
if [ -f qsvdp ]; then
mv qsvdp /usr/local/bin/qsvdp
chmod +x /usr/local/bin/qsvdp
echo "Installed qsvdp to /usr/local/bin/qsvdp"
elif [ -f qsv ]; then
mv qsv /usr/local/bin/qsv
chmod +x /usr/local/bin/qsv
echo "Installed qsv to /usr/local/bin/qsv"
else
echo "Downloaded archive but could not find qsv binary inside"
fi
else
echo "qsv release URL not reachable; skipping qsv install"
fi
/usr/local/bin/qsvdp --version >/dev/null 2>&1 || /usr/local/bin/qsv --version >/dev/null 2>&1 || echo "qsv not installed or not runnable (this is okay for plugin presence test)."
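# Non-destructive check of what landed on PATH (illustrative; version output
# format depends on the qsv build):
#   command -v qsvdp qsv || true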
- name: Setup CKAN configuration (/srv/app/src/ckan/test-core.ini)
run: |
set -eu
# Defensive URL substitutions (keep these)
sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL:-***postgres/ckan_test}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL:-***postgres/datastore_test}|g" /srv/app/src/ckan/test-core.ini
if ! grep -q "^solr_url" /srv/app/src/ckan/test-core.ini; then
echo "solr_url = ${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}" >> /srv/app/src/ckan/test-core.ini
fi
if ! grep -q "^ckan.redis.url" /srv/app/src/ckan/test-core.ini; then
echo "ckan.redis.url = ${CKAN_REDIS_URL:-redis://redis:6379/1}" >> /srv/app/src/ckan/test-core.ini
fi
# Desired values (use env vars when present, otherwise fall back)
CKAN_SITE_URL="${CKAN_SITE_URL:-http://localhost:5000}"
CKAN_SQLALCHEMY_URL="${CKAN_SQLALCHEMY_URL:-***postgres/ckan_test}"
CKAN_DATASTORE_WRITE_URL="${CKAN_DATASTORE_WRITE_URL:-***postgres/datastore_test}"
CKAN_DATASTORE_READ_URL="${CKAN_DATASTORE_READ_URL:-***postgres/datastore_test}"
CKAN_SOLR_URL="${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}"
CKAN_REDIS_URL="${CKAN_REDIS_URL:-redis://redis:6379/1}"
# create temp files to hold lists (POSIX sh-safe)
REPLACE_FILE="$(mktemp)"
ADD_FILE="$(mktemp)"
MISSING_ADD_FILE="$(mktemp)"
: > "$REPLACE_FILE"
: > "$ADD_FILE"
: > "$MISSING_ADD_FILE"
# REPLACE_ENTRIES (key|value) - write expanded lines to REPLACE_FILE
printf '%s\n' \
"ckan.site_url|${CKAN_SITE_URL}" \
"sqlalchemy.url|${CKAN_SQLALCHEMY_URL}" \
"ckan.datastore.write_url|${CKAN_DATASTORE_WRITE_URL}" \
"ckan.datastore.read_url|${CKAN_DATASTORE_READ_URL}" \
"solr_url|${CKAN_SOLR_URL}" \
"ckan.redis.url|${CKAN_REDIS_URL}" \
> "$REPLACE_FILE"
# ADD_LINES content (one entry per line). Comments start with '#'
cat > "$ADD_FILE" <<'EOF'
ckan.site_id = default
ckan.site_title = CKAN Test
ckan.auth.create_default_api_keys = true
ckanext.datapusher_plus.qsv_bin = /usr/local/bin/qsvdp
scheming.dataset_schemas = ckanext.datapusher_plus:dataset-druf.yaml
scheming.presets = ckanext.scheming:presets.json
scheming.dataset_fallback = false
ckanext.datapusher_plus.use_proxy = false
ckanext.datapusher_plus.download_proxy =
ckanext.datapusher_plus.ssl_verify = false
# supports INFO, DEBUG, TRACE - use DEBUG or TRACE when debugging scheming Formulas
ckanext.datapusher_plus.upload_log_level = INFO
ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsxb xlsm ods geojson shp qgis zip
ckanext.datapusher_plus.pii_screening = false
ckanext.datapusher_plus.pii_found_abort = false
ckanext.datapusher_plus.pii_regex_resource_id_or_alias =
ckanext.datapusher_plus.pii_show_candidates = false
ckanext.datapusher_plus.pii_quick_screen = false
ckanext.datapusher_plus.preview_rows = 100
ckanext.datapusher_plus.download_timeout = 300
ckanext.datapusher_plus.max_content_length = 1256000000000
ckanext.datapusher_plus.chunk_size = 16384
ckanext.datapusher_plus.default_excel_sheet = 0
ckanext.datapusher_plus.sort_and_dupe_check = true
ckanext.datapusher_plus.dedup = false
ckanext.datapusher_plus.unsafe_prefix = unsafe_
ckanext.datapusher_plus.reserved_colnames = _id
ckanext.datapusher_plus.prefer_dmy = false
ckanext.datapusher_plus.ignore_file_hash = true
ckanext.datapusher_plus.auto_index_threshold = 3
ckanext.datapusher_plus.auto_index_dates = true
ckanext.datapusher_plus.auto_unique_index = true
ckanext.datapusher_plus.summary_stats_options =
ckanext.datapusher_plus.add_summary_stats_resource = false
ckanext.datapusher_plus.summary_stats_with_preview = false
ckanext.datapusher_plus.qsv_stats_string_max_length = 32767
ckanext.datapusher_plus.qsv_dates_whitelist = date,time,due,open,close,created
ckanext.datapusher_plus.qsv_freq_limit = 10
ckanext.datapusher_plus.auto_alias = true
ckanext.datapusher_plus.auto_alias_unique = false
ckanext.datapusher_plus.copy_readbuffer_size = 1048576
ckanext.datapusher_plus.type_mapping = {"String": "text", "Integer": "numeric", "Float": "numeric", "DateTime": "timestamp", "Date": "date", "NULL": "text"}
ckanext.datapusher_plus.auto_spatial_simplication = true
ckanext.datapusher_plus.spatial_simplication_relative_tolerance = 0.1
ckanext.datapusher_plus.latitude_fields = latitude,lat
ckanext.datapusher_plus.longitude_fields = longitude,long,lon
ckanext.datapusher_plus.jinja2_bytecode_cache_dir = /tmp/jinja2_bytecode_cache
ckanext.datapusher_plus.auto_unzip_one_file = true
EOF
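# The loops below patch test-core.ini key-by-key: existing keys are rewritten
# in place and only genuinely missing ones are appended under [app:main].
# CKAN's own config-tool (used later for the DP+ token) can set a single key
# the same way, e.g.:
#   ckan config-tool /srv/app/src/ckan/test-core.ini "ckan.site_id=default"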
if [ -f /srv/app/src/ckan/test-core.ini ]; then
echo "Patching selective keys in /srv/app/src/ckan/test-core.ini (only the keys you listed)..."
# Ensure single debug = true under [DEFAULT]: remove existing debug lines in DEFAULT then add one
# NB: 'in' is a reserved word in awk, so the in-DEFAULT flag is named indef
awk 'BEGIN{indef=0}
/^\[DEFAULT\]/{ print; indef=1; next }
/^\[.*\]/{ if(indef){ print "debug = true"; indef=0 } }
{
if(indef){
if($1 == "debug") next
print
} else {
print
}
}
END { if(indef) print "debug = true" }' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.tmp && mv /srv/app/src/ckan/test-core.ini.tmp /srv/app/src/ckan/test-core.ini
# Process REPLACE_FILE: replace if present, otherwise write to missing file
while IFS= read -r entry || [ -n "$entry" ]; do
key="$(printf '%s' "$entry" | cut -d'|' -f1)"
value="$(printf '%s' "$entry" | cut -d'|' -f2-)"
# escape backslashes and ampersands for sed replacement
esc_value="$(printf '%s' "$value" | sed -e 's/[\/&]/\\&/g')"
if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then
sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini
else
printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE"
fi
done < "$REPLACE_FILE"
# Process ADD_FILE: replace if present, otherwise collect to missing file
while IFS= read -r ln || [ -n "$ln" ]; do
# comment lines - check if exact comment exists
case "$ln" in
\#*)
if ! grep -Fq "$ln" /srv/app/src/ckan/test-core.ini; then
printf '%s\n' "$ln" >> "$MISSING_ADD_FILE"
fi
;;
*)
key="$(printf '%s' "$ln" | cut -d'=' -f1 | sed 's/[[:space:]]*$//')"
value="$(printf '%s' "$ln" | cut -d'=' -f2- | sed 's/^[[:space:]]*//')"
esc_value="$(printf '%s' "$value" | sed -e 's/[\/&]/\\&/g')"
if grep -q -E "^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=" /srv/app/src/ckan/test-core.ini; then
sed -i -E "s|^[[:space:]]*$(printf '%s' "$key" | sed 's/[][^$.*/]/\\&/g')[[:space:]]*=.*|${key} = ${esc_value}|g" /srv/app/src/ckan/test-core.ini
else
printf '%s\n' "${key} = ${value}" >> "$MISSING_ADD_FILE"
fi
;;
esac
done < "$ADD_FILE"
# If there are missing lines, insert them after the first [app:main] header, or append the section
if [ -s "$MISSING_ADD_FILE" ]; then
awk -v addfile="$MISSING_ADD_FILE" '
BEGIN{
inserted=0
while ((getline line < addfile) > 0) { add[++na]=line }
close(addfile)
}
{
print
if(!inserted && $0=="[app:main]") {
for(i=1;i<=na;i++) print add[i]
inserted=1
}
}
END{
if(!inserted){
print "[app:main]"
for(i=1;i<=na;i++) print add[i]
}
}' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini
fi
# Final defensive catch: ensure sqlalchemy and datastore URLs reflect env (again)
sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL}|g" /srv/app/src/ckan/test-core.ini
sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL}|g" /srv/app/src/ckan/test-core.ini
else
echo "/srv/app/src/ckan/test-core.ini not found — no selective patching performed."
fi
# Append datapusher plugin(s) to ckan.plugins if present; otherwise add a plugins line
REQUIRED_PLUGINS="datastore datapusher_plus scheming_datasets"
if grep -q "^ckan.plugins" /srv/app/src/ckan/test-core.ini; then
echo "Appending required plugins to existing ckan.plugins line"
current=$(grep "^ckan.plugins" /srv/app/src/ckan/test-core.ini | head -n1 | cut -d'=' -f2-)
for p in $REQUIRED_PLUGINS; do
echo "$current" | grep -qw "$p" || current="$current $p"
done
awk -v new="ckan.plugins = $current" 'BEGIN{done=0} {if(!done && $1=="ckan.plugins") {print new; done=1} else print $0}' /srv/app/src/ckan/test-core.ini > /srv/app/src/ckan/test-core.ini.new && mv /srv/app/src/ckan/test-core.ini.new /srv/app/src/ckan/test-core.ini
else
echo "ckan.plugins = $REQUIRED_PLUGINS" >> /srv/app/src/ckan/test-core.ini
echo "Added ckan.plugins line with required plugins."
fi
echo "---- /srv/app/src/ckan/test-core.ini (cat) ----"
cat /srv/app/src/ckan/test-core.ini
echo "---- end ----"
- name: Initialize CKAN database
run: |
echo "Testing connectivity with CKAN DB user..."
if ! PGPASSWORD=$CKAN_DB_PASSWORD psql -h postgres -U ckan_default -d ckan_test -c "SELECT 1;" >/dev/null 2>&1; then
echo "Cannot connect as ckan_default. Attempting to create database owner and db..."
# PostgreSQL has no IF NOT EXISTS for CREATE USER/DATABASE; guard with a lookup
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='ckan_default'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';"
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='ckan_test'" | grep -q 1 || \
PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE ckan_test OWNER ckan_default;"
fi
echo "Running ckan db init (may be idempotent)..."
if ckan -c /srv/app/src/ckan/test-core.ini db init; then
echo "CKAN DB initialized."
else
echo "ckan db init returned non-zero; continuing (may already be initialized)."
fi
echo "Setting datastore permissions..."
if ckan -c /srv/app/src/ckan/test-core.ini datastore set-permissions | PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres --set ON_ERROR_STOP=1; then
echo "Datastore permissions set."
else
echo "Datastore permission step returned non-zero; continuing."
fi
- name: Start CKAN server
run: |
set -eu
echo "Starting CKAN server in background..."
# Use nohup to keep it running in background
nohup ckan -c /srv/app/src/ckan/test-core.ini run --host 0.0.0.0 --port 5000 --disable-reloader > /tmp/ckan_stdout.log 2>&1 &
CKAN_PID=$!
echo "CKAN PID=$CKAN_PID"
# wait for port / API
timeout=120
while [ $timeout -gt 0 ]; do
if ! kill -0 "$CKAN_PID" >/dev/null 2>&1; then
echo "CKAN process died. Showing last lines of log:"
tail -n 200 /tmp/ckan_stdout.log
exit 1
fi
if curl -fsS "${CKAN_SITE_URL}/api/3/action/status_show" >/dev/null 2>&1; then
echo "CKAN API responding"
break
fi
echo "Waiting for CKAN API... ($timeout s left)"
sleep 3
timeout=$((timeout-3))
done
if [ $timeout -le 0 ]; then
echo "Timeout waiting for CKAN to start. Dumping logs..."
tail -n 200 /tmp/ckan_stdout.log
ss -tlnp || netstat -tlnp
exit 1
fi
echo "CKAN started successfully"
- name: Create sysadmin user admin_ckan and get apikey
run: |
set -eu
echo "Creating user admin_ckan..."
user_response=$(ckanapi action user_create --config /srv/app/src/ckan/test-core.ini \
name=admin_ckan \
[email protected] \
password=test1234 \
fullname="CKAN Administrator" \
with_apitoken=true \
about="Created by GitHub Actions test" 2>/dev/null) || echo "user_create returned non-zero (user may already exist)"
echo "User creation response: $user_response"
echo "Converting admin_ckan user to sysadmin..."
ckan -c /srv/app/src/ckan/test-core.ini sysadmin add admin_ckan
echo "User admin_ckan promoted to sysadmin"
# Extract only the JSON part (everything from { to })
json_response=$(echo "$user_response" | sed -n '/{/,/}/p')
# Extract API key from the JSON
api_key=$(echo "$json_response" | jq -r '.token // empty')
if [ -n "$api_key" ] && [ "$api_key" != "null" ] && [ "$api_key" != "empty" ]; then
echo "CKAN_API_KEY=$api_key" >> $GITHUB_ENV
echo "API key saved: $api_key"
else
echo "No API key found in response"
fi
echo "User admin_ckan creation completed"
- name: Create API token for datapusher-plus and add to config
run: |
set -eu
echo "Creating API token for datapusher-plus service account..."
# Create API token for admin_ckan user specifically for datapusher-plus
echo "Running: ckan user token add admin_ckan dpplus"
dp_token_output=$(ckan -c /srv/app/src/ckan/test-core.ini user token add admin_ckan dpplus 2>&1)
echo "Full token creation output:"
echo "$dp_token_output"
dp_token=$(echo "$dp_token_output" | tail -n 1 | tr -d '\t')
echo "Extracted token: '$dp_token'"
if [ -n "$dp_token" ] && [ "$dp_token" != "null" ]; then
echo "Created datapusher-plus API token: $dp_token"
# Add the token to the CKAN configuration file
ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$dp_token"
# Verify it was added
echo "Verifying token was added to config:"
grep "ckanext.datapusher_plus.api_token" /srv/app/src/ckan/test-core.ini || echo "Token not found in config!"
# Also set in environment for potential use in other steps
echo "DATAPUSHER_PLUS_API_TOKEN=$dp_token" >> $GITHUB_ENV
echo "API token added to CKAN configuration successfully"
else
echo "Failed to create API token for datapusher-plus"
echo "Using main CKAN API key as fallback..."
ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$CKAN_API_KEY"
fi
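# CKAN 2.10+ issues API tokens ('ckan user token add') rather than legacy API
# keys. The same token could be minted over the API instead (illustrative):
#   curl -s -X POST -H "Authorization: $CKAN_API_KEY" \
#     -H "Content-Type: application/json" \
#     -d '{"user": "admin_ckan", "name": "dpplus2"}' \
#     "$CKAN_SITE_URL/api/3/action/api_token_create" | jq -r '.result.token'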
- name: Create organization with ckanapi
run: |
set -eu
echo "Creating organization demo-organization (idempotent)..."
ckanapi action organization_create --config /srv/app/src/ckan/test-core.ini \
name=demo-organization \
title="Demo Data Publishing Organization" \
description="Demo org created by GitHub Actions for datapusher-plus testing." || echo "organization_create returned non-zero (may already exist)"
echo "Add admin_ckan as admin to the organization"
ckanapi action organization_member_create --config /srv/app/src/ckan/test-core.ini \
id=demo-organization username=admin_ckan role=admin || echo "organization_member_create returned non-zero (may already be member)"
- name: Create dataset with ckanapi
run: |
set -eu
echo "Creating dataset my-first-dataset (idempotent)..."
if ckanapi action package_create \
name=my-first-dataset \
title="My First Comprehensive Dataset" \
notes="This is a comprehensive demo dataset created via ckanapi and GitHub Actions for testing CKAN functionality and datapusher-plus integration." \
owner_org=demo-organization \
license_id=cc-by \
version=1.0.0 \
author="GitHub Actions Automation" \
[email protected] \
maintainer="CKAN Admin" \
[email protected] \
url=https://github.com/your-repo/your-project \
private:false \
state=active \
'tags:[{"name":"demo"},{"name":"test"},{"name":"github-actions"},{"name":"automation"},{"name":"csv-data"},{"name":"datapusher-plus"}]' \
-c /srv/app/src/ckan/test-core.ini; then
echo "Dataset created successfully!"
else
echo "Dataset might already exist, continuing..."
fi
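# ckanapi maps key=value args to the action payload (key:value passes JSON
# literals, e.g. private:false above). The equivalent raw HTTP call would be
# (illustrative):
#   curl -s -X POST -H "Authorization: $CKAN_API_KEY" \
#     -H "Content-Type: application/json" \
#     -d '{"name":"my-first-dataset","owner_org":"demo-organization"}' \
#     "$CKAN_SITE_URL/api/3/action/package_create"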
- name: Add resource to dataset with ckanapi
run: |
set -eu
echo "Adding resource to my-first-dataset..."
if ckanapi action resource_create \
package_id=my-first-dataset \
url="https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/100kb.csv" \
name="Sample CSV Data - 100KB Test File" \
description="Test CSV resource for datapusher-plus pipeline." \
format=CSV \
mimetype="text/csv" \
-c /srv/app/src/ckan/test-core.ini; then
echo "Resource created successfully!"
else
echo "Resource creation failed"
ckanapi action package_show id=my-first-dataset -c /srv/app/src/ckan/test-core.ini
exit 1
fi
- name: Display CKAN instance inventory
run: |
set -eu
echo "=== CKAN Status (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/status_show" | python3 -m json.tool
echo ""
echo "=== All Datasets (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/package_list" | python3 -m json.tool
echo ""
echo "=== All Organizations (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/organization_list" | python3 -m json.tool
echo ""
echo "=== All Users (HTTP API) ==="
curl -s "http://localhost:5000/api/3/action/user_list" | python3 -m json.tool
- name: Test datastore functionality
run: |
set -eu
echo "Testing datastore functionality..."
# Test 1: Check if datastore is accessible by querying table metadata
echo "=== Testing datastore read access ==="
metadata_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=_table_metadata")
echo "Table metadata response: $metadata_response"
if echo "$metadata_response" | jq -e '.success == true' >/dev/null 2>&1; then
echo "✓ Datastore read access working"
else
echo "✗ Datastore read access failed"
exit 1
fi
# Test 2: Create a test datastore table
echo "=== Testing datastore write access ==="
test_response=$(curl -s -X POST \
-H "Content-Type: application/json" \
-H "Authorization: $CKAN_API_KEY" \
-d '{
"resource": {"package_id": "my-first-dataset"},
"fields": [{"id": "test_col", "type": "text"}, {"id": "value", "type": "int"}],
"records": [{"test_col": "hello", "value": 1}, {"test_col": "world", "value": 2}]
}' \
"http://localhost:5000/api/3/action/datastore_create")
echo "Test table creation response: $test_response"
if echo "$test_response" | jq -e '.success == true' >/dev/null 2>&1; then
echo "✓ Datastore write access working"
# Extract resource_id for cleanup
test_resource_id=$(echo "$test_response" | jq -r '.result.resource_id')
# Test 3: Query the test table
echo "=== Testing datastore query ==="
query_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$test_resource_id")
echo "Query response: $query_response"
# Cleanup: Delete test table
echo "=== Cleaning up test table ==="
curl -s -X POST \
-H "Content-Type: application/json" \
-H "Authorization: $CKAN_API_KEY" \
-d "{\"resource_id\": \"$test_resource_id\"}" \
"http://localhost:5000/api/3/action/datastore_delete" >/dev/null
echo "✓ Datastore functionality test completed successfully"
else
echo "✗ Datastore write access failed"
fi
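# Besides datastore_search, the datastore exposes read-only SQL via the
# datastore_search_sql action (illustrative; <resource_id> is a placeholder):
#   curl -s --get "http://localhost:5000/api/3/action/datastore_search_sql" \
#     --data-urlencode 'sql=SELECT count(*) FROM "<resource_id>"'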
- name: Start CKAN background job worker
run: |
set -eu
echo "Starting CKAN background job worker (CRITICAL for DataPusher Plus)..."
nohup ckan -c /srv/app/src/ckan/test-core.ini jobs worker > /tmp/ckan_worker.log 2>&1 &
WORKER_PID=$!
echo "CKAN Worker PID=$WORKER_PID"
echo "CKAN_WORKER_PID=$WORKER_PID" >> $GITHUB_ENV
# Give worker a moment to start up
sleep 5
# Verify worker is running
if kill -0 "$WORKER_PID" >/dev/null 2>&1; then
echo "Background job worker started successfully"
echo "Worker logs:"
head -n 20 /tmp/ckan_worker.log || echo "No worker logs yet"
else
echo "Worker failed to start"
cat /tmp/ckan_worker.log
exit 1
fi
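# DP+ pushes jobs onto CKAN's RQ-backed background queue, so without this
# worker resources would stay pending forever. Queue contents can be listed
# with (illustrative):
#   ckan -c /srv/app/src/ckan/test-core.ini jobs list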
- name: Test DataPusher Plus functionality - Remote Files (CSV Input)
run: |
set -eu
echo "=== Testing DataPusher Plus Functionality - Remote Files from CSV ==="
# Initialize results tracking
echo "timestamp,file_name,upload_status,resource_id,datapusher_status,datastore_active,rows_imported,processing_time,error_message" > /tmp/test_results.csv
# Initialize skipped files tracking
echo "file_name,reason_skipped" > /tmp/skipped_files.csv
# Set path for CSV input file
CSV_INPUT_FILE="${GITHUB_WORKSPACE}/tests/$FILES_DIR/base_files.csv"
# Check if CSV input file exists
if [ ! -f "$CSV_INPUT_FILE" ]; then
echo "ERROR: CSV input file not found: $CSV_INPUT_FILE"
echo "Please ensure the tests/$FILES_DIR/base_files.csv file exists in your repository"
echo "Expected CSV format: file_name,file_url,file_format,file_mimetype,file_description"
exit 1
fi
echo "Using CSV input file: $CSV_INPUT_FILE"
echo "CSV file size: $(du -h "$CSV_INPUT_FILE" | cut -f1)"
echo ""
# Validate CSV structure
echo "Validating CSV structure..."
header=$(head -n 1 "$CSV_INPUT_FILE")
echo "CSV Header: $header"
# Check if header contains required columns
if ! echo "$header" | grep -qi "file_url"; then
echo "ERROR: CSV must contain 'file_url' column"
echo "Expected format: file_name,file_url,file_format,file_mimetype,file_description"
exit 1
fi
# Count total entries in CSV
total_entries=$(tail -n +2 "$CSV_INPUT_FILE" | grep -v '^[[:space:]]*$' | wc -l)
echo "Total entries in CSV: $total_entries"
echo ""
# Display first few entries for verification
echo "First 5 entries from CSV:"
head -n 6 "$CSV_INPUT_FILE"
echo ""
# Create test dataset once
echo "Creating test dataset for DataPusher Plus..."
if ckanapi action package_create \
name=datapusher-plus-test-remote \
title="DataPusher Plus Remote Files Test Dataset" \
owner_org=demo-organization \
-c /srv/app/src/ckan/test-core.ini >/dev/null 2>&1; then
echo "Test dataset created"
else
echo "Test dataset might already exist, continuing..."
fi
# Initialize counters
total_files=0
passed_files=0
failed_files=0
skipped_files=0
# Process each line from CSV (skip header)
tail -n +2 "$CSV_INPUT_FILE" | while IFS=',' read -r file_name file_url file_format file_mimetype file_desc || [ -n "$file_name" ]; do
# Skip empty lines and comments
[ -z "$file_name" ] && continue
case "$file_name" in
'#'*) continue ;;
''|*[[:space:]]*)
# Skip lines with only whitespace
[ -z "$(echo "$file_name" | tr -d '[:space:]')" ] && continue
;;
esac
# Trim whitespace from all fields
file_name=$(echo "$file_name" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
file_url=$(echo "$file_url" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | tr -d '"')
file_format=$(echo "$file_format" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
file_mimetype=$(echo "$file_mimetype" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
file_desc=$(echo "$file_desc" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
# Validate required fields
if [ -z "$file_url" ]; then
echo "SKIP: Missing URL for file: $file_name"
echo "$file_name,Missing file_url in CSV" >> /tmp/skipped_files.csv
skipped_files=$((skipped_files + 1))
continue
fi
# Set defaults if fields are empty
[ -z "$file_name" ] && file_name=$(basename "$file_url")
[ -z "$file_format" ] && file_format="UNKNOWN"
[ -z "$file_mimetype" ] && file_mimetype="application/octet-stream"
[ -z "$file_desc" ] && file_desc="Remote file: $file_name"
# Test if URL is accessible
echo "Testing accessibility of: $file_url"
# -f makes curl fail on HTTP errors (e.g. 404), not just network errors
if ! curl -sf --head --max-time 10 "$file_url" > /dev/null 2>&1; then
echo "SKIP: File not accessible via HTTP: $file_url"
echo "$file_name,File not accessible or timed out" >> /tmp/skipped_files.csv
skipped_files=$((skipped_files + 1))
continue
fi
total_files=$((total_files + 1))
echo ""
echo "=========================================="
echo "Testing File #${total_files}: $file_name"
echo "URL: $file_url"
echo "Format: $file_format"
echo "Description: $file_desc"
# Try to get file size
file_size=$(curl -sI "$file_url" | grep -i content-length | cut -d' ' -f2 | tr -d '\r')
[ -n "$file_size" ] || file_size="unknown"
echo "File size: $file_size bytes"
echo "=========================================="
# Initialize tracking variables for this file
start_time=$(date +%s)
upload_status="FAILED"
resource_id=""
datapusher_status="N/A"
datastore_active="false"
rows_imported="0"
error_message=""
# Create resource with URL for this test file
echo "Creating resource with URL for $file_name..."
if resource_response=$(ckanapi action resource_create \
package_id=datapusher-plus-test-remote \
url="$file_url" \
name="Remote Test: $file_name" \
description="$file_desc" \
format="$file_format" \
mimetype="$file_mimetype" \
-c /srv/app/src/ckan/test-core.ini 2>&1); then
echo "Resource created successfully for $file_name"
upload_status="SUCCESS"
# Extract resource ID
resource_id=$(echo "$resource_response" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
if [ -z "$resource_id" ]; then
resource_id=$(echo "$resource_response" | sed -n 's/.*"id"[[:space:]]*:[[:space:]]*"\([a-f0-9-]*\)".*/\1/p')
fi
echo "Resource ID: $resource_id"
if [ -n "$resource_id" ] && [ "$resource_id" != "null" ]; then
# Monitor DataPusher Plus processing
echo "Monitoring DataPusher Plus processing for $file_name..."
max_attempts=90 # 3 minutes max per file
for attempt in $(seq 1 $max_attempts); do
sleep 2
# Check DataPusher status
if dp_status_response=$(curl -s -H "Authorization: $CKAN_API_KEY" \
"http://localhost:5000/api/3/action/datapusher_status?resource_id=$resource_id" 2>/dev/null); then
if echo "$dp_status_response" | grep -q '"success"[[:space:]]*:[[:space:]]*true'; then
datapusher_status=$(echo "$dp_status_response" | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
if [ -z "$datapusher_status" ]; then
datapusher_status="unknown"
fi
# Clean up status string
datapusher_status=$(echo "$datapusher_status" | tr -d '\n\r\t ' | cut -c1-10)
echo " Attempt $attempt/$max_attempts: DataPusher status = $datapusher_status"
if [ "$datapusher_status" = "complete" ]; then
echo " ✓ DataPusher processing completed for $file_name!"
break
elif [ "$datapusher_status" = "error" ]; then
error_info=$(echo "$dp_status_response" | grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' | head -1)
if [ -z "$error_info" ]; then
error_info="DataPusher processing error"
fi
error_message="DataPusher error: $error_info"
echo " ✗ DataPusher processing failed for $file_name: $error_message"
break
fi
else
# API returned success=false
if [ $attempt -eq $max_attempts ]; then
error_message="DataPusher status API returned success=false"
echo " ✗ DataPusher status API error for $file_name"
fi
fi
else
# Curl failed
if [ $attempt -eq $max_attempts ]; then
error_message="Failed to get DataPusher status"
echo " ✗ Cannot reach DataPusher status API for $file_name"
fi
fi
# Progress indicator
if [ $((attempt % 15)) -eq 0 ]; then
echo " Still processing $file_name... (${attempt}/${max_attempts})"
fi
done
# Check final resource status
echo "Checking final status for $file_name..."
if final_resource=$(curl -s "http://localhost:5000/api/3/action/resource_show?id=$resource_id" 2>/dev/null); then
if echo "$final_resource" | grep -q '"datastore_active"[[:space:]]*:[[:space:]]*true'; then
datastore_active="true"
echo " ✓ DataStore activated for $file_name"
# Get row count
if datastore_data=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$resource_id&limit=1" 2>/dev/null); then
rows_imported=$(echo "$datastore_data" | grep -o '"total"[[:space:]]*:[[:space:]]*[0-9]*' | sed 's/.*"total"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/')
if [ -z "$rows_imported" ]; then
rows_imported="0"
fi
echo " ✓ Rows imported for $file_name: $rows_imported"
fi
else
datastore_active="false"
echo " ✗ DataStore not activated for $file_name"
fi
else
echo " ✗ Cannot check final resource status for $file_name"
fi
else
error_message="No valid resource ID extracted for $file_name"
echo " ✗ $error_message"
fi
else
echo " ✗ Resource creation failed for $file_name"
error_message="Resource creation failed: $(echo "$resource_response" | head -1)"
fi
# Calculate processing time
end_time=$(date +%s)
processing_time=$((end_time - start_time))
# Log results for this file
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo "$timestamp,$file_name,$upload_status,$resource_id,$datapusher_status,$datastore_active,$rows_imported,$processing_time,\"$error_message\"" >> /tmp/test_results.csv
# Update counters
if [ "$upload_status" = "SUCCESS" ] && [ "$datapusher_status" = "complete" ] && [ "$datastore_active" = "true" ]; then
passed_files=$((passed_files + 1))
echo " 🎉 PASS: $file_name processed successfully"
else
failed_files=$((failed_files + 1))
echo " ❌ FAIL: $file_name had issues"
fi
echo " Processing time: ${processing_time}s"
# Brief pause between files to avoid overwhelming the system
echo " Waiting 3 seconds before next file..."
sleep 3
done < "$CSV_BODY"
# Count skipped files from CSV
if [ -f /tmp/skipped_files.csv ]; then
skipped_count=$(tail -n +2 /tmp/skipped_files.csv | wc -l)
skipped_files=$skipped_count
fi
echo ""
echo "=========================================="
echo "=== FINAL TEST RESULTS SUMMARY ==="
echo "=========================================="
echo "Total files in CSV: $total_entries"
echo "Files tested: $total_files"
echo "Files skipped: $skipped_files"
echo "Passed: $passed_files"
echo "Failed: $failed_files"
if [ $total_files -gt 0 ]; then
echo "Success rate (of tested files): $(( passed_files * 100 / total_files ))%"
else
echo "No files were tested"
fi
echo ""
echo "=== Detailed Results ==="
echo "Results saved to: /tmp/test_results.csv"
cat /tmp/test_results.csv
echo ""
if [ $skipped_files -gt 0 ]; then
echo "=== Skipped Files ==="
echo "Skipped files saved to: /tmp/skipped_files.csv"
cat /tmp/skipped_files.csv
echo ""
fi
# Determine overall result
if [ $total_files -eq 0 ] && [ $skipped_files -gt 0 ]; then
echo ""
echo "⚠ OVERALL RESULT: NO TESTABLE FILES"
echo "All files in CSV were skipped - check URLs and accessibility"
elif [ $total_files -eq 0 ]; then
echo ""
echo "⚠ OVERALL RESULT: NO FILES TESTED"
echo "No valid entries found in CSV file"
elif [ $failed_files -eq 0 ] && [ $passed_files -gt 0 ]; then
echo ""
echo "🎉 OVERALL RESULT: ALL TESTED FILES PASSED"
echo "DataPusher Plus is working correctly with all testable remote files"
elif [ $passed_files -gt 0 ]; then
echo ""
echo "⚠ OVERALL RESULT: PARTIAL SUCCESS"
echo "DataPusher Plus works with some remote files but has issues with others"
else
echo ""
echo "❌ OVERALL RESULT: ALL TESTED FILES FAILED"
echo "DataPusher Plus is not working correctly with remote files"
fi
echo ""
echo "Test completed at: $(date)"
- name: Generate Combined Test Results and Worker Analysis
if: always()
run: |
set -eu
echo "=== Generating Combined Test Results and Worker Analysis ==="
# First, process worker logs if they exist
echo "=== Processing DataPusher Plus Worker Logs ==="
# Check if worker log exists
if [ ! -f /tmp/ckan_worker.log ]; then
echo "No worker log file found at /tmp/ckan_worker.log"
# Create comprehensive header structure with all new fields including enhanced analytics
echo "timestamp,job_id,file_name,status,qsv_version,file_format,encoding,normalized,valid_csv,sorted,db_safe_headers,analysis,records,total_time,download_time,analysis_time,copying_time,indexing_time,formulae_time,metadata_time,rows_copied,columns_indexed,error_type,error_message,data_quality_score,processing_efficiency" > /tmp/worker_analysis.csv
else
echo "Worker log file size: $(du -h /tmp/ckan_worker.log | cut -f1)"
echo "Running enhanced Python log analyzer..."
# Run the Python script to analyze logs
python3 ${GITHUB_WORKSPACE}/tests/log_analyzer.py analyze /tmp/ckan_worker.log /tmp/worker_analysis.csv
fi
# Now check if both results files exist
if [ ! -f /tmp/test_results.csv ] && [ ! -f /tmp/worker_analysis.csv ]; then
echo "No test results or worker analysis files found"
echo "# DataPusher Plus Test Results" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "No test data available to analyze" >> $GITHUB_STEP_SUMMARY
exit 0
fi
# Initialize counters for test results
total_tests=0
skipped_count=0
total_files_in_dir=0
passed=0
failed=0
error_count=0
tested_success_rate=0
overall_success_rate=0
# Process test results if available
if [ -f /tmp/test_results.csv ]; then
# Count total results
total_lines=$(wc -l < /tmp/test_results.csv)
total_tests=$((total_lines - 1)) # Subtract header line
# Count skipped files
if [ -f /tmp/skipped_files.csv ]; then
skipped_lines=$(wc -l < /tmp/skipped_files.csv)
skipped_count=$((skipped_lines - 1)) # Subtract header line
fi
total_files_in_dir=$((total_tests + skipped_count))
if [ $total_tests -gt 0 ]; then
# Count results by status
# grep -c prints 0 itself on no match (exiting 1), so '|| true' keeps set -e
# happy without emitting a second "0" the way '|| echo 0' would
passed=$(grep -c ",SUCCESS,.*,complete,true," /tmp/test_results.csv || true)
failed=$(tail -n +2 /tmp/test_results.csv | grep -v ",SUCCESS,.*,complete,true," | wc -l)
error_count=$(grep -c ",error," /tmp/test_results.csv || true)
# Calculate success rates
tested_success_rate=$(( passed * 100 / total_tests ))
if [ $total_files_in_dir -gt 0 ]; then
overall_success_rate=$(( passed * 100 / total_files_in_dir ))
fi
fi
fi
# Check if worker analysis is available
worker_analysis_available=false
if [ -f /tmp/worker_analysis.csv ]; then
worker_analysis_available=true
fi
# Start building the combined summary
{
echo "# DataPusher Plus Test Results"
echo ""
echo "## Summary"
echo ""
echo "| Metric | Value |"
echo "|--------|-------|"
echo "| Total Files in Directory | $total_files_in_dir |"
echo "| Files Tested | $total_tests |"
echo "| Files Skipped | $skipped_count |"
echo "| Passed | $passed |"
echo "| Failed | $failed |"
echo "| Errors | $error_count |"
echo "| Success Rate (Tested Files) | ${tested_success_rate}% |"
echo "| Success Rate (All Files) | ${overall_success_rate}% |"
echo ""
# Show skipped files section if any exist
if [ $skipped_count -gt 0 ]; then
echo "## Skipped Files"
echo ""
echo "| File Name | Reason Skipped |"
echo "|-----------|----------------|"
if [ -f /tmp/skipped_files.csv ]; then
tail -n +2 /tmp/skipped_files.csv | while IFS=',' read -r file_name reason; do
echo "| $file_name | $reason |"
done
fi
echo ""
fi
# Show worker analysis table if available
if [ "$worker_analysis_available" = true ]; then
total_jobs=$(tail -n +2 /tmp/worker_analysis.csv | wc -l)
if [ $total_jobs -gt 0 ]; then
echo "## Complete Job Analysis"
echo ""
echo "| # | File Name | Status | Records | Columns | Time (s) | Valid CSV | Headers Safe | Error Type | Quality Score |"
echo "|---|-----------|--------|---------|---------|----------|-----------|--------------|------------|---------------|"
counter=1
tail -n +2 /tmp/worker_analysis.csv | while IFS=',' read -r timestamp job_id file_name status qsv_version file_format encoding normalized valid_csv sorted db_safe_headers analysis records total_time download_time analysis_time copying_time indexing_time formulae_time metadata_time rows_copied columns_indexed error_type error_message data_quality_score processing_efficiency; do
# Don't truncate values - allow full content with horizontal scroll
full_file_name=$(echo "$file_name" | sed 's/\.\.\.//')
full_error_type="$error_type"
full_headers="$db_safe_headers"
# Handle empty values
[ -z "$records" ] && records="0"
[ -z "$columns_indexed" ] && columns_indexed="0"
[ -z "$total_time" ] && total_time="0"
[ -z "$data_quality_score" ] && data_quality_score="-"
[ -z "$full_error_type" ] && full_error_type="-"
# Add status emoji
case "$status" in
"SUCCESS") status_display="✅ SUCCESS" ;;
"ERROR") status_display="❌ ERROR" ;;
"INCOMPLETE") status_display="⏸️ INCOMPLETE" ;;
*) status_display="❓ $status" ;;
esac
echo "| $counter | $full_file_name | $status_display | $records | $columns_indexed | $total_time | $valid_csv | $full_headers | $full_error_type | $data_quality_score |"
counter=$((counter + 1))
done
echo ""
# Add worker analysis sections
success_jobs=$(grep -c ",SUCCESS," /tmp/worker_analysis.csv || true)
error_jobs=$(grep -c ",ERROR," /tmp/worker_analysis.csv || true)
# File Analysis
echo "## File Analysis"
echo ""
if [ $success_jobs -gt 0 ]; then
# File formats processed
echo "### File Formats Processed"
echo ""
formats=$(tail -n +2 /tmp/worker_analysis.csv | grep ",SUCCESS," | cut -d',' -f6 | sort | uniq -c)
if [ -n "$formats" ]; then
echo "| Format | Files | Percentage |"
echo "|--------|-------|------------|"
echo "$formats" | while read count format; do
percentage=$((count * 100 / success_jobs))
# Add format icon
case "$format" in
"CSV") format_icon="📊" ;;
"XLSX"|"XLS") format_icon="📈" ;;
"JSON") format_icon="🔧" ;;
"TXT") format_icon="📝" ;;
*) format_icon="📄" ;;
esac
echo "| $format_icon $format | $count | $percentage% |"
done
else
echo "❌ No format data available"
fi
echo ""
# Encoding types
echo "### Encoding Distribution"
echo ""
encodings=$(tail -n +2 /tmp/worker_analysis.csv | grep ",SUCCESS," | cut -d',' -f7 | sort | uniq -c)
if [ -n "$encodings" ]; then
echo "| Encoding | Files | Status |"
echo "|----------|-------|--------|"
echo "$encodings" | while read count encoding; do
if [ -n "$encoding" ]; then
if [ "$encoding" = "UTF-8" ] || [ "$encoding" = "UTF" ]; then
status_icon="✅"
else
status_icon="⚠️"
fi
echo "| $status_icon $encoding | $count | Compatible |"
else
echo "| ❓ Unknown | $count | Needs Review |"
fi
done
else
echo "❌ No encoding data available"
fi
echo ""
fi
# Error Analysis
echo "## Error Analysis"
echo ""
if [ $error_jobs -gt 0 ]; then
echo "### Failed Files Details"
echo ""
echo ""
echo "| File | Error Type | Error Message |"
echo "|------|------------|---------------|"
tail -n +2 /tmp/worker_analysis.csv | grep ",ERROR," | cut -d',' -f3,23,24 | while IFS=',' read file error_type error_msg; do
clean_error=$(echo "$error_msg" | sed 's/^"//;s/"$//')
clean_file=$(echo "$file" | sed 's/\.\.\.//')
echo "| $clean_file | $error_type | $clean_error |"
done
echo ""
else
echo "✅ **No errors found in worker logs** - All processed jobs completed successfully!"
echo ""
fi
# Performance Anomalies
echo "## Performance Anomalies"
echo ""
anomalies_output=$(python3 ${GITHUB_WORKSPACE}/tests/log_analyzer.py anomalies /tmp/worker_analysis.csv 2>/dev/null || echo "")
if [ -z "$anomalies_output" ]; then
echo "✅ **No performance anomalies detected** - All jobs processed within expected timeframes"
else
echo "⚠️ **Performance issues detected:**"
echo ""
echo "$anomalies_output" | sed 's/ANOMALY: /🐌 **Slow Processing**: /'
fi
echo ""
fi
fi
# Add comprehensive analysis based on results (keep original logic)
if [ $total_tests -eq 0 ] && [ $skipped_count -gt 0 ]; then
echo "## No Testable Files ⚠️"
echo ""
echo "All files in the test directory were skipped."
echo ""
echo "**Common reasons for skipped files:**"
echo "- Unsupported file formats (only .csv, .tsv, .xlsx, .json, .geojson, .txt supported)"
echo "- Files not accessible via HTTP server"
echo "- Hidden files or system files"
echo ""
echo "**Recommendation:** Add supported data files to test directory."
elif [ $total_tests -eq 0 ]; then
echo "## No Files Found ❌"
echo ""
echo "No files found in test directory to test."
elif [ $passed -eq $total_tests ]; then
echo "## All Tested Files Passed! 🎉"
echo ""
echo "DataPusher Plus is working correctly with all testable files."
if [ $skipped_count -gt 0 ]; then
echo ""
echo "**Note:** $skipped_count file(s) were skipped. See the Skipped Files section above for details."
fi
elif [ $passed -gt 0 ]; then
echo "## Result: Partial Success"
echo ""
echo "DataPusher Plus works with some files but has issues with others."
echo ""
else
echo "## Result: All Tested Files Failed ❌"
echo ""
echo "DataPusher Plus is not working correctly with any tested files."
echo ""
echo "### All Failed Files:"
if [ -f /tmp/test_results.csv ]; then
tail -n +2 /tmp/test_results.csv | while IFS=',' read -r timestamp file_name upload_status resource_id datapusher_status datastore_active rows_imported processing_time error_message; do
clean_error=$(echo "$error_message" | sed 's/^"//;s/"$//')
echo "- **$file_name**: $clean_error"
done
fi
if [ $skipped_count -gt 0 ]; then
echo ""
echo "### Files Not Even Attempted:"
if [ -f /tmp/skipped_files.csv ]; then
tail -n +2 /tmp/skipped_files.csv | while IFS=',' read -r file_name reason; do
echo "- **$file_name**: $reason"
done
fi
fi
fi
echo ""
echo "---"
echo ""
echo "**Analysis completed:** $(date '+%A, %B %d, %Y at %I:%M %p %Z')"
} > /tmp/combined_summary.md
# Write to GitHub Actions step summary
cat /tmp/combined_summary.md >> $GITHUB_STEP_SUMMARY
echo "Combined analysis summary generated and added to workflow summary"
echo ""
echo "Preview of generated summary:"
echo "=================================="
cat /tmp/combined_summary.md
- name: Upload test results as artifact
uses: actions/upload-artifact@v4
if: always()
with:
name: datapusher-plus-test-results
path: |
/tmp/test_results.csv
/tmp/ckan_stdout.log
/tmp/ckan_worker.log
/tmp/worker_analysis.csv
retention-days: 3
- name: Cleanup
if: always()
run: |
echo "Stopping any running CKAN processes..."
pkill -f "ckan.*run"
echo "Cleanup completed"