Skip to content

Commit 0927b63

Browse files
committed
feat(pipeline): implement FHIR validation pipeline step
- Add validation step that sends FHIR resources to a validation service
- Chunk resources into Bundles with configurable size and concurrency
- Write per-file OperationOutcome reports for all validation results
- Default fail_on_error to true (stop pipeline on data quality errors)
- Support resumption by skipping files with existing reports
- Add fullUrl to inner Bundle entries for proper reference resolution
1 parent 2ac2dc4 commit 0927b63

31 files changed

+3018
-24
lines changed
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
# Validation E2E test script
5+
# Tests the validation pipeline step in isolation using pre-staged FHIR data.
6+
# Runs aether inside the Docker network using the aether-runner container.
7+
#
8+
# Test data includes both valid and invalid FHIR resources:
9+
# - Patient.ndjson: 2 valid patients (expect empty report — no errors)
10+
# - Condition.ndjson: 1 condition with invalid clinicalStatus and missing subject/verificationStatus (expect error report)
11+
# - InvalidPatient.ndjson: 2 patients with invalid field values (expect 1 error OperationOutcome)
12+
#
13+
# Resources are wrapped into FHIR Bundles before validation. Only error-bearing
14+
# OperationOutcomes are written to report files.
15+
#
16+
# The pipeline is expected to SUCCEED — validation findings are informational,
17+
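# not pipeline errors. Report files capture the error OperationOutcomes for review.
# NOTE(review): the commit message says fail_on_error defaults to true (stop on data quality errors), and neither aether-validation.yaml nor the pre-staged state.json sets it — confirm why this run is still expected to exit 0.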
18+
19+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
20+
TEST_DIR="$SCRIPT_DIR/../test"
21+
22+
# Colors for output
23+
RED='\033[0;31m'
24+
GREEN='\033[0;32m'
25+
YELLOW='\033[1;33m'
26+
NC='\033[0m' # No Color
27+
28+
echo "=== Validation E2E Test ==="
29+
echo ""
30+
31+
cd "$TEST_DIR"
32+
33+
# Copy aether binary into container
34+
echo "Copying aether binary into container..."
35+
docker compose cp ../../bin/aether aether-runner:/app/aether
36+
docker compose exec -T aether-runner chmod +x /app/aether
37+
38+
# Generate a unique job ID
39+
JOB_ID=$(uuidgen | tr '[:upper:]' '[:lower:]')
40+
echo "Creating job: $JOB_ID"
41+
42+
# Create job directory structure and copy test data
43+
echo "Setting up pre-staged job state..."
44+
docker compose exec -T aether-runner sh -c "
45+
mkdir -p /app/jobs/$JOB_ID/import
46+
mkdir -p /app/jobs/$JOB_ID/validation
47+
48+
# Copy NDJSON test files to import directory (simulating completed import step)
49+
cp /app/example-validation/testdata/*.ndjson /app/jobs/$JOB_ID/import/
50+
"
51+
52+
# Create state.json with pre-staged completed import step
53+
docker compose exec -T aether-runner sh -c "
54+
NOW=\$(date -u +%Y-%m-%dT%H:%M:%SZ)
55+
56+
cat > /app/jobs/$JOB_ID/state.json << EOF
57+
{
58+
\"job_id\": \"$JOB_ID\",
59+
\"created_at\": \"\$NOW\",
60+
\"updated_at\": \"\$NOW\",
61+
\"input_source\": \"/app/example-validation/testdata\",
62+
\"input_type\": \"local_directory\",
63+
\"current_step\": \"validation\",
64+
\"status\": \"in_progress\",
65+
\"steps\": [
66+
{
67+
\"name\": \"local_import\",
68+
\"status\": \"completed\",
69+
\"files_processed\": 3,
70+
\"bytes_processed\": 1500,
71+
\"retry_count\": 0
72+
},
73+
{
74+
\"name\": \"validation\",
75+
\"status\": \"pending\",
76+
\"files_processed\": 0,
77+
\"bytes_processed\": 0,
78+
\"retry_count\": 0
79+
}
80+
],
81+
\"config\": {
82+
\"services\": {
83+
\"validation\": {
84+
\"url\": \"http://fhir-validator:8080\",
85+
\"max_concurrent_requests\": 2,
86+
\"bundle_chunk_size_mb\": 10
87+
}
88+
},
89+
\"pipeline\": {
90+
\"enabled_steps\": [\"local_import\", \"validation\"]
91+
},
92+
\"retry\": {
93+
\"max_attempts\": 3,
94+
\"initial_backoff_ms\": 1000,
95+
\"max_backoff_ms\": 10000
96+
},
97+
\"compression\": {\"enabled\": false},
98+
\"jobs_dir\": \"./jobs\"
99+
},
100+
\"total_files\": 3,
101+
\"total_bytes\": 1500
102+
}
103+
EOF
104+
"
105+
106+
echo ""
107+
echo "Running aether pipeline continue for validation step..."
108+
echo " Job ID: $JOB_ID"
109+
echo " Validator: http://fhir-validator:8080 (internal)"
110+
echo ""
111+
112+
# Run the pipeline continue command.
113+
# Validation findings are informational — the pipeline should succeed.
114+
# Report files capture any error OperationOutcomes for review.
115+
PIPELINE_EXIT=0
116+
OUTPUT=$(docker compose exec -T aether-runner /app/aether pipeline continue "$JOB_ID" --config aether-validation.yaml 2>&1) || PIPELINE_EXIT=$?
117+
echo "$OUTPUT"
118+
119+
if [ $PIPELINE_EXIT -eq 0 ]; then
120+
echo -e "${GREEN}Pipeline completed successfully (validation findings are informational)${NC}"
121+
FAILED=0
122+
else
123+
echo ""
124+
echo -e "${RED}UNEXPECTED: Pipeline failed with exit code $PIPELINE_EXIT${NC}"
125+
FAILED=1
126+
fi
127+
128+
echo ""
129+
echo "Verifying validation output..."
130+
131+
VALIDATION_DIR="/app/jobs/$JOB_ID/validation"
132+
133+
# --- Check 1: Report files exist for ALL input files ---
134+
135+
for input_file in "Patient" "Condition" "InvalidPatient"; do
136+
REPORT_FILE="$input_file.validation.ndjson"
137+
echo ""
138+
echo "Checking report exists: $REPORT_FILE"
139+
140+
if docker compose exec -T aether-runner sh -c "
141+
if [ ! -f \"$VALIDATION_DIR/$REPORT_FILE\" ]; then
142+
echo 'MISSING: Report file not found'
143+
exit 1
144+
fi
145+
echo 'OK - Report file exists'
146+
"; then
147+
echo -e "${GREEN} PASS${NC}"
148+
else
149+
echo -e "${RED} FAIL${NC}"
150+
FAILED=1
151+
fi
152+
done
153+
154+
# --- Check 2: Valid files have EMPTY reports; invalid files have NON-EMPTY reports ---
155+
156+
echo ""
157+
echo "Checking Patient report is empty (all valid, no error outcomes)..."
158+
if docker compose exec -T aether-runner sh -c "
159+
set -e
160+
LINE_COUNT=\$(wc -l < \"$VALIDATION_DIR/Patient.validation.ndjson\" | tr -d ' ')
161+
if [ \"\$LINE_COUNT\" -ne 0 ]; then
162+
echo \"FAIL: Expected 0 lines (empty), got \$LINE_COUNT\"
163+
exit 1
164+
fi
165+
echo 'OK - Patient report is empty (all valid)'
166+
"; then
167+
echo -e "${GREEN} PASS${NC}"
168+
else
169+
echo -e "${RED} FAIL${NC}"
170+
FAILED=1
171+
fi
172+
173+
echo ""
174+
echo "Checking Condition report has errors (invalid resource — missing subject, bad clinicalStatus)..."
175+
if docker compose exec -T aether-runner sh -c "
176+
set -e
177+
LINE_COUNT=\$(wc -l < \"$VALIDATION_DIR/Condition.validation.ndjson\" | tr -d ' ')
178+
if [ \"\$LINE_COUNT\" -eq 0 ]; then
179+
echo 'FAIL: Expected non-empty report for invalid Condition'
180+
exit 1
181+
fi
182+
echo \"OK - Condition report has \$LINE_COUNT entry/entries (errors found)\"
183+
"; then
184+
echo -e "${GREEN} PASS${NC}"
185+
else
186+
echo -e "${RED} FAIL${NC}"
187+
FAILED=1
188+
fi
189+
190+
# --- Check 3: InvalidPatient report has exactly 1 line (one error OperationOutcome for the chunk) ---
191+
192+
echo ""
193+
echo "Checking InvalidPatient report has exactly 1 entry (one chunk with errors)..."
194+
if docker compose exec -T aether-runner sh -c "
195+
set -e
196+
LINE_COUNT=\$(wc -l < \"$VALIDATION_DIR/InvalidPatient.validation.ndjson\" | tr -d ' ')
197+
if [ \"\$LINE_COUNT\" -ne 1 ]; then
198+
echo \"FAIL: Expected 1 entry, got \$LINE_COUNT\"
199+
exit 1
200+
fi
201+
echo 'OK - InvalidPatient report has 1 entry (one chunk OperationOutcome)'
202+
"; then
203+
echo -e "${GREEN} PASS${NC}"
204+
else
205+
echo -e "${RED} FAIL${NC}"
206+
FAILED=1
207+
fi
208+
209+
# --- Check 4: The error OperationOutcome contains error-severity issues ---
210+
211+
echo ""
212+
echo "Checking InvalidPatient report contains error-severity issues..."
213+
if docker compose exec -T aether-runner sh -c "
214+
set -e
215+
ERROR_COUNT=0
216+
while IFS= read -r line; do
217+
if [ -z \"\$line\" ]; then
218+
continue
219+
fi
220+
if echo \"\$line\" | grep -qE '\"severity\"\\s*:\\s*\"(error|fatal)\"'; then
221+
ERROR_COUNT=\$((ERROR_COUNT + 1))
222+
fi
223+
done < \"$VALIDATION_DIR/InvalidPatient.validation.ndjson\"
224+
225+
if [ \"\$ERROR_COUNT\" -eq 0 ]; then
226+
echo 'FAIL: No error-severity issues found in InvalidPatient report'
227+
exit 1
228+
fi
229+
echo \"OK - Found \$ERROR_COUNT OperationOutcome(s) with error-severity issues\"
230+
"; then
231+
echo -e "${GREEN} PASS${NC}"
232+
else
233+
echo -e "${RED} FAIL${NC}"
234+
FAILED=1
235+
fi
236+
237+
# --- Summary ---
238+
239+
echo ""
240+
if [ $FAILED -eq 0 ]; then
241+
echo -e "${GREEN}=== All validation checks passed ===${NC}"
242+
exit 0
243+
else
244+
echo -e "${RED}=== Some validation checks failed ===${NC}"
245+
# Print report contents for debugging
246+
echo ""
247+
echo "--- Validation report contents ---"
248+
docker compose exec -T aether-runner sh -c "
249+
for f in $VALIDATION_DIR/*.ndjson; do
250+
echo \"File: \$(basename \$f)\"
251+
cat \"\$f\"
252+
echo ''
253+
done
254+
" || true
255+
exit 1
256+
fi

.github/test/Makefile

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Makefile for Aether Test Infrastructure
22
# Located in .github/test/
33

4-
.PHONY: help services start stop torch-up torch-down torch-logs torch-test dimp-up dimp-down dimp-test flattener-up flattener-down flattener-logs blaze-up blaze-down blaze-logs test-with-services download-testdata upload-testdata e2e-test e2e-test-flattening e2e-test-direct-load clean-test-data
4+
.PHONY: help services build start stop torch-up torch-down torch-logs torch-test dimp-up dimp-down dimp-test flattener-up flattener-down flattener-logs blaze-up blaze-down blaze-logs validator-up validator-down validator-logs test-with-services download-testdata upload-testdata e2e-test e2e-test-flattening e2e-test-validation e2e-test-direct-load clean-test-data
55

66
# Default target
77
.DEFAULT_GOAL := help
@@ -16,6 +16,7 @@ help:
1616
@echo " torch TORCH data extraction service"
1717
@echo " dimp DIMP pseudonymization service"
1818
@echo " flattener fhir-flattener CSV transformation service"
19+
@echo " validator mii-fhir-validator FHIR validation service"
1920
@echo ""
2021
@echo "Targets:"
2122
@echo " start Start all services (TORCH + DIMP + Flattener)"
@@ -35,14 +36,25 @@ help:
3536
@echo " blaze-up Start Blaze FHIR server (DSF transfer target)"
3637
@echo " blaze-down Stop Blaze FHIR server"
3738
@echo " blaze-logs Show Blaze FHIR server logs"
39+
@echo " validator-up Start mii-fhir-validator service"
40+
@echo " validator-down Stop mii-fhir-validator service"
41+
@echo " validator-logs Show mii-fhir-validator service logs"
3842
@echo " test-with-services Run all tests with required services"
3943
@echo " download-testdata Download test data (only if not present)"
4044
@echo " upload-testdata Upload test data to TORCH (requires services)"
4145
@echo " e2e-test Run end-to-end test (downloads, uploads, runs pipeline)"
4246
@echo " e2e-test-flattening Run flattening E2E test with pre-staged example data"
47+
@echo " e2e-test-validation Run validation E2E test with pre-staged example data"
4348
@echo " e2e-test-direct-load Run direct resource load E2E test (torch → send)"
49+
@echo " build Build aether binary"
4450
@echo " clean-test-data Clean test data and temporary files"
4551

52+
## build: Build aether binary
53+
build:
54+
@echo "Building aether binary..."
55+
@cd ../.. && make build
56+
@echo "✓ aether binary built"
57+
4658
## services: Alias for help
4759
services: help
4860

@@ -155,6 +167,23 @@ blaze-logs:
155167
@echo "Showing Blaze FHIR server logs..."
156168
cd blaze && docker compose logs -f
157169

170+
## validator-up: Start mii-fhir-validator service
171+
validator-up:
172+
@echo "Starting mii-fhir-validator service..."
173+
cd validator && docker compose up -d
174+
@echo "✓ mii-fhir-validator service starting (may take up to 60s)"
175+
176+
## validator-down: Stop mii-fhir-validator service
177+
validator-down:
178+
@echo "Stopping mii-fhir-validator service..."
179+
cd validator && docker compose down
180+
@echo "✓ mii-fhir-validator service stopped"
181+
182+
## validator-logs: Show mii-fhir-validator service logs
183+
validator-logs:
184+
@echo "Showing mii-fhir-validator service logs..."
185+
cd validator && docker compose logs -f
186+
158187
## dimp-test: Run integration tests with DIMP service
159188
dimp-test:
160189
@echo "Running integration tests with DIMP service..."
@@ -223,6 +252,15 @@ e2e-test-flattening:
223252
@../scripts/run-flattening-e2e-test.sh
224253
@echo "✓ Flattening E2E test complete"
225254

255+
## e2e-test-validation: Run validation E2E test with pre-staged example data
256+
e2e-test-validation:
257+
@echo "Running validation E2E test..."
258+
@echo "Starting fhir-validator and aether-runner services..."
259+
@docker compose up -d --wait aether-runner fhir-validator
260+
@echo "Running validation E2E test script..."
261+
@../scripts/run-validation-e2e-test.sh
262+
@echo "✓ Validation E2E test complete"
263+
226264
## e2e-test-direct-load: Run direct resource load E2E test (torch → send)
227265
e2e-test-direct-load: download-testdata
228266
@echo "Running direct resource load E2E test..."
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Aether Validation E2E Test Configuration
2+
# Uses internal Docker hostnames since aether runs inside the Docker network
3+
4+
services:
5+
# Validation Service (mii-fhir-validator)
6+
validation:
7+
url: "http://fhir-validator:8080"
8+
max_concurrent_requests: 2
9+
bundle_chunk_size_mb: 10
10+
11+
pipeline:
12+
# Pipeline starts from pre-staged data after local_import step
13+
enabled_steps:
14+
- local_import # Already completed (pre-staged)
15+
- validation # This is the step being tested
16+
17+
# Disable compression for simpler test verification
18+
compression:
19+
enabled: false
20+
21+
retry:
22+
max_attempts: 3
23+
initial_backoff_ms: 1000
24+
max_backoff_ms: 10000
25+
26+
# Directory to store job state and data
27+
jobs_dir: "./jobs"

.github/test/compose.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ include:
88
- dimp/compose.yaml
99
- flattener/compose.yaml
1010
- blaze/compose.yaml
11+
- validator/compose.yaml
1112

1213
services:
1314
# E2E test runner - runs aether inside the Docker network
@@ -19,14 +20,17 @@ services:
1920
volumes:
2021
- ./aether.yaml:/app/aether.yaml:ro
2122
- ./aether-flattening.yaml:/app/aether-flattening.yaml:ro
23+
- ./aether-validation.yaml:/app/aether-validation.yaml:ro
2224
- ./aether-direct-load.yaml:/app/aether-direct-load.yaml:ro
2325
- ./torch/queries:/app/torch/queries:ro
2426
- ./example-flattening:/app/example-flattening:ro
27+
- ./example-validation:/app/example-validation:ro
2528
- ./jobs:/app/jobs
2629
# Keep container running so we can exec into it
2730
command: ["sleep", "infinity"]
2831
depends_on:
2932
- torch-proxy
3033
- fhir-pseudonymizer
3134
- fhir-flattener
35+
- fhir-validator
3236
- blaze-fhir
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"resourceType":"Condition","id":"val-condition-1","clinicalStatus":{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/condition-clinical","code":"bogus-status"}]},"code":{"coding":[{"system":"http://fhir.de/CodeSystem/bfarm/icd-10-gm","code":"I50.01","display":"Sekundaere Rechtsherzinsuffizienz"}]}}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"resourceType":"Patient","id":"val-invalid-1","gender":"not-a-real-gender","birthDate":"not-a-date"}
2+
{"resourceType":"Patient","id":"val-invalid-2","contact":[{"name":{"text":"John"},"gender":"invalid"}],"multipleBirthInteger":"should-be-integer"}

0 commit comments

Comments
 (0)