Skip to content

Commit 76e3899

Browse files
committed
feat(pipeline): implement FHIR validation pipeline step
- Add validation step that sends FHIR resources to a validation service
- Chunk resources into Bundles with configurable size and concurrency
- Write per-file OperationOutcome reports for all validation results
- Default fail_on_error to true (stop pipeline on data quality errors)
- Support resumption by skipping files with existing reports
- Add fullUrl to inner Bundle entries for proper reference resolution
1 parent 834c811 commit 76e3899

31 files changed

+3024
-24
lines changed
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
# Validation E2E test script
5+
# Tests the validation pipeline step in isolation using pre-staged FHIR data.
6+
# Runs aether inside the Docker network using the aether-runner container.
7+
#
8+
# Test data includes both valid and invalid FHIR resources:
9+
# - Patient.ndjson: 2 valid patients (expect informational outcome)
10+
# - Condition.ndjson: 1 condition with invalid clinicalStatus and missing subject/verificationStatus (expect error report)
11+
# - InvalidPatient.ndjson: 2 patients with invalid field values (expect 1 error OperationOutcome)
12+
#
13+
# Resources are wrapped into FHIR Bundles before validation. All OperationOutcomes
14+
# (including informational ones for valid resources) are written to report files.
15+
#
16+
# The e2e config sets fail_on_error: false so the pipeline succeeds even when
17+
# validation errors are found. Report files capture all OperationOutcomes for review.
18+
19+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
20+
TEST_DIR="$SCRIPT_DIR/../test"
21+
22+
# Colors for output
23+
RED='\033[0;31m'
24+
GREEN='\033[0;32m'
25+
YELLOW='\033[1;33m'
26+
NC='\033[0m' # No Color
27+
28+
echo "=== Validation E2E Test ==="
29+
echo ""
30+
31+
cd "$TEST_DIR"
32+
33+
# Copy aether binary into container
34+
echo "Copying aether binary into container..."
35+
docker compose cp ../../bin/aether aether-runner:/app/aether
36+
docker compose exec -T aether-runner chmod +x /app/aether
37+
38+
# Generate a unique job ID
39+
JOB_ID=$(uuidgen | tr '[:upper:]' '[:lower:]')
40+
echo "Creating job: $JOB_ID"
41+
42+
# Create job directory structure and copy test data
43+
echo "Setting up pre-staged job state..."
44+
docker compose exec -T aether-runner sh -c "
45+
mkdir -p /app/jobs/$JOB_ID/import
46+
mkdir -p /app/jobs/$JOB_ID/validation
47+
48+
# Copy NDJSON test files to import directory (simulating completed import step)
49+
cp /app/example-validation/testdata/*.ndjson /app/jobs/$JOB_ID/import/
50+
"
51+
52+
# Create state.json with pre-staged completed import step
53+
docker compose exec -T aether-runner sh -c "
54+
NOW=\$(date -u +%Y-%m-%dT%H:%M:%SZ)
55+
56+
cat > /app/jobs/$JOB_ID/state.json << EOF
57+
{
58+
\"job_id\": \"$JOB_ID\",
59+
\"created_at\": \"\$NOW\",
60+
\"updated_at\": \"\$NOW\",
61+
\"input_source\": \"/app/example-validation/testdata\",
62+
\"input_type\": \"local_directory\",
63+
\"current_step\": \"validation\",
64+
\"status\": \"in_progress\",
65+
\"steps\": [
66+
{
67+
\"name\": \"local_import\",
68+
\"status\": \"completed\",
69+
\"files_processed\": 3,
70+
\"bytes_processed\": 1500,
71+
\"retry_count\": 0
72+
},
73+
{
74+
\"name\": \"validation\",
75+
\"status\": \"pending\",
76+
\"files_processed\": 0,
77+
\"bytes_processed\": 0,
78+
\"retry_count\": 0
79+
}
80+
],
81+
\"config\": {
82+
\"services\": {
83+
\"validation\": {
84+
\"url\": \"http://fhir-validator:8080\",
85+
\"max_concurrent_requests\": 2,
86+
\"bundle_chunk_size_mb\": 10
87+
}
88+
},
89+
\"pipeline\": {
90+
\"enabled_steps\": [\"local_import\", \"validation\"]
91+
},
92+
\"retry\": {
93+
\"max_attempts\": 3,
94+
\"initial_backoff_ms\": 1000,
95+
\"max_backoff_ms\": 10000
96+
},
97+
\"compression\": {\"enabled\": false},
98+
\"jobs_dir\": \"./jobs\"
99+
},
100+
\"total_files\": 3,
101+
\"total_bytes\": 1500
102+
}
103+
EOF
104+
"
105+
106+
echo ""
107+
echo "Running aether pipeline continue for validation step..."
108+
echo " Job ID: $JOB_ID"
109+
echo " Validator: http://fhir-validator:8080 (internal)"
110+
echo ""
111+
112+
# Run the pipeline continue command inside the aether-runner container, capturing stdout and stderr in OUTPUT.
113+
# Config sets fail_on_error: false, so the pipeline should succeed.
114+
# Report files capture all OperationOutcomes for review; the exit code is kept in PIPELINE_EXIT so that `set -e` does not abort the script before the checks below run.
115+
PIPELINE_EXIT=0
116+
OUTPUT=$(docker compose exec -T aether-runner /app/aether pipeline continue "$JOB_ID" --config aether-validation.yaml 2>&1) || PIPELINE_EXIT=$?
117+
echo "$OUTPUT"
118+
119+
if [ $PIPELINE_EXIT -eq 0 ]; then
120+
echo -e "${GREEN}Pipeline completed successfully (validation findings are informational)${NC}"
121+
FAILED=0  # overall result flag; every later check sets it to 1 on failure
122+
else
123+
echo ""
124+
echo -e "${RED}UNEXPECTED: Pipeline failed with exit code $PIPELINE_EXIT${NC}"
125+
FAILED=1  # keep going so the report checks still run, but the script will exit non-zero
126+
fi
127+
128+
echo ""
129+
echo "Verifying validation output..."
130+
131+
VALIDATION_DIR="/app/jobs/$JOB_ID/validation"
132+
133+
# --- Check 1: Report files exist for ALL input files ---
134+
135+
for input_file in "Patient" "Condition" "InvalidPatient"; do
136+
REPORT_FILE="$input_file.validation.ndjson"
137+
echo ""
138+
echo "Checking report exists: $REPORT_FILE"
139+
140+
if docker compose exec -T aether-runner sh -c "
141+
if [ ! -f \"$VALIDATION_DIR/$REPORT_FILE\" ]; then
142+
echo 'MISSING: Report file not found'
143+
exit 1
144+
fi
145+
echo 'OK - Report file exists'
146+
"; then
147+
echo -e "${GREEN} PASS${NC}"
148+
else
149+
echo -e "${RED} FAIL${NC}"
150+
FAILED=1
151+
fi
152+
done
153+
154+
# --- Check 2: Per-file outcomes (Patient: informational only, no error-severity issues; Condition: report produced for the invalid resource) ---
155+
156+
echo ""
157+
echo "Checking Patient report has informational outcome (all valid, no errors)..."
158+
if docker compose exec -T aether-runner sh -c "
159+
set -e
160+
LINE_COUNT=\$(wc -l < \"$VALIDATION_DIR/Patient.validation.ndjson\" | tr -d ' ')
161+
if [ \"\$LINE_COUNT\" -eq 0 ]; then
162+
echo 'FAIL: Expected non-empty report with informational outcome'
163+
exit 1
164+
fi
165+
# Verify no error-severity issues in the report
166+
if grep -qE '\"severity\"\\s*:\\s*\"(error|fatal)\"' \"$VALIDATION_DIR/Patient.validation.ndjson\"; then
167+
echo 'FAIL: Expected only informational outcomes but found error-severity issues'
168+
exit 1
169+
fi
170+
echo \"OK - Patient report has \$LINE_COUNT informational entry/entries (all valid)\"
171+
"; then
172+
echo -e "${GREEN} PASS${NC}"
173+
else
174+
echo -e "${RED} FAIL${NC}"
175+
FAILED=1
176+
fi
177+
178+
echo ""
179+
echo "Checking Condition report has errors (invalid resource — missing subject, bad clinicalStatus)..."
180+
if docker compose exec -T aether-runner sh -c "
181+
set -e
182+
LINE_COUNT=\$(wc -l < \"$VALIDATION_DIR/Condition.validation.ndjson\" | tr -d ' ')
183+
if [ \"\$LINE_COUNT\" -eq 0 ] || ! grep -qE '\"severity\"\\s*:\\s*\"(error|fatal)\"' \"$VALIDATION_DIR/Condition.validation.ndjson\"; then  # non-empty alone is not enough: an informational-only report must not pass
184+
echo 'FAIL: Expected non-empty Condition report containing error-severity issues'
185+
exit 1
186+
fi
187+
echo \"OK - Condition report has \$LINE_COUNT entry/entries (errors found)\"
188+
"; then
189+
echo -e "${GREEN} PASS${NC}"
190+
else
191+
echo -e "${RED} FAIL${NC}"
192+
FAILED=1
193+
fi
194+
195+
# --- Check 3: InvalidPatient report has exactly 1 line (one error OperationOutcome for the chunk) ---
196+
197+
echo ""
198+
echo "Checking InvalidPatient report has exactly 1 entry (one chunk with errors)..."
199+
if docker compose exec -T aether-runner sh -c "
200+
set -e
201+
LINE_COUNT=\$(wc -l < \"$VALIDATION_DIR/InvalidPatient.validation.ndjson\" | tr -d ' ')
202+
if [ \"\$LINE_COUNT\" -ne 1 ]; then
203+
echo \"FAIL: Expected 1 entry, got \$LINE_COUNT\"
204+
exit 1
205+
fi
206+
echo 'OK - InvalidPatient report has 1 entry (one chunk OperationOutcome)'
207+
"; then
208+
echo -e "${GREEN} PASS${NC}"
209+
else
210+
echo -e "${RED} FAIL${NC}"
211+
FAILED=1
212+
fi
213+
214+
# --- Check 4: The error OperationOutcome contains error-severity issues ---
215+
216+
echo ""
217+
echo "Checking InvalidPatient report contains error-severity issues..."
218+
if docker compose exec -T aether-runner sh -c "
219+
set -e
220+
ERROR_COUNT=0
221+
while IFS= read -r line; do
222+
if [ -z \"\$line\" ]; then
223+
continue
224+
fi
225+
if echo \"\$line\" | grep -qE '\"severity\"\\s*:\\s*\"(error|fatal)\"'; then
226+
ERROR_COUNT=\$((ERROR_COUNT + 1))
227+
fi
228+
done < \"$VALIDATION_DIR/InvalidPatient.validation.ndjson\"
229+
230+
if [ \"\$ERROR_COUNT\" -eq 0 ]; then
231+
echo 'FAIL: No error-severity issues found in InvalidPatient report'
232+
exit 1
233+
fi
234+
echo \"OK - Found \$ERROR_COUNT OperationOutcome(s) with error-severity issues\"
235+
"; then
236+
echo -e "${GREEN} PASS${NC}"
237+
else
238+
echo -e "${RED} FAIL${NC}"
239+
FAILED=1
240+
fi
241+
242+
# --- Summary ---
243+
244+
echo ""
245+
if [ $FAILED -eq 0 ]; then
246+
echo -e "${GREEN}=== All validation checks passed ===${NC}"
247+
exit 0
248+
else
249+
echo -e "${RED}=== Some validation checks failed ===${NC}"
250+
# Print report contents for debugging
251+
echo ""
252+
echo "--- Validation report contents ---"
253+
docker compose exec -T aether-runner sh -c "
254+
for f in $VALIDATION_DIR/*.ndjson; do
255+
echo \"File: \$(basename \$f)\"
256+
cat \"\$f\"
257+
echo ''
258+
done
259+
" || true
260+
exit 1
261+
fi

.github/test/Makefile

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Makefile for Aether Test Infrastructure
22
# Located in .github/test/
33

4-
.PHONY: help services start stop torch-up torch-down torch-logs torch-test dimp-up dimp-down dimp-test flattener-up flattener-down flattener-logs blaze-up blaze-down blaze-logs test-with-services download-testdata upload-testdata e2e-test e2e-test-flattening e2e-test-direct-load clean-test-data
4+
.PHONY: help services build start stop torch-up torch-down torch-logs torch-test dimp-up dimp-down dimp-test flattener-up flattener-down flattener-logs blaze-up blaze-down blaze-logs validator-up validator-down validator-logs test-with-services download-testdata upload-testdata e2e-test e2e-test-flattening e2e-test-validation e2e-test-direct-load clean-test-data
55

66
# Default target
77
.DEFAULT_GOAL := help
@@ -16,6 +16,7 @@ help:
1616
@echo " torch TORCH data extraction service"
1717
@echo " dimp DIMP pseudonymization service"
1818
@echo " flattener fhir-flattener CSV transformation service"
19+
@echo " validator mii-fhir-validator FHIR validation service"
1920
@echo ""
2021
@echo "Targets:"
2122
@echo " start Start all services (TORCH + DIMP + Flattener)"
@@ -35,14 +36,25 @@ help:
3536
@echo " blaze-up Start Blaze FHIR server (DSF transfer target)"
3637
@echo " blaze-down Stop Blaze FHIR server"
3738
@echo " blaze-logs Show Blaze FHIR server logs"
39+
@echo " validator-up Start mii-fhir-validator service"
40+
@echo " validator-down Stop mii-fhir-validator service"
41+
@echo " validator-logs Show mii-fhir-validator service logs"
3842
@echo " test-with-services Run all tests with required services"
3943
@echo " download-testdata Download test data (only if not present)"
4044
@echo " upload-testdata Upload test data to TORCH (requires services)"
4145
@echo " e2e-test Run end-to-end test (downloads, uploads, runs pipeline)"
4246
@echo " e2e-test-flattening Run flattening E2E test with pre-staged example data"
47+
@echo " e2e-test-validation Run validation E2E test with pre-staged example data"
4348
@echo " e2e-test-direct-load Run direct resource load E2E test (torch → send)"
49+
@echo " build Build aether binary"
4450
@echo " clean-test-data Clean test data and temporary files"
4551

52+
## build: Build aether binary
53+
build:
54+
@echo "Building aether binary..."
55+
@cd ../.. && make build
56+
@echo "✓ aether binary built"
57+
4658
## services: Alias for help
4759
services: help
4860

@@ -155,6 +167,23 @@ blaze-logs:
155167
@echo "Showing Blaze FHIR server logs..."
156168
cd blaze && docker compose logs -f
157169

170+
## validator-up: Start mii-fhir-validator service
171+
validator-up:
172+
@echo "Starting mii-fhir-validator service..."
173+
cd validator && docker compose up -d
174+
@echo "✓ mii-fhir-validator service starting (may take up to 60s)"
175+
176+
## validator-down: Stop mii-fhir-validator service
177+
validator-down:
178+
@echo "Stopping mii-fhir-validator service..."
179+
cd validator && docker compose down
180+
@echo "✓ mii-fhir-validator service stopped"
181+
182+
## validator-logs: Show mii-fhir-validator service logs
183+
validator-logs:
184+
@echo "Showing mii-fhir-validator service logs..."
185+
cd validator && docker compose logs -f
186+
158187
## dimp-test: Run integration tests with DIMP service
159188
dimp-test:
160189
@echo "Running integration tests with DIMP service..."
@@ -223,6 +252,15 @@ e2e-test-flattening:
223252
@../scripts/run-flattening-e2e-test.sh
224253
@echo "✓ Flattening E2E test complete"
225254

255+
## e2e-test-validation: Run validation E2E test with pre-staged example data
256+
e2e-test-validation:
257+
@echo "Running validation E2E test..."
258+
@echo "Starting fhir-validator and aether-runner services..."
259+
@docker compose up -d --wait aether-runner fhir-validator
260+
@echo "Running validation E2E test script..."
261+
@../scripts/run-validation-e2e-test.sh
262+
@echo "✓ Validation E2E test complete"
263+
226264
## e2e-test-direct-load: Run direct resource load E2E test (torch → send)
227265
e2e-test-direct-load: download-testdata
228266
@echo "Running direct resource load E2E test..."
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Aether Validation E2E Test Configuration
2+
# Uses internal Docker hostnames since aether runs inside the Docker network
3+
4+
services:
5+
# Validation Service (mii-fhir-validator)
6+
validation:
7+
url: "http://fhir-validator:8080"
8+
max_concurrent_requests: 2  # presumably caps parallel requests to the validator — confirm
9+
bundle_chunk_size_mb: 10  # presumably the maximum size of each validation Bundle chunk in MB — confirm
10+
fail_on_error: false  # overrides the default (true) so the pipeline succeeds despite validation errors; findings go to the report files
11+
12+
pipeline:
13+
# Pipeline starts from pre-staged data after local_import step
14+
enabled_steps:
15+
- local_import # Already completed (pre-staged)
16+
- validation # This is the step being tested
17+
18+
# Disable compression for simpler test verification
19+
compression:
20+
enabled: false
21+
22+
retry:
23+
max_attempts: 3
24+
initial_backoff_ms: 1000
25+
max_backoff_ms: 10000
26+
27+
# Directory to store job state and data
28+
jobs_dir: "./jobs"

.github/test/compose.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ include:
88
- dimp/compose.yaml
99
- flattener/compose.yaml
1010
- blaze/compose.yaml
11+
- validator/compose.yaml
1112

1213
services:
1314
# E2E test runner - runs aether inside the Docker network
@@ -19,14 +20,17 @@ services:
1920
volumes:
2021
- ./aether.yaml:/app/aether.yaml:ro
2122
- ./aether-flattening.yaml:/app/aether-flattening.yaml:ro
23+
- ./aether-validation.yaml:/app/aether-validation.yaml:ro
2224
- ./aether-direct-load.yaml:/app/aether-direct-load.yaml:ro
2325
- ./torch/queries:/app/torch/queries:ro
2426
- ./example-flattening:/app/example-flattening:ro
27+
- ./example-validation:/app/example-validation:ro
2528
- ./jobs:/app/jobs
2629
# Keep container running so we can exec into it
2730
command: ["sleep", "infinity"]
2831
depends_on:
2932
- torch-proxy
3033
- fhir-pseudonymizer
3134
- fhir-flattener
35+
- fhir-validator
3236
- blaze-fhir

0 commit comments

Comments
 (0)