Skip to content

Commit 7a1921b

Browse files
authored
Merge pull request #41 from jdkent/enh/only_post_process
Enh/only post process
2 parents 6a91a1f + 6628be4 commit 7a1921b

File tree

3 files changed

+69
-24
lines changed

3 files changed

+69
-24
lines changed

ns_extract/cli/run.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -129,11 +129,7 @@ def run_pipelines(
129129

130130
# Run pipeline
131131
pipeline_output_dir = output_path / pipeline_name
132-
pipeline.transform_dataset(
133-
dataset,
134-
pipeline_output_dir,
135-
**transform_args
136-
)
132+
pipeline.transform_dataset(dataset, pipeline_output_dir, **transform_args)
137133

138134
print(f"Completed {pipeline_name} pipeline")
139135

ns_extract/pipelines/base.py

Lines changed: 32 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -178,26 +178,39 @@ def transform_dataset(
178178
raw_file = study_dir / "raw_results.json"
179179
results_file = study_dir / "results.json"
180180

181-
# Try raw_results.json first
182-
if raw_file.exists():
183-
try:
184-
with raw_file.open() as f:
185-
raw_results[dbid] = json.load(f)
186-
except (IOError, json.JSONDecodeError) as e:
187-
raise ProcessingError(
188-
dbid, f"Failed to load raw results: {e}"
181+
try:
182+
# Try raw_results.json first
183+
if raw_file.exists():
184+
try:
185+
with raw_file.open() as f:
186+
raw_results[dbid] = json.load(f)
187+
except (IOError, json.JSONDecodeError) as e:
188+
logger.error(
189+
f"Failed to load raw results for study {dbid}: {e}"
190+
)
191+
continue
192+
# Fallback to results.json
193+
elif results_file.exists():
194+
try:
195+
with results_file.open() as f:
196+
raw_results[dbid] = json.load(f)
197+
except (IOError, json.JSONDecodeError) as e:
198+
logger.error(
199+
f"Failed to load results for study {dbid}: {e}"
200+
)
201+
continue
202+
elif post_process == "only":
203+
logger.warning(
204+
f"Skipping study {dbid}: no results found for post-processing"
189205
)
190-
# Fallback to results.json
191-
elif results_file.exists():
192-
try:
193-
with results_file.open() as f:
194-
raw_results[dbid] = json.load(f)
195-
except (IOError, json.JSONDecodeError) as e:
196-
raise ProcessingError(dbid, f"Failed to load results: {e}")
197-
elif post_process == "only":
198-
raise ProcessingError(
199-
dbid, "No results found for post-processing"
200-
)
206+
continue
207+
except Exception as e:
208+
logger.error(f"Error processing study {dbid}: {e}")
209+
continue
210+
211+
if not raw_results and post_process == "only":
212+
logger.warning("No results found for post-processing in any study")
213+
return hash_outdir
201214

202215
kwargs["raw_results"] = raw_results
203216

tests/test_example_extractor.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -392,6 +392,42 @@ def test_text_and_demographics_update(sample_data, mock_demographics, tmp_path):
392392
assert "modified_text.txt" in str(second_run_info["inputs"])
393393

394394

395+
def test_post_process_only_missing_results(sample_data, mock_demographics, tmp_path):
396+
"""Test post_process='only' gracefully handles missing results."""
397+
demographics_dir = setup_demographics_dir(tmp_path, mock_demographics)
398+
399+
# Create test data with clear transformation differences
400+
test_study_id = list(mock_demographics.keys())[0]
401+
modified_dataset = sample_data.slice([test_study_id])
402+
403+
# Set up pipeline without creating any results
404+
extractor = ExampleExtractor()
405+
input_pipeline_info = {
406+
"participant_demographics": {
407+
"version": "1.0.0",
408+
"config_hash": "abc123",
409+
"pipeline_dir": Path(demographics_dir),
410+
}
411+
}
412+
413+
# Create output dir but don't run pipeline
414+
output_dir = tmp_path / "output"
415+
output_dir.mkdir(parents=True)
416+
417+
# Try post-process only - should log warning but not error
418+
hash_dir = extractor.transform_dataset(
419+
modified_dataset,
420+
output_dir,
421+
post_process="only",
422+
input_pipeline_info=input_pipeline_info
423+
)
424+
425+
# Check directory exists but no results were created
426+
study_dir = hash_dir / test_study_id
427+
assert not (study_dir / "results.json").exists(), "No results should be created"
428+
assert not (study_dir / "raw_results.json").exists(), "No raw results should be created"
429+
430+
395431
def test_post_process_and_file_handling(sample_data, mock_demographics, tmp_path):
396432
"""Test post-processing modes and file handling behavior."""
397433
demographics_dir = setup_demographics_dir(tmp_path, mock_demographics)

0 commit comments

Comments (0)