Skip to content

Commit 771aff7

Browse files
authored
Merge pull request #58 from jdkent/enh/add_processed_db
[FIX] re-run errors
2 parents 39ba02a + 98790ff commit 771aff7

File tree

14 files changed

+5571
-8
lines changed

14 files changed

+5571
-8
lines changed

ns_extract/pipelines/base.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -390,18 +390,20 @@ def __identify_matching_results(
390390

391391
for dbid, study in dataset.data.items():
392392
# Get existing input file hashes for this study
393-
existing = existing_results.get(dbid, {}).get("inputs", {})
393+
existing_output = existing_results.get(dbid, {})
394+
existing = existing_output.get("inputs", {})
394395

395-
# Skip if no existing results and there are current inputs
396-
if not existing and dbid in study_inputs and study_inputs[dbid]:
396+
# There is no match if there are no outputs
397+
if not existing_output:
397398
result_matches[dbid] = False
398399
continue
399-
elif not study_inputs[dbid]:
400-
# no current inputs, so results are matching
400+
elif not study_inputs[dbid] and existing_output:
401+
# no current inputs and output file already exists
402+
# so results are matching
401403
result_matches[dbid] = True
402404
continue
403405

404-
# Use __are_file_hashes_identical to compare hashes
406+
# Compare file hashes to determine if the inputs have changed
405407
result_matches[dbid] = self.__do_file_hashes_match(
406408
study_inputs[dbid], existing
407409
)

ns_extract/pipelines/participant_demographics/model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ def post_process(self, results, study_inputs, **kwargs):
2222
# Clean known issues with GPT demographics result
2323
cleaned_results = super().post_process(results, study_inputs, **kwargs)
2424
for study_id, study_result in cleaned_results.items():
25+
if study_result == dict():
26+
study_result = {"groups": []}
27+
continue
2528
meta_keys = ["pmid", "rank", "start_char", "end_char", "id"]
2629
meta_keys = [k for k in meta_keys if k in study_result]
2730

ns_extract/pipelines/utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
pipeline implementations.
1010
"""
1111

12+
import math
1213
import hashlib
1314
import inspect
1415
from pathlib import Path
@@ -66,7 +67,12 @@ def _load_json(self, file_path: Path) -> Dict[str, Any]:
6667
json.JSONDecodeError: If file contains invalid JSON
6768
"""
6869
with file_path.open("r") as f:
69-
return json.load(f)
70+
data = json.load(f)
71+
if isinstance(data.get("title", ""), float) and math.isnan(data["title"]):
72+
data["title"] = ""
73+
if isinstance(data.get("abstract", ""), float) and math.isnan(data["abstract"]):
74+
data["abstract"] = ""
75+
return data
7076

7177
def _write_json(self, file_path: Path, data: Dict[str, Any]) -> None:
7278
"""Write data to a JSON file.

tests/cassettes/test_nv_task/test_TaskExtractor.yaml

Lines changed: 749 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"pmid": "22410476", "doi": "10.1016/j.brs.2012.01.006", "pmcid": null}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"title": "Modulation of verbal fluency networks by transcranial direct current stimulation (tDCS) in Parkinson\u2019s disease",
3+
"authors": "Pereira, Joana B;Junque, Carme;Bartres-Faz, David;Marti, Maria J;Sala-Llonch, Roser;Compta, Yarko;Falcon, Carles;Vendrell, Pere;Pascual-Leone, Alvaro;Valls-Sole, Josep;Tolosa, Eduardo",
4+
"journal": "Brain stimulation",
5+
"keywords": null,
6+
"abstract": null,
7+
"publication_year": 2013,
8+
"coordinate_space": "UNKNOWN",
9+
"license": null,
10+
"text": false
11+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"pmid": "28301868", "doi": "10.1097/PHM.0000000000000582", "pmcid": null}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"title": "Quadriplegia Due to Injury of Corticofugal Tracts from Secondary Motor Area in a Patient With Severe Traumatic Brain Injury.",
3+
"authors": "Jang, Sung Ho;Kim, Seong Ho;Lee, Han Do",
4+
"journal": "American journal of physical medicine & rehabilitation",
5+
"keywords": null,
6+
"abstract": NaN,
7+
"publication_year": 2017,
8+
"coordinate_space": "UNKNOWN",
9+
"license": null,
10+
"text": true
11+
}

0 commit comments

Comments
 (0)