Skip to content

Commit 68d79af

Browse files
committed
Added new commands to reproschema
1 parent e161ae6 commit 68d79af

20 files changed

+1280
-2
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ dependencies = [
1515
"beautifulsoup4",
1616
"lxml",
1717
"pydantic >= 2.0",
18-
"pandas"
18+
"pandas",
19+
"fhir.resources>=v8.0.0",
1920
]
2021
description = "Reproschema Python library"
2122
# Version from setuptools_scm

reproschema/cli.py

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
11
import os
22
from pathlib import Path
3-
3+
from collections import OrderedDict
4+
import json
45
import click
56

7+
from collections import OrderedDict
8+
import json
9+
import os
10+
import shutil
11+
from pathlib import Path
612
from . import __version__, get_logger, set_logger_level
713
from .migrate import migrate2newschema
814
from .redcap2reproschema import redcap2reproschema as redcap2rs
915
from .reproschema2redcap import reproschema2redcap as rs2redcap
16+
from .reproschemaui2redcap import parse_survey
17+
from .reproschema2fhir import QuestionnaireGenerator
18+
from fhir.resources.questionnaire import Questionnaire
19+
import pandas as pd
20+
1021

1122
lgr = get_logger()
1223

@@ -179,3 +190,114 @@ def reproschema2redcap(input_path, output_csv_path):
179190
click.echo(
180191
f"Converted reproschema protocol from {input_path} to Redcap CSV at {output_csv_path}"
181192
)
193+
194+
195+
@main.command()
@click.argument("survey_file", type=str)
@click.argument("redcap_csv", type=str)
def reproschema_ui_to_redcap(survey_file, redcap_csv):
    """
    Generates a redcap csv from the survey data exported by reproschema ui

    survey_file is the folder holding the JSON files generated by reproschema ui
    redcap_csv is the folder in which the generated redcap.csv is stored

    """
    survey_root = Path(survey_file)

    # The record id is the first whitespace-separated token of the survey
    # folder name. normpath drops a trailing separator, which would otherwise
    # leave an empty folder name and make every survey fail below.
    folder_name = os.path.basename(os.path.normpath(survey_file))
    name_tokens = folder_name.split()
    record_id = name_tokens[0] if name_tokens else folder_name

    # Load each file recursively within the folder into its own key, keyed by
    # its path relative to the survey folder. Sorting keeps the row order of
    # the generated CSV independent of the filesystem's listing order.
    content = OrderedDict()
    for file in sorted(survey_root.rglob("*")):
        if file.is_file():
            filename = str(file.relative_to(survey_root))
            # JSON is UTF-8 by specification; do not rely on the locale.
            with open(file, "r", encoding="utf-8") as f:
                content[filename] = json.load(f)

    merged_questionnaire_data = []
    for questionnaire, survey_data in content.items():  # activity files
        try:
            merged_questionnaire_data += parse_survey(
                survey_data, record_id, questionnaire
            )
        except Exception as err:
            # Best effort: one unparsable file must not abort the export,
            # but the skip is reported instead of being swallowed silently.
            lgr.warning(
                "Skipping %s: could not parse survey (%s: %s)",
                questionnaire,
                type(err).__name__,
                err,
            )
            continue

    if not merged_questionnaire_data:
        # pd.concat raises an opaque "No objects to concatenate" on an empty
        # list; fail with the same exception type but an actionable message.
        raise ValueError(
            f"No survey data could be parsed from {survey_file}"
        )

    survey_df = pd.concat(merged_questionnaire_data, ignore_index=True)
    Path(redcap_csv).mkdir(parents=True, exist_ok=True)

    merged_csv_path = os.path.join(redcap_csv, "redcap.csv")
    survey_df.to_csv(merged_csv_path, index=False)
    click.echo(
        f"Converted reproschema-ui output from {survey_file} to Redcap CSV at {redcap_csv}"
    )
232+
233+
@main.command()
@click.argument("reproschema_questionnaire", type=str)
@click.argument("output", type=str)
def reproschema_to_fhir(reproschema_questionnaire, output):
    """
    Generates FHIR questionnaire json files from reproschema activities

    reproschema_questionnaire is the location of all reproschema activities
    output is the path to store the newly generated fhir json
    """
    output_path = Path(output)
    activities_root = Path(reproschema_questionnaire)
    if not activities_root.is_dir():
        raise FileNotFoundError(
            f"{activities_root} does not exist. Please check if folder exists and is located at the correct directory"
        )

    # Each immediate sub-folder is treated as one reproschema activity.
    # Sorting makes the processing order independent of the filesystem.
    activity_folders = sorted(
        f for f in activities_root.iterdir() if f.is_dir()
    )
    for reproschema_folder in activity_folders:
        # Load each file recursively within the folder into its own key in
        # the reproschema_content dict. Keys are paths relative to the
        # activity folder, since files can reference each other by relative
        # path.
        reproschema_content = OrderedDict()
        for file in sorted(reproschema_folder.glob("**/*")):
            if file.is_file():
                filename = str(file.relative_to(reproschema_folder))
                with open(file, encoding="utf-8") as f:
                    reproschema_content[filename] = json.load(f)

        schema_names = [
            name for name in reproschema_content if name.endswith("_schema")
        ]
        if not schema_names:
            # Previously surfaced as a bare IndexError with no context.
            raise ValueError(
                f"No file ending in '_schema' found in {reproschema_folder}"
            )
        reproschema_schema = reproschema_content[schema_names[0]]

        # NOTE(review): the two keys accept different version sets
        # ("1.0.0-rc4" only under "schemaVersion") and the error message does
        # not mention "1.0.0"; kept as-is, confirm whether that is intended.
        unsupported_legacy_version = (
            "schema:version" in reproschema_schema
            and reproschema_schema["schema:version"]
            not in ("0.0.1", "1.0.0-rc1", "1.0.0")
        )
        unsupported_version = (
            "schemaVersion" in reproschema_schema
            and reproschema_schema["schemaVersion"]
            not in ("0.0.1", "1.0.0-rc1", "1.0.0-rc4", "1.0.0")
        )
        if unsupported_legacy_version or unsupported_version:
            raise ValueError(
                'Unable to work with reproschema versions other than 0.0.1, 1.0.0-rc1, and 1.0.0-rc4'
            )

        questionnaire_generator = QuestionnaireGenerator()
        fhir_questionnaire = questionnaire_generator.convert_to_fhir(
            reproschema_content
        )

        # Validate the json using fhir resources; only success/failure
        # matters, the validated model itself is not used.
        try:
            Questionnaire.model_validate(fhir_questionnaire)
        except Exception as err:
            # Chain the cause so the actual validation errors stay visible.
            raise Exception("Fhir Questionnaire is not valid") from err

        # The output folder and file are named after the activity folder.
        file_name = reproschema_folder.name

        # Start from a clean output folder so stale files from a previous
        # run do not linger next to the new questionnaire.
        dirpath = output_path / file_name
        if dirpath.is_dir():
            shutil.rmtree(dirpath)
        dirpath.mkdir(parents=True, exist_ok=True)

        with open(dirpath / f"{file_name}.json", "w", encoding="utf-8") as f:
            json.dump(fhir_questionnaire, f)
302+
303+

0 commit comments

Comments
 (0)