Skip to content

Commit 105489b

Browse files
authored
Merge pull request #37 from yibeichan/master
update conversion tools and their tests
2 parents 9d3e929 + ab7c051 commit 105489b

File tree

6 files changed

+126
-81
lines changed

6 files changed

+126
-81
lines changed

reproschema/redcap2reproschema.py

Lines changed: 74 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,43 @@
1010

1111

1212
def clean_header(header):
13-
return {k.lstrip("\ufeff"): v for k, v in header.items()}
13+
cleaned_header = {}
14+
for k, v in header.items():
15+
# Strip BOM, whitespace, and enclosing quotation marks if present
16+
cleaned_key = k.lstrip("\ufeff").strip().strip('"')
17+
cleaned_header[cleaned_key] = v
18+
return cleaned_header
1419

1520

1621
def normalize_condition(condition_str):
22+
# Regular expressions for various pattern replacements
1723
re_parentheses = re.compile(r"\(([0-9]*)\)")
1824
re_non_gt_lt_equal = re.compile(r"([^>|<])=")
1925
re_brackets = re.compile(r"\[([^\]]*)\]")
26+
re_extra_spaces = re.compile(r"\s+")
27+
re_double_quotes = re.compile(r'"')
28+
re_or = re.compile(r"\bor\b") # Match 'or' as whole word
2029

30+
# Apply regex replacements
2131
condition_str = re_parentheses.sub(r"___\1", condition_str)
2232
condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str)
23-
condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ")
2433
condition_str = re_brackets.sub(r" \1 ", condition_str)
25-
return condition_str
34+
35+
# Replace 'or' with '||', ensuring not to replace '||'
36+
condition_str = re_or.sub("||", condition_str)
37+
38+
# Replace 'and' with '&&'
39+
condition_str = condition_str.replace(" and ", " && ")
40+
41+
# Trim extra spaces and replace double quotes with single quotes
42+
condition_str = re_extra_spaces.sub(
43+
" ", condition_str
44+
).strip() # Reduce multiple spaces to a single space
45+
condition_str = re_double_quotes.sub(
46+
"'", condition_str
47+
) # Replace double quotes with single quotes
48+
49+
return condition_str.strip()
2650

2751

2852
def process_visibility(data):
@@ -42,7 +66,11 @@ def process_visibility(data):
4266

4367
def parse_field_type_and_value(field, input_type_map):
4468
field_type = field.get("Field Type", "")
45-
input_type = input_type_map.get(field_type, field_type)
69+
# Check if field_type is 'yesno' and directly assign 'radio' as the input type
70+
if field_type == "yesno":
71+
input_type = "radio" # Directly set to 'radio' for 'yesno' fields
72+
else:
73+
input_type = input_type_map.get(field_type, field_type) # Original logic
4674

4775
# Initialize the default value type as string
4876
value_type = "xsd:string"
@@ -55,7 +83,8 @@ def parse_field_type_and_value(field, input_type_map):
5583
"time_": "xsd:time",
5684
"email": "xsd:string",
5785
"phone": "xsd:string",
58-
} # todo: input_type="signature"
86+
# No change needed here for 'yesno', as it's handled above
87+
}
5988

6089
# Get the validation type from the field, if available
6190
validation_type = field.get(
@@ -91,10 +120,11 @@ def process_choices(field_type, choices_str):
91120
except ValueError:
92121
value = parts[0]
93122

94-
choice_obj = {"name": parts[1], "value": value}
95-
if len(parts) == 3:
96-
# Handle image url
97-
choice_obj["schema:image"] = f"{parts[2]}.png"
123+
choice_obj = {"name": " ".join(parts[1:]), "value": value}
124+
# remove image for now
125+
# if len(parts) == 3:
126+
# # Handle image url
127+
# choice_obj["image"] = f"{parts[2]}.png"
98128
choices.append(choice_obj)
99129
return choices
100130

@@ -156,7 +186,7 @@ def process_row(
156186

157187
rowData = {
158188
"@context": schema_context_url,
159-
"@type": "reproschema:Field",
189+
"@type": "reproschema:Item",
160190
"@id": item_id,
161191
"prefLabel": item_id,
162192
"description": f"{item_id} of {form_name}",
@@ -179,10 +209,7 @@ def process_row(
179209
}
180210

181211
for key, value in field.items():
182-
if (
183-
schema_map.get(key) in ["question", "schema:description", "preamble"]
184-
and value
185-
):
212+
if schema_map.get(key) in ["question", "description", "preamble"] and value:
186213
rowData.update({schema_map[key]: parse_html(value)})
187214

188215
elif schema_map.get(key) == "allow" and value:
@@ -214,21 +241,15 @@ def process_row(
214241
}
215242
)
216243

217-
elif schema_map.get(key) == "visibility" and value:
218-
condition = normalize_condition(value)
219-
rowData.setdefault("visibility", []).append(
220-
{"variableName": field["Variable / Field Name"], "isVis": condition}
221-
)
222-
223-
elif key == "Identifier?" and value:
224-
identifier_val = value.lower() == "y"
225-
rowData.update(
226-
{
227-
schema_map[key]: [
228-
{"legalStandard": "unknown", "isIdentifier": identifier_val}
229-
]
230-
}
231-
)
244+
# elif key == "Identifier?" and value:
245+
# identifier_val = value.lower() == "y"
246+
# rowData.update(
247+
# {
248+
# schema_map[key]: [
249+
# {"legalStandard": "unknown", "isIdentifier": identifier_val}
250+
# ]
251+
# }
252+
# )
232253

233254
elif key in additional_notes_list and value:
234255
notes_obj = {"source": "redcap", "column": key, "value": value}
@@ -240,6 +261,7 @@ def process_row(
240261
def create_form_schema(
241262
abs_folder_path,
242263
schema_context_url,
264+
redcap_version,
243265
form_name,
244266
activity_display_name,
245267
activity_description,
@@ -259,16 +281,17 @@ def create_form_schema(
259281
"prefLabel": activity_display_name,
260282
"description": activity_description,
261283
"schemaVersion": "1.0.0-rc4",
262-
"version": "0.0.1",
284+
"version": redcap_version,
263285
"ui": {
264286
"order": unique_order,
265287
"addProperties": bl_list,
266288
"shuffle": False,
267289
},
268290
}
269291

270-
if matrix_list:
271-
json_ld["matrixInfo"] = matrix_list
292+
# remove matrixInfo to pass validation
293+
# if matrix_list:
294+
# json_ld["matrixInfo"] = matrix_list
272295
if scores_list:
273296
json_ld["scoringLogic"] = scores_list
274297

@@ -296,6 +319,7 @@ def process_activities(activity_name, protocol_visibility_obj, protocol_order):
296319
def create_protocol_schema(
297320
abs_folder_path,
298321
schema_context_url,
322+
redcap_version,
299323
protocol_name,
300324
protocol_display_name,
301325
protocol_description,
@@ -307,31 +331,33 @@ def create_protocol_schema(
307331
"@context": schema_context_url,
308332
"@type": "reproschema:Protocol",
309333
"@id": f"{protocol_name}_schema",
310-
"skos:prefLabel": protocol_display_name,
311-
"skos:altLabel": f"{protocol_name}_schema",
312-
"schema:description": protocol_description,
313-
"schema:schemaVersion": "1.0.0-rc4",
314-
"schema:version": "0.0.1",
334+
"prefLabel": protocol_display_name,
335+
"altLabel": f"{protocol_name}_schema",
336+
"description": protocol_description,
337+
"schemaVersion": "1.0.0-rc4",
338+
"version": redcap_version,
315339
"ui": {
316340
"addProperties": [],
317-
"order": protocol_order,
341+
"order": [],
318342
"shuffle": False,
319343
},
320344
}
321345

322346
# Populate addProperties list
323347
for activity in protocol_order:
348+
full_path = f"../activities/{activity}/{activity}_schema"
324349
add_property = {
325-
"isAbout": f"../activities/{activity}/{activity}_schema",
350+
"isAbout": full_path,
326351
"variableName": f"{activity}_schema",
327352
# Assuming activity name as prefLabel, update as needed
328353
"prefLabel": activity.replace("_", " ").title(),
354+
"isVis": protocol_visibility_obj.get(
355+
activity, True
356+
), # Default to True if not specified
329357
}
330358
protocol_schema["ui"]["addProperties"].append(add_property)
331-
332-
# Add visibility if needed
333-
if protocol_visibility_obj:
334-
protocol_schema["ui"]["visibility"] = protocol_visibility_obj
359+
# Add the full path to the order list
360+
protocol_schema["ui"]["order"].append(full_path)
335361

336362
protocol_dir = f"{abs_folder_path}/{protocol_name}"
337363
schema_file = f"{protocol_name}_schema"
@@ -420,6 +446,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
420446
protocol_name = protocol.get("protocol_name")
421447
protocol_display_name = protocol.get("protocol_display_name")
422448
protocol_description = protocol.get("protocol_description")
449+
redcap_version = protocol.get("redcap_version")
423450

424451
if not protocol_name:
425452
raise ValueError("Protocol name not specified in the YAML file.")
@@ -434,7 +461,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
434461
abs_folder_path = os.path.abspath(protocol_name)
435462

436463
if schema_context_url is None:
437-
schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic"
464+
schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/efb74e155c09e13aa009ea04609ba4f1152fcbc6/contexts/reproschema_new"
438465

439466
# Initialize variables
440467
schema_map = {
@@ -451,7 +478,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
451478
"Choices, Calculations, OR Slider Labels": "choices", # column F
452479
"Branching Logic (Show field only if...)": "visibility", # column L
453480
"Custom Alignment": "customAlignment", # column N
454-
"Identifier?": "identifiable", # column K
481+
# "Identifier?": "identifiable", # column K
455482
"multipleChoice": "multipleChoice",
456483
"responseType": "@type",
457484
}
@@ -515,6 +542,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
515542
create_form_schema(
516543
abs_folder_path,
517544
schema_context_url,
545+
redcap_version,
518546
form_name,
519547
activity_display_name,
520548
activity_description,
@@ -530,6 +558,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
530558
create_protocol_schema(
531559
abs_folder_path,
532560
schema_context_url,
561+
redcap_version,
533562
protocol_name,
534563
protocol_display_name,
535564
protocol_description,

reproschema/reproschema2redcap.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def get_csv_data(dir_path):
142142
if protocol_dir.is_dir():
143143
# Check for a _schema file in each directory
144144
schema_file = next(protocol_dir.glob("*_schema"), None)
145+
print(f"Found schema file: {schema_file}")
145146
if schema_file:
146147
# Process the found _schema file
147148
parsed_protocol_json = read_json_file(schema_file)
@@ -152,8 +153,14 @@ def get_csv_data(dir_path):
152153
normalized_relative_path = Path(
153154
relative_activity_path.lstrip("../")
154155
)
155-
activity_path = dir_path / normalized_relative_path
156-
print(f"Processing activity {activity_path}")
156+
157+
activity_path = (
158+
dir_path
159+
/ "activities"
160+
/ normalized_relative_path
161+
/ (normalized_relative_path.name + "_schema")
162+
)
163+
157164
parsed_activity_json = read_json_file(activity_path)
158165

159166
if parsed_activity_json:
Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import os
22
import shutil
33
import pytest
4+
import yaml
45
from click.testing import CliRunner
5-
from ..cli import main # Import the Click group
6+
from ..cli import main
67

7-
# Assuming your test files are located in a 'tests' directory
88
CSV_FILE_NAME = "redcap_dict.csv"
99
YAML_FILE_NAME = "redcap2rs.yaml"
1010
CSV_TEST_FILE = os.path.join(
@@ -15,17 +15,34 @@
1515
)
1616

1717

18-
def test_redcap2reproschema_success():
18+
def test_redcap2reproschema(tmpdir):
1919
runner = CliRunner()
2020

21-
with runner.isolated_filesystem():
22-
# Copy the test files to the isolated filesystem
23-
shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME)
24-
shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME)
21+
temp_csv_file = tmpdir.join(CSV_FILE_NAME)
22+
temp_yaml_file = tmpdir.join(YAML_FILE_NAME)
23+
24+
shutil.copy(CSV_TEST_FILE, str(temp_csv_file)) # Convert to string
25+
shutil.copy(YAML_TEST_FILE, str(temp_yaml_file)) # Convert to string
26+
27+
# Change the current working directory to tmpdir
28+
with tmpdir.as_cwd():
29+
# Read YAML to find the expected output directory name
30+
with open(str(temp_yaml_file), "r") as file: # Convert to string
31+
protocol = yaml.safe_load(file)
32+
protocol_name = protocol.get("protocol_name", "").replace(" ", "_")
2533

26-
# Run the command within the isolated filesystem
2734
result = runner.invoke(
28-
main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME]
35+
main,
36+
[
37+
"redcap2reproschema",
38+
str(temp_csv_file),
39+
str(temp_yaml_file),
40+
], # Convert to string
2941
)
30-
print(result.output)
31-
assert result.exit_code == 0
42+
43+
assert (
44+
result.exit_code == 0
45+
), f"The command failed to execute successfully: {result.output}"
46+
assert os.path.isdir(
47+
protocol_name
48+
), f"Expected output directory '{protocol_name}' does not exist"

reproschema/tests/test_redcap2rs_data/redcap2rs.yaml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,8 @@ protocol_name: "test_redcap2rs" # Example: "My_Protocol"
99
# This name will be displayed in the application.
1010
protocol_display_name: "redcap protocols"
1111

12-
# GitHub Repository Information:
13-
# Create a GitHub repository named 'reproschema' to store your reproschema protocols.
14-
# Replace 'your_github_username' with your actual GitHub username.
15-
user_name: "yibeichan"
16-
repo_name: "redcap2reproschema" # Recommended name; can be different if preferred.
17-
repo_url: "https://github.com/{{user_name}}/{{repo_name}}"
18-
1912
# Protocol Description:
2013
# Provide a brief description of your protocol.
2114
protocol_description: "testing" # Example: "This protocol is for ..."
15+
16+
redcap_version: "3.0.0"

0 commit comments

Comments
 (0)