Skip to content

Commit 813005f

Browse files
authored
Merge pull request #80 from yibeichan/fix-readonly
fix isVis based on Field annotation
2 parents e8aeb0d + b84b05b commit 813005f

File tree

4 files changed

+257
-55
lines changed

4 files changed

+257
-55
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ dependencies = [
1414
"pyyaml",
1515
"beautifulsoup4",
1616
"lxml",
17-
"pydantic >= 2.0"
17+
"pydantic >= 2.0",
18+
"pandas"
1819
]
1920
description = "Reproschema Python library"
2021
# Version from setuptools_scm

reproschema/redcap2reproschema.py

Lines changed: 126 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import csv
21
import os
32
import re
43
from pathlib import Path
54

5+
import pandas as pd
66
import yaml
77
from bs4 import BeautifulSoup
88

@@ -17,7 +17,7 @@
1717
"Field Note": "description",
1818
# TODO: often "Field Annotation" has "@HIDDEN" and other markers
1919
# TODO: not sure if this can ever be treated as description
20-
# "Field Annotation": "description", # column R
20+
# "Field Annotation": "isVis", # column R
2121
"Section Header": "preamble", # column C (need double-check)
2222
"Field Label": "question", # column E
2323
"Field Type": "inputType", # column D
@@ -82,7 +82,9 @@ def clean_header(header):
8282
cleaned_header = {}
8383
for k, v in header.items():
8484
# Strip BOM, whitespace, and enclosing quotation marks if present
85-
cleaned_key = k.lstrip("\ufeff").strip().strip('"')
85+
cleaned_key = (
86+
k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
87+
)
8688
cleaned_header[cleaned_key] = v
8789
return cleaned_header
8890

@@ -99,6 +101,12 @@ def normalize_condition(condition_str, field_type=None):
99101
return False
100102
elif condition_str is None:
101103
return None
104+
elif not isinstance(condition_str, str):
105+
# Convert non-string types to string, or return as is if conversion doesn't make sense
106+
try:
107+
condition_str = str(condition_str)
108+
except:
109+
return condition_str
102110

103111
re_parentheses = re.compile(r"\(([0-9]*)\)")
104112
re_non_gt_lt_equal = re.compile(r"([^>|<])=")
@@ -137,17 +145,42 @@ def process_field_properties(data):
137145
else:
138146
condition = True
139147

148+
# Check Field Annotation for special flags - safely handle non-string values
149+
annotation = (
150+
str(data.get("Field Annotation", "")).upper()
151+
if data.get("Field Annotation") is not None
152+
else ""
153+
)
154+
if (
155+
condition
156+
and isinstance(annotation, str)
157+
and (
158+
"@READONLY" in annotation
159+
or "@HIDDEN" in annotation
160+
or "@CALCTEXT" in annotation
161+
)
162+
):
163+
condition = False
164+
140165
prop_obj = {
141166
"variableName": data["Variable / Field Name"],
142167
"isAbout": f"items/{data['Variable / Field Name']}",
143168
"isVis": condition,
144169
}
145-
if data["Required Field?"]:
146-
if data["Required Field?"] in "y":
170+
171+
# Handle Required Field check, accounting for NaN values and empty strings
172+
required_field = data.get("Required Field?")
173+
if (
174+
pd.notna(required_field) and str(required_field).strip()
175+
): # Check if value is not NaN and not empty
176+
if str(required_field).lower() == "y":
147177
prop_obj["valueRequired"] = True
148-
else:
149-
raise (
150-
f"value {data['Required Field?']} not supported yet for redcap:Required Field?"
178+
elif str(required_field).lower() not in [
179+
"",
180+
"n",
181+
]: # Only raise error for unexpected values
182+
raise ValueError(
183+
f"value {required_field} not supported yet for redcap:Required Field?"
151184
)
152185
return prop_obj
153186

@@ -246,6 +279,16 @@ def process_choices(choices_str, field_name):
246279

247280
def parse_html(input_string, default_language="en"):
248281
result = {}
282+
283+
# Handle non-string input
284+
if not isinstance(input_string, str):
285+
if pd.isna(input_string): # Handle NaN values
286+
return {default_language: ""}
287+
try:
288+
input_string = str(input_string)
289+
except:
290+
return {default_language: str(input_string)}
291+
249292
soup = BeautifulSoup(input_string, "html.parser")
250293

251294
lang_elements = soup.find_all(True, {"lang": True})
@@ -284,19 +327,30 @@ def process_row(
284327

285328
field_type = field.get("Field Type", "")
286329
input_type, value_type = parse_field_type_and_value(field)
287-
rowData["ui"] = {"inputType": input_type}
330+
331+
# Initialize ui object with common properties
332+
ui_obj = {"inputType": input_type}
333+
334+
# Handle readonly status first - this affects UI behavior
335+
annotation = str(field.get("Field Annotation", "")).upper()
336+
if (
337+
field_type in COMPUTE_LIST
338+
or "@READONLY" in annotation
339+
or "@CALCTEXT" in annotation
340+
):
341+
ui_obj["readonlyValue"] = True
342+
343+
rowData["ui"] = ui_obj
288344
rowData["responseOptions"] = {"valueType": [value_type]}
289345

290-
# setting additional fields for some field types
346+
# Handle specific field type configurations
291347
if field_type == "yesno":
292348
rowData["responseOptions"]["choices"] = [
293349
{"name": {"en": "Yes"}, "value": 1},
294350
{"name": {"en": "No"}, "value": 0},
295351
]
296352
elif field_type == "checkbox":
297353
rowData["responseOptions"]["multipleChoice"] = True
298-
elif field_type in COMPUTE_LIST:
299-
rowData["ui"]["readonlyValue"] = True
300354

301355
for key, value in field.items():
302356
if SCHEMA_MAP.get(key) in ["question", "description"] and value:
@@ -498,52 +552,79 @@ def parse_language_iso_codes(input_string):
498552
]
499553

500554

501-
def process_csv(csv_file, abs_folder_path, schema_context_url, protocol_name):
    """Read a REDCap data dictionary CSV and group its rows by form.

    Args:
        csv_file: Path of the data dictionary CSV (a UTF-8 BOM is tolerated).
        abs_folder_path: Output root; ``activities/<form>/items`` and
            ``<protocol_name>`` directories are created below it.
        schema_context_url: Accepted for interface compatibility; not used
            inside this function.
        protocol_name: Name of the protocol directory to create.

    Returns:
        A tuple ``(datas, order, compute, languages)``:
        ``datas`` maps form name -> list of row dicts,
        ``order`` maps form name -> list of ``"items/<field>"`` entries,
        ``compute`` maps form name -> list of
        ``{"variableName", "jsExpression"}`` dicts, and ``languages`` is
        currently always an empty list (language detection is disabled).
    """
    datas = {}
    order = {}
    compute = {}
    languages = []

    # utf-8-sig strips a leading BOM; row order of the file is preserved.
    df = pd.read_csv(csv_file, encoding="utf-8-sig")

    # Clean column names; non-string labels are passed through untouched,
    # mirroring clean_header() in this module.
    df.columns = df.columns.map(
        lambda x: (
            x.strip().strip('"').lstrip("\ufeff") if isinstance(x, str) else x
        )
    )

    # Blank out missing cells in text columns *before* stringifying, rather
    # than relying on NaN being rendered as the literal "nan".
    object_columns = df.select_dtypes(include=["object"]).columns
    for col in object_columns:
        df[col] = df[col].where(df[col].notna(), "").astype(str)

    # Initialize structures (and the items folder) for each unique form.
    for form_name in df["Form Name"].unique():
        datas[form_name] = []
        order[form_name] = []
        compute[form_name] = []
        os.makedirs(
            f"{abs_folder_path}/activities/{form_name}/items", exist_ok=True
        )

    # TODO: should we bring back the language
    # if not languages:
    #    languages = parse_language_iso_codes(row["Field Label"])

    # Process rows in original order.
    for _, row in df.iterrows():
        form_name = row["Form Name"]
        field_name = row["Variable / Field Name"]
        field_type = row.get("Field Type", "")
        field_annotation = row.get("Field Annotation")

        datas[form_name].append(row.to_dict())

        if field_type in COMPUTE_LIST:
            condition = normalize_condition(
                row["Choices, Calculations, OR Slider Labels"],
                field_type=field_type,
            )
            compute[form_name].append(
                {
                    "variableName": field_name,
                    "jsExpression": condition,
                }
            )
        elif (
            isinstance(field_annotation, str)
            and "@CALCTEXT" in field_annotation.upper()
        ):
            expression = _extract_calctext_expression(field_annotation)
            # NOTE(review): a @CALCTEXT tag without a parenthesised
            # expression ends up in neither `compute` nor `order` (same as
            # before this change) -- confirm that this is intended.
            if expression is not None:
                compute[form_name].append(
                    {
                        "variableName": field_name,
                        "jsExpression": normalize_condition(expression),
                    }
                )
        else:
            order[form_name].append(f"items/{field_name}")

    os.makedirs(f"{abs_folder_path}/{protocol_name}", exist_ok=True)
    return datas, order, compute, languages


def _extract_calctext_expression(annotation):
    """Return the expression inside the first ``@CALCTEXT(...)`` of *annotation*.

    The tag is matched case-insensitively, consistent with the
    case-insensitive check done by the caller. The closing parenthesis is
    located by balancing parentheses (those inside quoted strings are
    ignored), so a later tag such as ``@DEFAULT(0)`` is not swallowed into
    the expression. If the parentheses never balance, everything up to the
    last ``)`` is returned. ``None`` means no parenthesised expression exists.
    """
    opening = re.search(r"@CALCTEXT\(", annotation, flags=re.IGNORECASE)
    if opening is None:
        return None

    begin = opening.end()
    depth = 1
    quote = None  # the quote character currently open, if any
    for idx in range(begin, len(annotation)):
        char = annotation[idx]
        if quote is not None:
            if char == quote:
                quote = None
        elif char in "'\"":
            quote = char
        elif char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                return annotation[begin:idx]

    # Unbalanced input: fall back to "up to the last closing parenthesis".
    last_close = annotation.rfind(")", begin)
    return annotation[begin:last_close] if last_close != -1 else None
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import csv
2+
3+
import pytest
4+
5+
from ..redcap2reproschema import process_field_properties
6+
7+
8+
def test_process_field_properties_calctext():
    """@CALCTEXT-annotated fields must come back with ``isVis`` set to False.

    Covers a bare tag, nested conditional logic, arithmetic, and nested
    ``if`` expressions, with and without branching logic.
    """
    # (variable name, Field Annotation, branching logic)
    scenarios = [
        # Simple CALCTEXT
        ("test_var", "@CALCTEXT", ""),
        # Complex CALCTEXT with conditional logic
        (
            "parkinsons_diagnosis",
            "@CALCTEXT(if(([diagnosis_parkinsons_gsd_category_1(bradykinesia)] && ([diagnosis_parkinsons_gsd_category_1(tremor)] || [diagnosis_parkinsons_gsd_category_1(rigidity)])), 'Yes', 'No'))",
            "[some_other_condition] = 1",
        ),
        # CALCTEXT with numerical operations
        (
            "bmi",
            "@CALCTEXT([weight]/([height]*[height]))",
            "[weight] > 0 and [height] > 0",
        ),
        # CALCTEXT with multiple nested conditions
        (
            "complex_score",
            "@CALCTEXT(if([score1] > 10 && [score2] < 5, 'High', if([score1] > 5, 'Medium', 'Low')))",
            "",
        ),
    ]

    for name, annotation, branching in scenarios:
        row = {
            "Variable / Field Name": name,
            "Required Field?": "",
            "Field Annotation": annotation,
            "Branching Logic (Show field only if...)": branching,
        }
        wanted = {
            "variableName": name,
            "isAbout": f"items/{name}",
            "isVis": False,
        }
        actual = process_field_properties(row)
        for key, expected_value in wanted.items():
            assert actual[key] == expected_value, (
                f"Failed for {key} in test case with annotation: {annotation}"
            )
75+
76+
77+
def test_process_field_properties_mixed_annotations():
    """Fields combining @CALCTEXT with other action tags must have ``isVis`` False."""
    # (Field Annotation, branching logic)
    scenarios = [
        # CALCTEXT with READONLY
        ("@CALCTEXT @READONLY", ""),
        # CALCTEXT with HIDDEN
        ("@HIDDEN @CALCTEXT(if([var1] > 0, 1, 0))", ""),
        # Complex CALCTEXT with other annotations
        (
            "@CALCTEXT(if(([var1] && [var2]), 'Yes', 'No')) @READONLY @HIDDEN-SURVEY",
            "[condition] = 1",
        ),
    ]

    for annotation, branching in scenarios:
        row = {
            "Variable / Field Name": "test_var",
            "Required Field?": "",
            "Field Annotation": annotation,
            "Branching Logic (Show field only if...)": branching,
        }
        wanted = {"isVis": False}
        actual = process_field_properties(row)
        for key, expected_value in wanted.items():
            assert actual[key] == expected_value, (
                f"Failed for {key} in test case with annotation: {annotation}"
            )

0 commit comments

Comments
 (0)