Skip to content

Commit 145e06c

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 7a6784b commit 145e06c

File tree

2 files changed

+63
-33
lines changed

2 files changed

+63
-33
lines changed

reproschema/redcap2reproschema.py

Lines changed: 40 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -109,24 +109,24 @@ def normalize_condition(condition_str, field_type=None):
109109
return None
110110

111111
try:
112-
112+
113113
# Clean HTML
114114
condition_str = BeautifulSoup(condition_str, "html.parser").get_text()
115115
condition_str = condition_str.strip()
116-
116+
117117
if not condition_str:
118118
return None
119119

120120
# Common operator normalizations for all types
121121
operator_replacements = [
122-
(r"\s*\+\s*", " + "), # Normalize spacing around +
123-
(r"\s*-\s*", " - "), # Normalize spacing around -
124-
(r"\s*\*\s*", " * "), # Normalize spacing around *
125-
(r"\s*\/\s*", " / "), # Normalize spacing around /
126-
(r"\s*\(\s*", "("), # Remove spaces after opening parenthesis
127-
(r"\s*\)\s*", ")"), # Remove spaces before closing parenthesis
128-
(r"\s*,\s*", ","), # Normalize spaces around commas
129-
(r"\s+", " "), # Normalize multiple spaces
122+
(r"\s*\+\s*", " + "), # Normalize spacing around +
123+
(r"\s*-\s*", " - "), # Normalize spacing around -
124+
(r"\s*\*\s*", " * "), # Normalize spacing around *
125+
(r"\s*\/\s*", " / "), # Normalize spacing around /
126+
(r"\s*\(\s*", "("), # Remove spaces after opening parenthesis
127+
(r"\s*\)\s*", ")"), # Remove spaces before closing parenthesis
128+
(r"\s*,\s*", ","), # Normalize spaces around commas
129+
(r"\s+", " "), # Normalize multiple spaces
130130
]
131131

132132
# Apply operator normalizations first
@@ -145,7 +145,7 @@ def normalize_condition(condition_str, field_type=None):
145145
(r"\[([^\]]*)\]", r"\1"), # Remove brackets and extra spaces
146146
(r"\bor\b", "||"),
147147
(r"\band\b", "&&"),
148-
(r'"', "'")
148+
(r'"', "'"),
149149
]
150150
for pattern, repl in replacements:
151151
condition_str = re.sub(pattern, repl, condition_str)
@@ -816,7 +816,9 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
816816

817817
try:
818818
df = pd.read_csv(csv_file, encoding="utf-8-sig")
819-
df.columns = df.columns.map(lambda x: x.strip().strip('"').lstrip("\ufeff"))
819+
df.columns = df.columns.map(
820+
lambda x: x.strip().strip('"').lstrip("\ufeff")
821+
)
820822

821823
required_columns = ["Form Name", "Variable / Field Name", "Field Type"]
822824
missing_columns = [
@@ -881,29 +883,42 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
881883

882884
# Case 1: Field is calc type
883885
if field_type in COMPUTE_LIST:
884-
calc_value = row_dict.get("Choices, Calculations, OR Slider Labels", "")
886+
calc_value = row_dict.get(
887+
"Choices, Calculations, OR Slider Labels", ""
888+
)
885889
if calc_value and str(calc_value).strip():
886-
compute_expression = normalize_condition(calc_value, field_type=field_type)
890+
compute_expression = normalize_condition(
891+
calc_value, field_type=field_type
892+
)
887893
if compute_expression:
888894
is_compute = True
889-
compute[form_name].append({
890-
"variableName": field_name,
891-
"jsExpression": compute_expression
892-
})
895+
compute[form_name].append(
896+
{
897+
"variableName": field_name,
898+
"jsExpression": compute_expression,
899+
}
900+
)
893901
else:
894-
print(f"Warning: Could not normalize calc expression for {field_name}: {calc_value}")
902+
print(
903+
f"Warning: Could not normalize calc expression for {field_name}: {calc_value}"
904+
)
895905

896906
# Case 2: Field has @CALCTEXT
897-
elif field_annotation and "@CALCTEXT" in str(field_annotation).upper():
907+
elif (
908+
field_annotation
909+
and "@CALCTEXT" in str(field_annotation).upper()
910+
):
898911
match = re.search(r"@CALCTEXT\((.*)\)", field_annotation)
899912
if match:
900913
compute_expression = normalize_condition(match.group(1))
901914
if compute_expression:
902915
is_compute = True
903-
compute[form_name].append({
904-
"variableName": field_name,
905-
"jsExpression": compute_expression
906-
})
916+
compute[form_name].append(
917+
{
918+
"variableName": field_name,
919+
"jsExpression": compute_expression,
920+
}
921+
)
907922

908923
# Add to order list only if not a compute field
909924
if not is_compute:
@@ -915,6 +930,7 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
915930
print(f"Error processing CSV: {str(e)}")
916931
raise
917932

933+
918934
# todo adding output path
919935
def redcap2reproschema(
920936
csv_file, yaml_file, output_path, schema_context_url=None

reproschema/tests/test_process_csv.py

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import pandas as pd
55
import pytest
66

7-
from ..redcap2reproschema import process_csv, normalize_condition
7+
from ..redcap2reproschema import normalize_condition, process_csv
88

99

1010
def test_process_csv():
@@ -25,12 +25,18 @@ def test_process_csv():
2525
assert len(datas["form1"]) == 3
2626
assert len(datas["form2"]) == 1
2727

28-
assert order["form1"] == ["items/field1"] # both field2 and field3 go to compute
28+
assert order["form1"] == [
29+
"items/field1"
30+
] # both field2 and field3 go to compute
2931
assert order["form2"] == ["items/field4"]
3032

3133
assert len(compute["form1"]) == 2
32-
assert any(item["variableName"] == "field2" for item in compute["form1"])
33-
assert any(item["variableName"] == "field3" for item in compute["form1"])
34+
assert any(
35+
item["variableName"] == "field2" for item in compute["form1"]
36+
)
37+
assert any(
38+
item["variableName"] == "field3" for item in compute["form1"]
39+
)
3440

3541

3642
def test_process_csv_missing_columns():
@@ -45,13 +51,21 @@ def test_process_csv_missing_columns():
4551

4652
def test_normalize_condition():
4753
# Test calc expressions
48-
assert normalize_condition("[field1] + [field2]", field_type="calc") == "field1 + field2"
49-
assert normalize_condition("[total]*100", field_type="calc") == "total * 100"
54+
assert (
55+
normalize_condition("[field1] + [field2]", field_type="calc")
56+
== "field1 + field2"
57+
)
58+
assert (
59+
normalize_condition("[total]*100", field_type="calc") == "total * 100"
60+
)
5061
assert normalize_condition("2+2", field_type="calc") == "2 + 2"
51-
62+
5263
# Test @CALCTEXT expressions
5364
assert normalize_condition("3*3") == "3 * 3"
54-
65+
5566
# Test branching logic
5667
assert normalize_condition("[age] = 1") == "age == 1"
57-
assert normalize_condition("[field1] = 1 or [field2] = 2") == "field1 == 1 || field2 == 2"
68+
assert (
69+
normalize_condition("[field1] = 1 or [field2] = 2")
70+
== "field1 == 1 || field2 == 2"
71+
)

0 commit comments

Comments
 (0)