[pre-commit.ci] auto fixes from pre-commit.com hooks

pre-commit-ci[bot] · pre-commit-ci[bot] · commit 145e06c0e61b · 2025-02-04T03:29:06.000Z
for more information, see https://pre-commit.ci
diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
@@ -109,24 +109,24 @@ def normalize_condition(condition_str, field_type=None):
             return None
 
     try:
-        
+
         # Clean HTML
         condition_str = BeautifulSoup(condition_str, "html.parser").get_text()
         condition_str = condition_str.strip()
-        
+
         if not condition_str:
             return None
 
         # Common operator normalizations for all types
         operator_replacements = [
-            (r"\s*\+\s*", " + "),      # Normalize spacing around +
-            (r"\s*-\s*", " - "),       # Normalize spacing around -
-            (r"\s*\*\s*", " * "),      # Normalize spacing around *
-            (r"\s*\/\s*", " / "),      # Normalize spacing around /
-            (r"\s*\(\s*", "("),        # Remove spaces after opening parenthesis
-            (r"\s*\)\s*", ")"),        # Remove spaces before closing parenthesis
-            (r"\s*,\s*", ","),         # Normalize spaces around commas
-            (r"\s+", " "),             # Normalize multiple spaces
+            (r"\s*\+\s*", " + "),  # Normalize spacing around +
+            (r"\s*-\s*", " - "),  # Normalize spacing around -
+            (r"\s*\*\s*", " * "),  # Normalize spacing around *
+            (r"\s*\/\s*", " / "),  # Normalize spacing around /
+            (r"\s*\(\s*", "("),  # Remove spaces around opening parenthesis
+            (r"\s*\)\s*", ")"),  # Remove spaces around closing parenthesis
+            (r"\s*,\s*", ","),  # Normalize spaces around commas
+            (r"\s+", " "),  # Normalize multiple spaces
         ]
 
         # Apply operator normalizations first
@@ -145,7 +145,7 @@ def normalize_condition(condition_str, field_type=None):
                 (r"\[([^\]]*)\]", r"\1"),  # Remove brackets and extra spaces
                 (r"\bor\b", "||"),
                 (r"\band\b", "&&"),
-                (r'"', "'")
+                (r'"', "'"),
             ]
             for pattern, repl in replacements:
                 condition_str = re.sub(pattern, repl, condition_str)
@@ -816,7 +816,9 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
 
     try:
         df = pd.read_csv(csv_file, encoding="utf-8-sig")
-        df.columns = df.columns.map(lambda x: x.strip().strip('"').lstrip("\ufeff"))
+        df.columns = df.columns.map(
+            lambda x: x.strip().strip('"').lstrip("\ufeff")
+        )
 
         required_columns = ["Form Name", "Variable / Field Name", "Field Type"]
         missing_columns = [
@@ -881,29 +883,42 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
 
             # Case 1: Field is calc type
             if field_type in COMPUTE_LIST:
-                calc_value = row_dict.get("Choices, Calculations, OR Slider Labels", "")
+                calc_value = row_dict.get(
+                    "Choices, Calculations, OR Slider Labels", ""
+                )
                 if calc_value and str(calc_value).strip():
-                    compute_expression = normalize_condition(calc_value, field_type=field_type)
+                    compute_expression = normalize_condition(
+                        calc_value, field_type=field_type
+                    )
                     if compute_expression:
                         is_compute = True
-                        compute[form_name].append({
-                            "variableName": field_name,
-                            "jsExpression": compute_expression
-                        })
+                        compute[form_name].append(
+                            {
+                                "variableName": field_name,
+                                "jsExpression": compute_expression,
+                            }
+                        )
                     else:
-                        print(f"Warning: Could not normalize calc expression for {field_name}: {calc_value}")
+                        print(
+                            f"Warning: Could not normalize calc expression for {field_name}: {calc_value}"
+                        )
 
             # Case 2: Field has @CALCTEXT
-            elif field_annotation and "@CALCTEXT" in str(field_annotation).upper():
+            elif (
+                field_annotation
+                and "@CALCTEXT" in str(field_annotation).upper()
+            ):
                 match = re.search(r"@CALCTEXT\((.*)\)", field_annotation)
                 if match:
                     compute_expression = normalize_condition(match.group(1))
                     if compute_expression:
                         is_compute = True
-                        compute[form_name].append({
-                            "variableName": field_name,
-                            "jsExpression": compute_expression
-                        })
+                        compute[form_name].append(
+                            {
+                                "variableName": field_name,
+                                "jsExpression": compute_expression,
+                            }
+                        )
 
             # Add to order list only if not a compute field
             if not is_compute:
@@ -915,6 +930,7 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
         print(f"Error processing CSV: {str(e)}")
         raise
 
+
 # todo adding output path
 def redcap2reproschema(
     csv_file, yaml_file, output_path, schema_context_url=None
diff --git a/reproschema/tests/test_process_csv.py b/reproschema/tests/test_process_csv.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import pytest
 
-from ..redcap2reproschema import process_csv, normalize_condition
+from ..redcap2reproschema import normalize_condition, process_csv
 
 
 def test_process_csv():
@@ -25,12 +25,18 @@ def test_process_csv():
         assert len(datas["form1"]) == 3
         assert len(datas["form2"]) == 1
 
-        assert order["form1"] == ["items/field1"]  # both field2 and field3 go to compute
+        assert order["form1"] == [
+            "items/field1"
+        ]  # both field2 and field3 go to compute
         assert order["form2"] == ["items/field4"]
 
         assert len(compute["form1"]) == 2
-        assert any(item["variableName"] == "field2" for item in compute["form1"])
-        assert any(item["variableName"] == "field3" for item in compute["form1"])
+        assert any(
+            item["variableName"] == "field2" for item in compute["form1"]
+        )
+        assert any(
+            item["variableName"] == "field3" for item in compute["form1"]
+        )
 
 
 def test_process_csv_missing_columns():
@@ -45,13 +51,21 @@ def test_process_csv_missing_columns():
 
 def test_normalize_condition():
     # Test calc expressions
-    assert normalize_condition("[field1] + [field2]", field_type="calc") == "field1 + field2"
-    assert normalize_condition("[total]*100", field_type="calc") == "total * 100"
+    assert (
+        normalize_condition("[field1] + [field2]", field_type="calc")
+        == "field1 + field2"
+    )
+    assert (
+        normalize_condition("[total]*100", field_type="calc") == "total * 100"
+    )
     assert normalize_condition("2+2", field_type="calc") == "2 + 2"
-    
+
     # Test @CALCTEXT expressions
     assert normalize_condition("3*3") == "3 * 3"
-    
+
     # Test branching logic
     assert normalize_condition("[age] = 1") == "age == 1"
-    assert normalize_condition("[field1] = 1 or [field2] = 2") == "field1 == 1 || field2 == 2"
+    assert (
+        normalize_condition("[field1] = 1 or [field2] = 2")
+        == "field1 == 1 || field2 == 2"
+    )