Merge branch 'fix-readonly' of github.com:yibeichan/reproschema-py into fix-readonly

yibeichan · yibeichan · commit e72180a3e3ee · 2024-12-03T20:09:20.000-05:00
diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
@@ -82,7 +82,9 @@ def clean_header(header):
     cleaned_header = {}
     for k, v in header.items():
         # Strip BOM, whitespace, and enclosing quotation marks if present
-        cleaned_key = k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
+        cleaned_key = (
+            k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
+        )
         cleaned_header[cleaned_key] = v
     return cleaned_header
 
@@ -144,11 +146,19 @@ def process_field_properties(data):
         condition = True
 
     # Check Field Annotation for special flags - safely handle non-string values
-    annotation = str(data.get("Field Annotation", "")).upper() if data.get("Field Annotation") is not None else ""
-    if condition and isinstance(annotation, str) and (
-        "@READONLY" in annotation
-        or "@HIDDEN" in annotation
-        or "@CALCTEXT" in annotation
+    annotation = (
+        str(data.get("Field Annotation", "")).upper()
+        if data.get("Field Annotation") is not None
+        else ""
+    )
+    if (
+        condition
+        and isinstance(annotation, str)
+        and (
+            "@READONLY" in annotation
+            or "@HIDDEN" in annotation
+            or "@CALCTEXT" in annotation
+        )
     ):
         condition = False
 
@@ -157,13 +167,18 @@ def process_field_properties(data):
         "isAbout": f"items/{data['Variable / Field Name']}",
         "isVis": condition,
     }
-    
+
     # Handle Required Field check, accounting for NaN values and empty strings
     required_field = data.get("Required Field?")
-    if pd.notna(required_field) and str(required_field).strip():  # Check if value is not NaN and not empty
+    if (
+        pd.notna(required_field) and str(required_field).strip()
+    ):  # Check if value is not NaN and not empty
         if str(required_field).lower() == "y":
             prop_obj["valueRequired"] = True
-        elif str(required_field).lower() not in ["", "n"]:  # Only raise error for unexpected values
+        elif str(required_field).lower() not in [
+            "",
+            "n",
+        ]:  # Only raise error for unexpected values
             raise ValueError(
                 f"value {required_field} not supported yet for redcap:Required Field?"
             )
@@ -264,7 +279,7 @@ def process_choices(choices_str, field_name):
 
 def parse_html(input_string, default_language="en"):
     result = {}
-    
+
     # Handle non-string input
     if not isinstance(input_string, str):
         if pd.isna(input_string):  # Handle NaN values
@@ -286,7 +301,9 @@ def parse_html(input_string, default_language="en"):
         if not result:  # If no text was extracted
             result[default_language] = soup.get_text(strip=True)
     else:
-        result[default_language] = soup.get_text(strip=True)  # Use the entire text as default language text
+        result[default_language] = soup.get_text(
+            strip=True
+        )  # Use the entire text as default language text
     return result
 
 
@@ -524,24 +541,26 @@ def parse_language_iso_codes(input_string):
     ]
 
 
-def process_csv(
-    csv_file, abs_folder_path, schema_context_url, protocol_name
-):
+def process_csv(csv_file, abs_folder_path, schema_context_url, protocol_name):
     datas = {}
     order = {}
     compute = {}
     languages = []
 
     # Read CSV with explicit BOM handling, and maintain original order
-    df = pd.read_csv(csv_file, encoding="utf-8-sig")  # utf-8-sig handles BOM automatically
-    
+    df = pd.read_csv(
+        csv_file, encoding="utf-8-sig"
+    )  # utf-8-sig handles BOM automatically
+
     # Clean column names (headers)
-    df.columns = df.columns.map(lambda x: x.strip().strip('"').lstrip("\ufeff"))
+    df.columns = df.columns.map(
+        lambda x: x.strip().strip('"').lstrip("\ufeff")
+    )
 
     # Clean string values in the dataframe
-    object_columns = df.select_dtypes(include=['object']).columns
+    object_columns = df.select_dtypes(include=["object"]).columns
     for col in object_columns:
-        df[col] = df[col].astype(str).replace('nan', '')
+        df[col] = df[col].astype(str).replace("nan", "")
 
     # Initialize structures for each unique form
     unique_forms = df["Form Name"].unique()
@@ -563,10 +582,10 @@ def process_csv(
         field_name = row["Variable / Field Name"]
         field_type = row.get("Field Type", "")
         field_annotation = row.get("Field Annotation")
-        
+
         # Add row data to datas dictionary
         datas[form_name].append(row.to_dict())
-        
+
         if field_type in COMPUTE_LIST:
             condition = normalize_condition(
                 row["Choices, Calculations, OR Slider Labels"],
@@ -578,7 +597,10 @@ def process_csv(
                     "jsExpression": condition,
                 }
             )
-        elif isinstance(field_annotation, str) and "@CALCTEXT" in field_annotation.upper():
+        elif (
+            isinstance(field_annotation, str)
+            and "@CALCTEXT" in field_annotation.upper()
+        ):
             calc_text = field_annotation
             match = re.search(r"@CALCTEXT\((.*)\)", calc_text)
             if match:
diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py
@@ -25,11 +25,11 @@ def test_redcap2reproschema(tmpdir):
 
     shutil.copy(CSV_TEST_FILE, str(temp_csv_file))
     shutil.copy(YAML_TEST_FILE, str(temp_yaml_file))
-    
+
     # Add debug output to see the content of the CSV file
-    with open(str(temp_csv_file), 'r') as f:
+    with open(str(temp_csv_file), "r") as f:
         print("CSV content:", f.read())
-        
+
     with tmpdir.as_cwd():
         # Read YAML to find the expected output directory name
         with open(str(temp_yaml_file), "r") as file:
@@ -44,8 +44,10 @@ def test_redcap2reproschema(tmpdir):
                 str(temp_yaml_file),
             ],
         )
-        
+
         print("Command output:", result.output)  # Add debug output
-        
+
         assert result.exit_code == 0, f"Command failed with: {result.output}"
-        assert os.path.isdir(protocol_name), f"Expected output directory '{protocol_name}' does not exist"
+        assert os.path.isdir(
+            protocol_name
+        ), f"Expected output directory '{protocol_name}' does not exist"