Merge pull request #37 from yibeichan/master

yibeichan · web-flow · commit 105489b1b289 · 2024-04-29T11:41:31.000-04:00
update conversion tools and their tests
diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
@@ -10,19 +10,43 @@
 
 
 def clean_header(header):
-    return {k.lstrip("\ufeff"): v for k, v in header.items()}
+    cleaned_header = {}
+    for k, v in header.items():
+        # Strip BOM, whitespace, and enclosing quotation marks if present
+        cleaned_key = k.lstrip("\ufeff").strip().strip('"')
+        cleaned_header[cleaned_key] = v
+    return cleaned_header
 
 
 def normalize_condition(condition_str):
+    # Regular expressions for various pattern replacements
     re_parentheses = re.compile(r"\(([0-9]*)\)")
     re_non_gt_lt_equal = re.compile(r"([^>|<])=")
     re_brackets = re.compile(r"\[([^\]]*)\]")
+    re_extra_spaces = re.compile(r"\s+")
+    re_double_quotes = re.compile(r'"')
+    re_or = re.compile(r"\bor\b")  # Match 'or' as whole word
 
+    # Apply regex replacements
     condition_str = re_parentheses.sub(r"___\1", condition_str)
     condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str)
-    condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ")
     condition_str = re_brackets.sub(r" \1 ", condition_str)
-    return condition_str
+
+    # Replace 'or' with '||', ensuring not to replace '||'
+    condition_str = re_or.sub("||", condition_str)
+
+    # Replace 'and' with '&&'
+    condition_str = condition_str.replace(" and ", " && ")
+
+    # Trim extra spaces and replace double quotes with single quotes
+    condition_str = re_extra_spaces.sub(
+        " ", condition_str
+    ).strip()  # Reduce multiple spaces to a single space
+    condition_str = re_double_quotes.sub(
+        "'", condition_str
+    )  # Replace double quotes with single quotes
+
+    return condition_str.strip()
 
 
 def process_visibility(data):
@@ -42,7 +66,11 @@ def process_visibility(data):
 
 def parse_field_type_and_value(field, input_type_map):
     field_type = field.get("Field Type", "")
-    input_type = input_type_map.get(field_type, field_type)
+    # Check if field_type is 'yesno' and directly assign 'radio' as the input type
+    if field_type == "yesno":
+        input_type = "radio"  # Directly set to 'radio' for 'yesno' fields
+    else:
+        input_type = input_type_map.get(field_type, field_type)  # Original logic
 
     # Initialize the default value type as string
     value_type = "xsd:string"
@@ -55,7 +83,8 @@ def parse_field_type_and_value(field, input_type_map):
         "time_": "xsd:time",
         "email": "xsd:string",
         "phone": "xsd:string",
-    }  # todo: input_type="signature"
+        # No change needed here for 'yesno', as it's handled above
+    }
 
     # Get the validation type from the field, if available
     validation_type = field.get(
@@ -91,10 +120,11 @@ def process_choices(field_type, choices_str):
         except ValueError:
             value = parts[0]
 
-        choice_obj = {"name": parts[1], "value": value}
-        if len(parts) == 3:
-            # Handle image url
-            choice_obj["schema:image"] = f"{parts[2]}.png"
+        choice_obj = {"name": " ".join(parts[1:]), "value": value}
+        # remove image for now
+        # if len(parts) == 3:
+        #     # Handle image url
+        #     choice_obj["image"] = f"{parts[2]}.png"
         choices.append(choice_obj)
     return choices
 
@@ -156,7 +186,7 @@ def process_row(
 
     rowData = {
         "@context": schema_context_url,
-        "@type": "reproschema:Field",
+        "@type": "reproschema:Item",
         "@id": item_id,
         "prefLabel": item_id,
         "description": f"{item_id} of {form_name}",
@@ -179,10 +209,7 @@ def process_row(
         }
 
     for key, value in field.items():
-        if (
-            schema_map.get(key) in ["question", "schema:description", "preamble"]
-            and value
-        ):
+        if schema_map.get(key) in ["question", "description", "preamble"] and value:
             rowData.update({schema_map[key]: parse_html(value)})
 
         elif schema_map.get(key) == "allow" and value:
@@ -214,21 +241,15 @@ def process_row(
                 }
             )
 
-        elif schema_map.get(key) == "visibility" and value:
-            condition = normalize_condition(value)
-            rowData.setdefault("visibility", []).append(
-                {"variableName": field["Variable / Field Name"], "isVis": condition}
-            )
-
-        elif key == "Identifier?" and value:
-            identifier_val = value.lower() == "y"
-            rowData.update(
-                {
-                    schema_map[key]: [
-                        {"legalStandard": "unknown", "isIdentifier": identifier_val}
-                    ]
-                }
-            )
+        # elif key == "Identifier?" and value:
+        #     identifier_val = value.lower() == "y"
+        #     rowData.update(
+        #         {
+        #             schema_map[key]: [
+        #                 {"legalStandard": "unknown", "isIdentifier": identifier_val}
+        #             ]
+        #         }
+        #     )
 
         elif key in additional_notes_list and value:
             notes_obj = {"source": "redcap", "column": key, "value": value}
@@ -240,6 +261,7 @@ def process_row(
 def create_form_schema(
     abs_folder_path,
     schema_context_url,
+    redcap_version,
     form_name,
     activity_display_name,
     activity_description,
@@ -259,16 +281,17 @@ def create_form_schema(
         "prefLabel": activity_display_name,
         "description": activity_description,
         "schemaVersion": "1.0.0-rc4",
-        "version": "0.0.1",
+        "version": redcap_version,
         "ui": {
             "order": unique_order,
             "addProperties": bl_list,
             "shuffle": False,
         },
     }
 
-    if matrix_list:
-        json_ld["matrixInfo"] = matrix_list
+    # remove matrixInfo to pass validataion
+    # if matrix_list:
+    #     json_ld["matrixInfo"] = matrix_list
     if scores_list:
         json_ld["scoringLogic"] = scores_list
 
@@ -296,6 +319,7 @@ def process_activities(activity_name, protocol_visibility_obj, protocol_order):
 def create_protocol_schema(
     abs_folder_path,
     schema_context_url,
+    redcap_version,
     protocol_name,
     protocol_display_name,
     protocol_description,
@@ -307,31 +331,33 @@ def create_protocol_schema(
         "@context": schema_context_url,
         "@type": "reproschema:Protocol",
         "@id": f"{protocol_name}_schema",
-        "skos:prefLabel": protocol_display_name,
-        "skos:altLabel": f"{protocol_name}_schema",
-        "schema:description": protocol_description,
-        "schema:schemaVersion": "1.0.0-rc4",
-        "schema:version": "0.0.1",
+        "prefLabel": protocol_display_name,
+        "altLabel": f"{protocol_name}_schema",
+        "description": protocol_description,
+        "schemaVersion": "1.0.0-rc4",
+        "version": redcap_version,
         "ui": {
             "addProperties": [],
-            "order": protocol_order,
+            "order": [],
             "shuffle": False,
         },
     }
 
     # Populate addProperties list
     for activity in protocol_order:
+        full_path = f"../activities/{activity}/{activity}_schema"
         add_property = {
-            "isAbout": f"../activities/{activity}/{activity}_schema",
+            "isAbout": full_path,
             "variableName": f"{activity}_schema",
             # Assuming activity name as prefLabel, update as needed
             "prefLabel": activity.replace("_", " ").title(),
+            "isVis": protocol_visibility_obj.get(
+                activity, True
+            ),  # Default to True if not specified
         }
         protocol_schema["ui"]["addProperties"].append(add_property)
-
-    # Add visibility if needed
-    if protocol_visibility_obj:
-        protocol_schema["ui"]["visibility"] = protocol_visibility_obj
+        # Add the full path to the order list
+        protocol_schema["ui"]["order"].append(full_path)
 
     protocol_dir = f"{abs_folder_path}/{protocol_name}"
     schema_file = f"{protocol_name}_schema"
@@ -420,6 +446,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
     protocol_name = protocol.get("protocol_name")
     protocol_display_name = protocol.get("protocol_display_name")
     protocol_description = protocol.get("protocol_description")
+    redcap_version = protocol.get("redcap_version")
 
     if not protocol_name:
         raise ValueError("Protocol name not specified in the YAML file.")
@@ -434,7 +461,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
     abs_folder_path = os.path.abspath(protocol_name)
 
     if schema_context_url is None:
-        schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic"
+        schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/efb74e155c09e13aa009ea04609ba4f1152fcbc6/contexts/reproschema_new"
 
     # Initialize variables
     schema_map = {
@@ -451,7 +478,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
         "Choices, Calculations, OR Slider Labels": "choices",  # column F
         "Branching Logic (Show field only if...)": "visibility",  # column L
         "Custom Alignment": "customAlignment",  # column N
-        "Identifier?": "identifiable",  # column K
+        # "Identifier?": "identifiable",  # column K
         "multipleChoice": "multipleChoice",
         "responseType": "@type",
     }
@@ -515,6 +542,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
         create_form_schema(
             abs_folder_path,
             schema_context_url,
+            redcap_version,
             form_name,
             activity_display_name,
             activity_description,
@@ -530,6 +558,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
     create_protocol_schema(
         abs_folder_path,
         schema_context_url,
+        redcap_version,
         protocol_name,
         protocol_display_name,
         protocol_description,
diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
@@ -142,6 +142,7 @@ def get_csv_data(dir_path):
         if protocol_dir.is_dir():
             # Check for a _schema file in each directory
             schema_file = next(protocol_dir.glob("*_schema"), None)
+            print(f"Found schema file: {schema_file}")
             if schema_file:
                 # Process the found _schema file
                 parsed_protocol_json = read_json_file(schema_file)
@@ -152,8 +153,14 @@ def get_csv_data(dir_path):
                     normalized_relative_path = Path(
                         relative_activity_path.lstrip("../")
                     )
-                    activity_path = dir_path / normalized_relative_path
-                    print(f"Processing activity {activity_path}")
+
+                    activity_path = (
+                        dir_path
+                        / "activities"
+                        / normalized_relative_path
+                        / (normalized_relative_path.name + "_schema")
+                    )
+
                     parsed_activity_json = read_json_file(activity_path)
 
                     if parsed_activity_json:
diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py
@@ -1,10 +1,10 @@
 import os
 import shutil
 import pytest
+import yaml
 from click.testing import CliRunner
-from ..cli import main  # Import the Click group
+from ..cli import main
 
-# Assuming your test files are located in a 'tests' directory
 CSV_FILE_NAME = "redcap_dict.csv"
 YAML_FILE_NAME = "redcap2rs.yaml"
 CSV_TEST_FILE = os.path.join(
@@ -15,17 +15,34 @@
 )
 
 
-def test_redcap2reproschema_success():
+def test_redcap2reproschema(tmpdir):
     runner = CliRunner()
 
-    with runner.isolated_filesystem():
-        # Copy the test files to the isolated filesystem
-        shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME)
-        shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME)
+    temp_csv_file = tmpdir.join(CSV_FILE_NAME)
+    temp_yaml_file = tmpdir.join(YAML_FILE_NAME)
+
+    shutil.copy(CSV_TEST_FILE, str(temp_csv_file))  # Convert to string
+    shutil.copy(YAML_TEST_FILE, str(temp_yaml_file))  # Convert to string
+
+    # Change the current working directory to tmpdir
+    with tmpdir.as_cwd():
+        # Read YAML to find the expected output directory name
+        with open(str(temp_yaml_file), "r") as file:  # Convert to string
+            protocol = yaml.safe_load(file)
+        protocol_name = protocol.get("protocol_name", "").replace(" ", "_")
 
-        # Run the command within the isolated filesystem
         result = runner.invoke(
-            main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME]
+            main,
+            [
+                "redcap2reproschema",
+                str(temp_csv_file),
+                str(temp_yaml_file),
+            ],  # Convert to string
         )
-        print(result.output)
-        assert result.exit_code == 0
+
+        assert (
+            result.exit_code == 0
+        ), f"The command failed to execute successfully: {result.output}"
+        assert os.path.isdir(
+            protocol_name
+        ), f"Expected output directory '{protocol_name}' does not exist"
diff --git a/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml b/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml
@@ -9,13 +9,8 @@ protocol_name: "test_redcap2rs"  # Example: "My_Protocol"
 # This name will be displayed in the application.
 protocol_display_name: "redcap protocols"
 
-# GitHub Repository Information:
-# Create a GitHub repository named 'reproschema' to store your reproschema protocols.
-# Replace 'your_github_username' with your actual GitHub username.
-user_name: "yibeichan"
-repo_name: "redcap2reproschema"  # Recommended name; can be different if preferred.
-repo_url: "https://github.com/{{user_name}}/{{repo_name}}"
-
 # Protocol Description:
 # Provide a brief description of your protocol.
 protocol_description: "testing"  # Example: "This protocol is for ..."
+
+redcap_version: "3.0.0"
diff --git a/reproschema/tests/test_reproschema2redcap.py b/reproschema/tests/test_reproschema2redcap.py
diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml