diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index b0cea06..3dadd46 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -20,9 +20,9 @@ REDCAP_COLUMN_REQUIRED, RESPONSE_COND, VALUE_TYPE_MAP, + get_value_type, ) - def process_input_value_types(input_type_rc, value_type_rc) -> (str, str): """ Process input type and value type to determine the final input type and value type, @@ -47,32 +47,29 @@ def process_input_value_types(input_type_rc, value_type_rc) -> (str, str): input_type = INPUT_TYPE_MAP.get(input_type_rc) if value_type_rc: - if value_type_rc not in VALUE_TYPE_MAP: - raise ValueError( - f"Validation type '{value_type_rc}' is not supported, " - f"supported types are: {', '.join(VALUE_TYPE_MAP.keys())}" - ) - - value_type = VALUE_TYPE_MAP[value_type_rc] + # Get value type using the new function + value_type = get_value_type(value_type_rc) # Adjust input type based on validation - if value_type_rc == "integer" and input_type_rc == "text": + if value_type_rc.startswith("date") or value_type_rc.startswith("datetime"): + if input_type_rc == "text": + input_type = "date" + elif value_type_rc.startswith("time"): + if input_type_rc == "text": + input_type = "time" + elif value_type_rc == "integer" and input_type_rc == "text": input_type = "number" elif value_type_rc in ["float", "number"] and input_type_rc == "text": input_type = "float" - elif ( - value_type_rc == "email" and input_type_rc == "text" - ): # todo: what if input type is not text + elif value_type_rc == "email" and input_type_rc == "text": input_type = "email" elif value_type_rc == "signature" and input_type_rc == "text": input_type = "sign" - elif ( - value_type == "xsd:date" and input_type_rc == "text" - ): # anything that maps to date in RS #todo: what about time? - input_type = "date" elif input_type_rc == "yesno": value_type = "xsd:boolean" + elif input_type_rc == "truefalse": + value_type = "xsd:boolean" elif input_type_rc in COMPUTE_LIST: value_type = "xsd:integer" else: # if no validation type is set, default to string @@ -80,7 +77,6 @@ def process_input_value_types(input_type_rc, value_type_rc) -> (str, str): return input_type, value_type - def process_response_options(row, input_type_rc, value_type) -> Dict[str, Any]: """ Process response options from the row and return a dictionary of response options @@ -102,6 +98,11 @@ def process_response_options(row, input_type_rc, value_type) -> Dict[str, Any]: {"name": {"en": "Yes"}, "value": 1}, {"name": {"en": "No"}, "value": 0}, ] + elif input_type_rc == "truefalse": + response_options["choices"] = [ + {"name": {"en": "True"}, "value": 1}, + {"name": {"en": "False"}, "value": 0}, + ] elif input_type_rc == "checkbox": response_options["multipleChoice"] = True @@ -418,7 +419,7 @@ def process_row( def process_csv(csv_file) -> (Dict[str, Any], list): df = pd.read_csv( - csv_file, encoding="utf-8-sig" + csv_file, encoding="utf-8-sig", low_memory=False ) # utf-8-sig handles BOM automatically df.columns = df.columns.map( diff --git a/reproschema/redcap_mappings.py b/reproschema/redcap_mappings.py index 18c82d9..d42358c 100644 --- a/reproschema/redcap_mappings.py +++ b/reproschema/redcap_mappings.py @@ -34,6 +34,7 @@ "sql": "number", "yesno": "radio", "radio": "radio", + "truefalse": "radio", "checkbox": "radio", "descriptive": "static", "dropdown": "select", @@ -45,23 +46,23 @@ # Map certain field types directly to xsd types VALUE_TYPE_MAP = { + # Basic types "text": "xsd:string", - "date_": "xsd:date", - "date_mdy": "xsd:date", # it's not exactly xsd:date - "datetime_seconds_mdy": "xsd:date", # it's not exactly xsd:date - "date_ymd": "xsd:date", - "date_dmy": "xsd:date", - "datetime_": "xsd:dateTime", - "datetime_ymd": "xsd:dateTime", - "time_": "xsd:time", "email": "xsd:string", "phone": "xsd:string", - "number": "xsd:decimal", # could be an integer, but have no idea of knowing) + "signature": "xsd:string", + "zipcode": "xsd:string", + "autocomplete": "xsd:string", + + # Numeric types + "number": "xsd:decimal", "float": "xsd:decimal", "integer": "xsd:integer", - "signature": "xsd: string", - "zipcode": "xsd: string", - "autocomplete": "xsd: string", + + # Date and time types will be handled by pattern matching in process_input_value_types + # These entries are kept for backward compatibility + "date_": "xsd:date", + "time_": "xsd:time", } # field types that should be used as compute @@ -80,3 +81,27 @@ "Question Number (surveys only)", "Field Annotation", ] + +def get_value_type(validation_type): + """ + Determine the XSD value type based on REDCap validation type + + Args: + validation_type (str): Validation type from REDCap + + Returns: + str: XSD value type for ReproSchema + """ + if validation_type is None: + return "xsd:string" + + # Handle date and time formats with pattern matching + if validation_type.startswith("date_"): + return "xsd:date" + elif validation_type.startswith("datetime_"): + return "xsd:dateTime" + elif validation_type.startswith("time"): + return "xsd:time" + + # For other types, use the mapping + return VALUE_TYPE_MAP.get(validation_type, "xsd:string") \ No newline at end of file