Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 19 additions & 18 deletions reproschema/redcap2reproschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
REDCAP_COLUMN_REQUIRED,
RESPONSE_COND,
VALUE_TYPE_MAP,
get_value_type,
)


def process_input_value_types(input_type_rc, value_type_rc) -> (str, str):
"""
Process input type and value type to determine the final input type and value type,
Expand All @@ -47,40 +47,36 @@ def process_input_value_types(input_type_rc, value_type_rc) -> (str, str):
input_type = INPUT_TYPE_MAP.get(input_type_rc)

if value_type_rc:
if value_type_rc not in VALUE_TYPE_MAP:
raise ValueError(
f"Validation type '{value_type_rc}' is not supported, "
f"supported types are: {', '.join(VALUE_TYPE_MAP.keys())}"
)

value_type = VALUE_TYPE_MAP[value_type_rc]
# Get value type using the new function
value_type = get_value_type(value_type_rc)

# Adjust input type based on validation
if value_type_rc == "integer" and input_type_rc == "text":
if value_type_rc.startswith("date") or value_type_rc.startswith("datetime"):
if input_type_rc == "text":
input_type = "date"
elif value_type_rc.startswith("time"):
if input_type_rc == "text":
input_type = "time"
elif value_type_rc == "integer" and input_type_rc == "text":
input_type = "number"
elif value_type_rc in ["float", "number"] and input_type_rc == "text":
input_type = "float"
elif (
value_type_rc == "email" and input_type_rc == "text"
): # todo: what if input type is not text
elif value_type_rc == "email" and input_type_rc == "text":
input_type = "email"
elif value_type_rc == "signature" and input_type_rc == "text":
input_type = "sign"
elif (
value_type == "xsd:date" and input_type_rc == "text"
): # anything that maps to date in RS #todo: what about time?
input_type = "date"

elif input_type_rc == "yesno":
value_type = "xsd:boolean"
elif input_type_rc == "truefalse":
value_type = "xsd:boolean"
elif input_type_rc in COMPUTE_LIST:
value_type = "xsd:integer"
else: # if no validation type is set, default to string
value_type = "xsd:string"

return input_type, value_type


def process_response_options(row, input_type_rc, value_type) -> Dict[str, Any]:
"""
Process response options from the row and return a dictionary of response options
Expand All @@ -102,6 +98,11 @@ def process_response_options(row, input_type_rc, value_type) -> Dict[str, Any]:
{"name": {"en": "Yes"}, "value": 1},
{"name": {"en": "No"}, "value": 0},
]
elif input_type_rc == "truefalse":
response_options["choices"] = [
{"name": {"en": "True"}, "value": 1},
{"name": {"en": "False"}, "value": 0},
]
elif input_type_rc == "checkbox":
response_options["multipleChoice"] = True

Expand Down Expand Up @@ -418,7 +419,7 @@ def process_row(
def process_csv(csv_file) -> (Dict[str, Any], list):

df = pd.read_csv(
csv_file, encoding="utf-8-sig"
csv_file, encoding="utf-8-sig", low_memory=False
) # utf-8-sig handles BOM automatically

df.columns = df.columns.map(
Expand Down
49 changes: 37 additions & 12 deletions reproschema/redcap_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"sql": "number",
"yesno": "radio",
"radio": "radio",
"truefalse": "radio",
"checkbox": "radio",
"descriptive": "static",
"dropdown": "select",
Expand All @@ -45,23 +46,23 @@

# Map certain field types directly to xsd types
VALUE_TYPE_MAP = {
# Basic types
"text": "xsd:string",
"date_": "xsd:date",
"date_mdy": "xsd:date", # it's not exactly xsd:date
"datetime_seconds_mdy": "xsd:date", # it's not exactly xsd:date
"date_ymd": "xsd:date",
"date_dmy": "xsd:date",
"datetime_": "xsd:dateTime",
"datetime_ymd": "xsd:dateTime",
"time_": "xsd:time",
"email": "xsd:string",
"phone": "xsd:string",
"number": "xsd:decimal", # could be an integer, but have no idea of knowing)
"signature": "xsd:string",
"zipcode": "xsd:string",
"autocomplete": "xsd:string",

# Numeric types
"number": "xsd:decimal",
"float": "xsd:decimal",
"integer": "xsd:integer",
"signature": "xsd: string",
"zipcode": "xsd: string",
"autocomplete": "xsd: string",

# Date and time validation types are matched by prefix: get_value_type resolves
# the XSD type and process_input_value_types adjusts the input type.
# These entries are kept for backward compatibility
"date_": "xsd:date",
"time_": "xsd:time",
}

# field types that should be used as compute
Expand All @@ -80,3 +81,27 @@
"Question Number (surveys only)",
"Field Annotation",
]

def get_value_type(validation_type):
    """
    Determine the XSD value type based on REDCap validation type

    Date and time validation types are recognised by prefix, so every
    format variant sharing a prefix resolves to the same XSD type. All
    other types are looked up in VALUE_TYPE_MAP, falling back to
    "xsd:string" when the type is not listed there.

    Args:
        validation_type (str): Validation type from REDCap. ``None`` and
            any other non-string value (for example a float NaN) are
            treated as "no validation type".

    Returns:
        str: XSD value type for ReproSchema
    """
    # Only strings can name a validation type. Guarding on the type (not
    # just None) keeps the prefix checks below from raising AttributeError
    # on values such as NaN, and matches the lenient "xsd:string" fallback
    # already used for None and for unknown types.
    if not isinstance(validation_type, str):
        return "xsd:string"

    # Handle date and time formats with pattern matching.
    # "date_" and "datetime_" cannot shadow each other: the fifth character
    # differs ("_" vs "t"), so the order of these two checks is not critical.
    if validation_type.startswith("date_"):
        return "xsd:date"
    if validation_type.startswith("datetime_"):
        return "xsd:dateTime"
    # No trailing underscore here, so a bare "time" is matched as well as
    # suffixed variants.
    if validation_type.startswith("time"):
        return "xsd:time"

    # For other types, use the mapping
    return VALUE_TYPE_MAP.get(validation_type, "xsd:string")