Skip to content

Commit 13f36ec

Browse files
authored
Merge pull request #6 from yibeichan/local-test-branch
Local test branch
2 parents 9e720f4 + f9623d5 commit 13f36ec

File tree

2 files changed

+56
-30
lines changed

2 files changed

+56
-30
lines changed

reproschema/redcap2reproschema.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
REDCAP_COLUMN_REQUIRED,
2121
RESPONSE_COND,
2222
VALUE_TYPE_MAP,
23+
get_value_type,
2324
)
2425

25-
2626
def process_input_value_types(input_type_rc, value_type_rc) -> (str, str):
2727
"""
2828
Process input type and value type to determine the final input type and value type,
@@ -47,40 +47,36 @@ def process_input_value_types(input_type_rc, value_type_rc) -> (str, str):
4747
input_type = INPUT_TYPE_MAP.get(input_type_rc)
4848

4949
if value_type_rc:
50-
if value_type_rc not in VALUE_TYPE_MAP:
51-
raise ValueError(
52-
f"Validation type '{value_type_rc}' is not supported, "
53-
f"supported types are: {', '.join(VALUE_TYPE_MAP.keys())}"
54-
)
55-
56-
value_type = VALUE_TYPE_MAP[value_type_rc]
50+
# Get value type using the new function
51+
value_type = get_value_type(value_type_rc)
5752

5853
# Adjust input type based on validation
59-
if value_type_rc == "integer" and input_type_rc == "text":
54+
if value_type_rc.startswith("date") or value_type_rc.startswith("datetime"):
55+
if input_type_rc == "text":
56+
input_type = "date"
57+
elif value_type_rc.startswith("time"):
58+
if input_type_rc == "text":
59+
input_type = "time"
60+
elif value_type_rc == "integer" and input_type_rc == "text":
6061
input_type = "number"
6162
elif value_type_rc in ["float", "number"] and input_type_rc == "text":
6263
input_type = "float"
63-
elif (
64-
value_type_rc == "email" and input_type_rc == "text"
65-
): # todo: what if input type is not text
64+
elif value_type_rc == "email" and input_type_rc == "text":
6665
input_type = "email"
6766
elif value_type_rc == "signature" and input_type_rc == "text":
6867
input_type = "sign"
69-
elif (
70-
value_type == "xsd:date" and input_type_rc == "text"
71-
): # anything that maps to date in RS #todo: what about time?
72-
input_type = "date"
7368

7469
elif input_type_rc == "yesno":
7570
value_type = "xsd:boolean"
71+
elif input_type_rc == "truefalse":
72+
value_type = "xsd:boolean"
7673
elif input_type_rc in COMPUTE_LIST:
7774
value_type = "xsd:integer"
7875
else: # if no validation type is set, default to string
7976
value_type = "xsd:string"
8077

8178
return input_type, value_type
8279

83-
8480
def process_response_options(row, input_type_rc, value_type) -> Dict[str, Any]:
8581
"""
8682
Process response options from the row and return a dictionary of response options
@@ -102,6 +98,11 @@ def process_response_options(row, input_type_rc, value_type) -> Dict[str, Any]:
10298
{"name": {"en": "Yes"}, "value": 1},
10399
{"name": {"en": "No"}, "value": 0},
104100
]
101+
elif input_type_rc == "truefalse":
102+
response_options["choices"] = [
103+
{"name": {"en": "True"}, "value": 1},
104+
{"name": {"en": "False"}, "value": 0},
105+
]
105106
elif input_type_rc == "checkbox":
106107
response_options["multipleChoice"] = True
107108

@@ -418,7 +419,7 @@ def process_row(
418419
def process_csv(csv_file) -> (Dict[str, Any], list):
419420

420421
df = pd.read_csv(
421-
csv_file, encoding="utf-8-sig"
422+
csv_file, encoding="utf-8-sig", low_memory=False
422423
) # utf-8-sig handles BOM automatically
423424

424425
df.columns = df.columns.map(

reproschema/redcap_mappings.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"sql": "number",
3535
"yesno": "radio",
3636
"radio": "radio",
37+
"truefalse": "radio",
3738
"checkbox": "radio",
3839
"descriptive": "static",
3940
"dropdown": "select",
@@ -45,23 +46,23 @@
4546

4647
# Map certain field types directly to xsd types
4748
VALUE_TYPE_MAP = {
49+
# Basic types
4850
"text": "xsd:string",
49-
"date_": "xsd:date",
50-
"date_mdy": "xsd:date", # it's not exactly xsd:date
51-
"datetime_seconds_mdy": "xsd:date", # it's not exactly xsd:date
52-
"date_ymd": "xsd:date",
53-
"date_dmy": "xsd:date",
54-
"datetime_": "xsd:dateTime",
55-
"datetime_ymd": "xsd:dateTime",
56-
"time_": "xsd:time",
5751
"email": "xsd:string",
5852
"phone": "xsd:string",
59-
"number": "xsd:decimal", # could be an integer, but have no idea of knowing)
53+
"signature": "xsd:string",
54+
"zipcode": "xsd:string",
55+
"autocomplete": "xsd:string",
56+
57+
# Numeric types
58+
"number": "xsd:decimal",
6059
"float": "xsd:decimal",
6160
"integer": "xsd:integer",
62-
"signature": "xsd: string",
63-
"zipcode": "xsd: string",
64-
"autocomplete": "xsd: string",
61+
62+
# Date and time validation types are resolved by prefix matching in get_value_type (process_input_value_types only adjusts the input type)
63+
# These entries are kept for backward compatibility
64+
"date_": "xsd:date",
65+
"time_": "xsd:time",
6566
}
6667

6768
# field types that should be used as compute
@@ -80,3 +81,27 @@
8081
"Question Number (surveys only)",
8182
"Field Annotation",
8283
]
84+
85+
def get_value_type(validation_type):
    """
    Determine the XSD value type based on REDCap validation type

    Date and time validation types are resolved by prefix, so every format
    variant sharing a prefix (e.g. "date_mdy", "date_ymd") maps to the same
    XSD type. Any other type is looked up in VALUE_TYPE_MAP.

    Args:
        validation_type (str): Validation type from REDCap. ``None`` and any
            other non-string value (such as a float NaN read from an empty
            cell) are treated as "no validation type".

    Returns:
        str: XSD value type for ReproSchema; "xsd:string" when the
            validation type is missing or not recognised
    """
    # Non-string input has no ``startswith``; treat it as an absent
    # validation type instead of raising AttributeError.
    if not isinstance(validation_type, str):
        return "xsd:string"

    # Handle date and time formats with pattern matching.
    # "date_" cannot shadow "datetime_": the character after "date" differs.
    if validation_type.startswith("date_"):
        return "xsd:date"
    if validation_type.startswith("datetime_"):
        return "xsd:dateTime"
    # Deliberately no trailing underscore: matches "time" as well as
    # variants such as "time_mm_ss".
    if validation_type.startswith("time"):
        return "xsd:time"

    # For other types, use the mapping (unknown types default to string)
    return VALUE_TYPE_MAP.get(validation_type, "xsd:string")

0 commit comments

Comments
 (0)