66import yaml
77from bs4 import BeautifulSoup
88
9+ matrix_group_count = {}
10+
11+
def clean_header(header):
    """Return a copy of a CSV row whose keys have leading BOM/space residue removed.

    A UTF-8 file saved with a byte-order mark leaves U+FEFF glued to the
    first column name, which makes lookups by the plain column name fail.

    Args:
        header: Mapping of column name to cell value (for example a row
            yielded by ``csv.DictReader``).

    Returns:
        A new dict with the same values. String keys have any leading
        U+FEFF and space characters stripped; non-string keys are kept
        unchanged.
    """
    return {
        # csv.DictReader stores surplus cells under the key ``None`` (its
        # default ``restkey``), so only string keys can be stripped.
        (key.lstrip("\ufeff ") if isinstance(key, str) else key): value
        for key, value in header.items()
    }
14+
915
1016def normalize_condition (condition_str ):
1117 re_parentheses = re .compile (r"\(([0-9]*)\)" )
@@ -34,33 +40,60 @@ def process_visibility(data):
3440 return visibility_obj
3541
3642
def parse_field_type_and_value(field, input_type_map):
    """Derive the UI input type and the XSD value type for one field row.

    Args:
        field: Mapping for a single data-dictionary row. Reads the
            "Field Type" and "Text Validation Type OR Show Slider Number"
            entries.
        input_type_map: Mapping from field type to input type. A field type
            missing from the map is returned unchanged.

    Returns:
        A ``(input_type, value_type)`` tuple. ``value_type`` defaults to
        "xsd:string".
    """
    field_type = field.get("Field Type", "")
    input_type = input_type_map.get(field_type, field_type)

    # Keys ending in "_" are prefixes covering a family of validation types
    # (e.g. "date_ymd", "date_mdy"); the others are matched exactly.
    value_type_map = {
        "text": "xsd:string",
        "date_": "xsd:date",
        "datetime_": "xsd:dateTime",
        "time_": "xsd:time",
        "email": "xsd:string",
        "phone": "xsd:string",
    }  # TODO: input_type="signature"

    # The cell may be absent or None (short CSV rows), so normalise first.
    validation_type = (
        field.get("Text Validation Type OR Show Slider Number") or ""
    ).strip()

    value_type = "xsd:string"
    if validation_type:
        if validation_type in value_type_map:
            value_type = value_type_map[validation_type]
        else:
            # An exact lookup alone never matches suffixed types such as
            # "date_ymd", so fall back to the prefix keys.
            for prefix, xsd_type in value_type_map.items():
                if prefix.endswith("_") and validation_type.startswith(prefix):
                    value_type = xsd_type
                    break
    elif field_type in ("radio", "dropdown"):
        # Without a validation type, choice fields carry integer codes.
        value_type = "xsd:integer"

    return input_type, value_type
5573
5674
def process_choices(field_type, choices_str):
    """Parse a "value, label | value, label" choice string into choice dicts.

    Args:
        field_type: Field type of the row; only "radio" and "dropdown" are
            processed.
        choices_str: Choices separated by "|", each written as
            "value, label" or "value, label, image".

    Returns:
        ``None`` when ``field_type`` is not "radio" or "dropdown". Otherwise
        a list of dicts with "name" and "value" keys, plus "schema:image"
        when a third part is present. Malformed choices are skipped with a
        printed warning; blank segments are skipped silently.
    """
    # NOTE(review): other choice-bearing field types (e.g. checkbox) also get
    # None here -- confirm this is intended by the callers.
    if field_type not in ("radio", "dropdown"):
        return None

    choices = []
    for raw_choice in (choices_str or "").split("|"):
        if not raw_choice.strip():
            # Empty input or a trailing "|" yields blank segments.
            continue

        # Padding around "|" would otherwise leak into names and string values.
        parts = [part.strip() for part in raw_choice.split(", ")]
        if len(parts) < 2:
            print(
                f"Warning: Skipping invalid choice format '{raw_choice}' in a {field_type} field"
            )
            continue

        # Prefer an integer value; keep the raw string when it is not numeric.
        try:
            value = int(parts[0])
        except ValueError:
            value = parts[0]

        choice_obj = {"name": parts[1], "value": value}
        if len(parts) == 3:
            # The third part is an image name stored without its extension.
            choice_obj["schema:image"] = f"{parts[2]}.png"
        choices.append(choice_obj)
    return choices
@@ -90,10 +123,12 @@ def parse_html(input_string, default_language="en"):
90123 text = element .get_text (strip = True )
91124 if text :
92125 result [lang ] = text
93- if not result :
126+ if not result : # If no text was extracted
94127 result [default_language ] = soup .get_text (strip = True )
95128 else :
96- result [default_language ] = input_string
129+ result [default_language ] = soup .get_text (
130+ strip = True
131+ ) # Use the entire text as default language text
97132
98133 return result
99134
@@ -109,9 +144,22 @@ def process_row(
109144 response_list ,
110145 additional_notes_list ,
111146):
147+ global matrix_group_count
148+ matrix_group_name = field .get ("Matrix Group Name" , "" )
149+ if matrix_group_name :
150+ matrix_group_count [matrix_group_name ] = (
151+ matrix_group_count .get (matrix_group_name , 0 ) + 1
152+ )
153+ item_id = f"{ matrix_group_name } _{ matrix_group_count [matrix_group_name ]} "
154+ else :
155+ item_id = field .get ("Variable / Field Name" , "" )
156+
112157 rowData = {
113158 "@context" : schema_context_url ,
114159 "@type" : "reproschema:Field" ,
160+ "@id" : item_id ,
161+ "prefLabel" : item_id ,
162+ "description" : f"{ item_id } of { form_name } " ,
115163 }
116164
117165 field_type = field .get ("Field Type" , "" )
@@ -124,8 +172,20 @@ def process_row(
124172 if value_type :
125173 rowData ["responseOptions" ] = {"valueType" : value_type }
126174
175+ if field_type == "yesno" :
176+ rowData ["responseOptions" ] = {
177+ "valueType" : "xsd:boolean" ,
178+ "choices" : [{"name" : "Yes" , "value" : 1 }, {"name" : "No" , "value" : 0 }],
179+ }
180+
127181 for key , value in field .items ():
128- if schema_map .get (key ) == "allow" and value :
182+ if (
183+ schema_map .get (key ) in ["question" , "schema:description" , "preamble" ]
184+ and value
185+ ):
186+ rowData .update ({schema_map [key ]: parse_html (value )})
187+
188+ elif schema_map .get (key ) == "allow" and value :
129189 rowData .setdefault ("ui" , {}).update ({schema_map [key ]: value .split (", " )})
130190
131191 elif key in ui_list and value :
@@ -139,8 +199,9 @@ def process_row(
139199 rowData .setdefault ("responseOptions" , {}).update ({schema_map [key ]: value })
140200
141201 elif schema_map .get (key ) == "choices" and value :
202+ # Pass both field_type and value to process_choices
142203 rowData .setdefault ("responseOptions" , {}).update (
143- {"choices" : process_choices (value )}
204+ {"choices" : process_choices (field_type , value )}
144205 )
145206
146207 elif schema_map .get (key ) == "scoringLogic" and value :
@@ -159,9 +220,6 @@ def process_row(
159220 {"variableName" : field ["Variable / Field Name" ], "isVis" : condition }
160221 )
161222
162- elif key in ["question" , "schema:description" , "preamble" ] and value :
163- rowData .update ({schema_map [key ]: parse_html (value )})
164-
165223 elif key == "Identifier?" and value :
166224 identifier_val = value .lower () == "y"
167225 rowData .update (
@@ -190,6 +248,9 @@ def create_form_schema(
190248 matrix_list ,
191249 scores_list ,
192250):
251+ # Use a set to track unique items and preserve order
252+ unique_order = list (dict .fromkeys (order .get (form_name , [])))
253+
193254 # Construct the JSON-LD structure
194255 json_ld = {
195256 "@context" : schema_context_url ,
@@ -200,7 +261,7 @@ def create_form_schema(
200261 "schemaVersion" : "1.0.0-rc4" ,
201262 "version" : "0.0.1" ,
202263 "ui" : {
203- "order" : order . get ( form_name , []) ,
264+ "order" : unique_order ,
204265 "addProperties" : bl_list ,
205266 "shuffle" : False ,
206267 },
@@ -310,6 +371,7 @@ def process_csv(
310371 with open (csv_file , mode = "r" , encoding = "utf-8" ) as csvfile :
311372 reader = csv .DictReader (csvfile )
312373 for row in reader :
374+ row = clean_header (row )
313375 form_name = row ["Form Name" ]
314376 if form_name not in datas :
315377 datas [form_name ] = []
@@ -484,7 +546,6 @@ def main():
484546 parser .add_argument ("yaml_file" , help = "Path to the Reproschema protocol YAML file." )
485547 args = parser .parse_args ()
486548
487- # Call the main conversion function
488549 redcap2reproschema (args .csv_file , args .yaml_file )
489550
490551
0 commit comments