1- import csv
21import os
32import re
43from pathlib import Path
54
5+ import pandas as pd
66import yaml
77from bs4 import BeautifulSoup
88
1717 "Field Note" : "description" ,
1818 # TODO: often "Field Annotation" has "@HIDDEN" and other markers
1919 # TODO: not sure if this can ever be treated as description
20- # "Field Annotation": "description ", # column R
20+ # "Field Annotation": "isVis ", # column R
2121 "Section Header" : "preamble" , # column C (need double-check)
2222 "Field Label" : "question" , # column E
2323 "Field Type" : "inputType" , # column D
@@ -82,7 +82,9 @@ def clean_header(header):
8282 cleaned_header = {}
8383 for k , v in header .items ():
8484 # Strip BOM, whitespace, and enclosing quotation marks if present
85- cleaned_key = k .lstrip ("\ufeff " ).strip ().strip ('"' )
85+ cleaned_key = (
86+ k .lstrip ("\ufeff " ).strip ().strip ('"' ) if isinstance (k , str ) else k
87+ )
8688 cleaned_header [cleaned_key ] = v
8789 return cleaned_header
8890
@@ -99,6 +101,12 @@ def normalize_condition(condition_str, field_type=None):
99101 return False
100102 elif condition_str is None :
101103 return None
104+ elif not isinstance (condition_str , str ):
105+ # Convert non-string types to string, or return as is if conversion doesn't make sense
106+ try :
107+ condition_str = str (condition_str )
108+ except :
109+ return condition_str
102110
103111 re_parentheses = re .compile (r"\(([0-9]*)\)" )
104112 re_non_gt_lt_equal = re .compile (r"([^>|<])=" )
@@ -137,17 +145,42 @@ def process_field_properties(data):
137145 else :
138146 condition = True
139147
148+ # Check Field Annotation for special flags - safely handle non-string values
149+ annotation = (
150+ str (data .get ("Field Annotation" , "" )).upper ()
151+ if data .get ("Field Annotation" ) is not None
152+ else ""
153+ )
154+ if (
155+ condition
156+ and isinstance (annotation , str )
157+ and (
158+ "@READONLY" in annotation
159+ or "@HIDDEN" in annotation
160+ or "@CALCTEXT" in annotation
161+ )
162+ ):
163+ condition = False
164+
140165 prop_obj = {
141166 "variableName" : data ["Variable / Field Name" ],
142167 "isAbout" : f"items/{ data ['Variable / Field Name' ]} " ,
143168 "isVis" : condition ,
144169 }
145- if data ["Required Field?" ]:
146- if data ["Required Field?" ] in "y" :
170+
171+ # Handle Required Field check, accounting for NaN values and empty strings
172+ required_field = data .get ("Required Field?" )
173+ if (
174+ pd .notna (required_field ) and str (required_field ).strip ()
175+ ): # Check if value is not NaN and not empty
176+ if str (required_field ).lower () == "y" :
147177 prop_obj ["valueRequired" ] = True
148- else :
149- raise (
150- f"value { data ['Required Field?' ]} not supported yet for redcap:Required Field?"
178+ elif str (required_field ).lower () not in [
179+ "" ,
180+ "n" ,
181+ ]: # Only raise error for unexpected values
182+ raise ValueError (
183+ f"value { required_field } not supported yet for redcap:Required Field?"
151184 )
152185 return prop_obj
153186
@@ -246,6 +279,16 @@ def process_choices(choices_str, field_name):
246279
247280def parse_html (input_string , default_language = "en" ):
248281 result = {}
282+
283+ # Handle non-string input
284+ if not isinstance (input_string , str ):
285+ if pd .isna (input_string ): # Handle NaN values
286+ return {default_language : "" }
287+ try :
288+ input_string = str (input_string )
289+ except :
290+ return {default_language : str (input_string )}
291+
249292 soup = BeautifulSoup (input_string , "html.parser" )
250293
251294 lang_elements = soup .find_all (True , {"lang" : True })
@@ -284,19 +327,30 @@ def process_row(
284327
285328 field_type = field .get ("Field Type" , "" )
286329 input_type , value_type = parse_field_type_and_value (field )
287- rowData ["ui" ] = {"inputType" : input_type }
330+
331+ # Initialize ui object with common properties
332+ ui_obj = {"inputType" : input_type }
333+
334+ # Handle readonly status first - this affects UI behavior
335+ annotation = str (field .get ("Field Annotation" , "" )).upper ()
336+ if (
337+ field_type in COMPUTE_LIST
338+ or "@READONLY" in annotation
339+ or "@CALCTEXT" in annotation
340+ ):
341+ ui_obj ["readonlyValue" ] = True
342+
343+ rowData ["ui" ] = ui_obj
288344 rowData ["responseOptions" ] = {"valueType" : [value_type ]}
289345
290- # setting additional fields for some field types
346+ # Handle specific field type configurations
291347 if field_type == "yesno" :
292348 rowData ["responseOptions" ]["choices" ] = [
293349 {"name" : {"en" : "Yes" }, "value" : 1 },
294350 {"name" : {"en" : "No" }, "value" : 0 },
295351 ]
296352 elif field_type == "checkbox" :
297353 rowData ["responseOptions" ]["multipleChoice" ] = True
298- elif field_type in COMPUTE_LIST :
299- rowData ["ui" ]["readonlyValue" ] = True
300354
301355 for key , value in field .items ():
302356 if SCHEMA_MAP .get (key ) in ["question" , "description" ] and value :
@@ -498,52 +552,79 @@ def parse_language_iso_codes(input_string):
498552 ]
499553
500554
def process_csv(csv_file, abs_folder_path, schema_context_url, protocol_name):
    """Read a REDCap-style data-dictionary CSV and group its rows by form.

    Side effects: creates ``<abs_folder_path>/activities/<form>/items`` for
    every distinct "Form Name" and ``<abs_folder_path>/<protocol_name>``.

    Args:
        csv_file: Path or buffer accepted by ``pandas.read_csv``.
        abs_folder_path: Base output directory for the created folders.
        schema_context_url: Not used inside this function; kept so the
            signature stays compatible with existing callers.
        protocol_name: Name of the protocol directory to create.

    Returns:
        A tuple ``(datas, order, compute, languages)`` where, keyed by form
        name, ``datas`` holds each row as a dict, ``order`` holds
        ``"items/<field>"`` entries for non-computed fields and ``compute``
        holds ``{"variableName", "jsExpression"}`` dicts for computed
        fields. ``languages`` is currently always an empty list.

    Raises:
        KeyError: If a required column (e.g. "Form Name") is missing.
    """
    datas = {}
    order = {}
    compute = {}
    languages = []

    # utf-8-sig strips a leading BOM automatically. Only truly empty cells
    # are treated as missing: pandas' default NA list would otherwise turn
    # literal text such as "NA", "N/A", "None" or "null" into NaN and the
    # cell content would be silently lost.
    df = pd.read_csv(
        csv_file,
        encoding="utf-8-sig",
        keep_default_na=False,
        na_values=[""],
    )

    # Clean column names (headers): whitespace, enclosing quotes, stray BOM
    df.columns = df.columns.map(
        lambda x: x.strip().strip('"').lstrip("\ufeff")
    )

    # Normalise text columns: missing cells become "" and every remaining
    # value is a str (fill first, so no "nan" string is ever produced).
    object_columns = df.select_dtypes(include=["object"]).columns
    for col in object_columns:
        df[col] = df[col].fillna("").astype(str)

    # Initialize structures for each unique form
    for form_name in df["Form Name"].unique():
        datas[form_name] = []
        order[form_name] = []
        compute[form_name] = []
        os.makedirs(
            f"{abs_folder_path}/activities/{form_name}/items", exist_ok=True
        )

    # TODO: should we bring back the language
    # if not languages:
    #     languages = parse_language_iso_codes(row["Field Label"])

    # Case-insensitive so that it agrees with the case-insensitive
    # "@CALCTEXT" membership test below; compiled once, outside the loop.
    calctext_re = re.compile(r"@CALCTEXT\((.*)\)", re.IGNORECASE)

    # Process rows in original order
    for _, row in df.iterrows():
        form_name = row["Form Name"]
        field_name = row["Variable / Field Name"]
        field_type = row.get("Field Type", "")
        field_annotation = row.get("Field Annotation")

        # Add row data to datas dictionary
        datas[form_name].append(row.to_dict())

        if field_type in COMPUTE_LIST:
            condition = normalize_condition(
                row["Choices, Calculations, OR Slider Labels"],
                field_type=field_type,
            )
            compute[form_name].append(
                {
                    "variableName": field_name,
                    "jsExpression": condition,
                }
            )
        elif (
            isinstance(field_annotation, str)
            and "@CALCTEXT" in field_annotation.upper()
        ):
            # NOTE(review): an annotation that mentions @CALCTEXT without a
            # parenthesised expression is added to neither `compute` nor
            # `order` -- confirm this is intended.
            match = calctext_re.search(field_annotation)
            if match:
                js_expression = normalize_condition(match.group(1))
                compute[form_name].append(
                    {
                        "variableName": field_name,
                        "jsExpression": js_expression,
                    }
                )
        else:
            order[form_name].append(f"items/{field_name}")

    os.makedirs(f"{abs_folder_path}/{protocol_name}", exist_ok=True)
    return datas, order, compute, languages
0 commit comments