@@ -82,7 +82,9 @@ def clean_header(header):
8282 cleaned_header = {}
8383 for k , v in header .items ():
8484 # Strip BOM, whitespace, and enclosing quotation marks if present
85- cleaned_key = k .lstrip ("\ufeff " ).strip ().strip ('"' ) if isinstance (k , str ) else k
85+ cleaned_key = (
86+ k .lstrip ("\ufeff " ).strip ().strip ('"' ) if isinstance (k , str ) else k
87+ )
8688 cleaned_header [cleaned_key ] = v
8789 return cleaned_header
8890
@@ -144,11 +146,19 @@ def process_field_properties(data):
144146 condition = True
145147
146148 # Check Field Annotation for special flags - safely handle non-string values
147- annotation = str (data .get ("Field Annotation" , "" )).upper () if data .get ("Field Annotation" ) is not None else ""
148- if condition and isinstance (annotation , str ) and (
149- "@READONLY" in annotation
150- or "@HIDDEN" in annotation
151- or "@CALCTEXT" in annotation
149+ annotation = (
150+ str (data .get ("Field Annotation" , "" )).upper ()
151+ if data .get ("Field Annotation" ) is not None
152+ else ""
153+ )
154+ if (
155+ condition
156+ and isinstance (annotation , str )
157+ and (
158+ "@READONLY" in annotation
159+ or "@HIDDEN" in annotation
160+ or "@CALCTEXT" in annotation
161+ )
152162 ):
153163 condition = False
154164
@@ -157,13 +167,18 @@ def process_field_properties(data):
157167 "isAbout" : f"items/{ data ['Variable / Field Name' ]} " ,
158168 "isVis" : condition ,
159169 }
160-
170+
161171 # Handle Required Field check, accounting for NaN values and empty strings
162172 required_field = data .get ("Required Field?" )
163- if pd .notna (required_field ) and str (required_field ).strip (): # Check if value is not NaN and not empty
173+ if (
174+ pd .notna (required_field ) and str (required_field ).strip ()
175+ ): # Check if value is not NaN and not empty
164176 if str (required_field ).lower () == "y" :
165177 prop_obj ["valueRequired" ] = True
166- elif str (required_field ).lower () not in ["" , "n" ]: # Only raise error for unexpected values
178+ elif str (required_field ).lower () not in [
179+ "" ,
180+ "n" ,
181+ ]: # Only raise error for unexpected values
167182 raise ValueError (
168183 f"value { required_field } not supported yet for redcap:Required Field?"
169184 )
@@ -264,7 +279,7 @@ def process_choices(choices_str, field_name):
264279
265280def parse_html (input_string , default_language = "en" ):
266281 result = {}
267-
282+
268283 # Handle non-string input
269284 if not isinstance (input_string , str ):
270285 if pd .isna (input_string ): # Handle NaN values
@@ -286,7 +301,9 @@ def parse_html(input_string, default_language="en"):
286301 if not result : # If no text was extracted
287302 result [default_language ] = soup .get_text (strip = True )
288303 else :
289- result [default_language ] = soup .get_text (strip = True ) # Use the entire text as default language text
304+ result [default_language ] = soup .get_text (
305+ strip = True
306+ ) # Use the entire text as default language text
290307 return result
291308
292309
@@ -524,24 +541,26 @@ def parse_language_iso_codes(input_string):
524541 ]
525542
526543
527- def process_csv (
528- csv_file , abs_folder_path , schema_context_url , protocol_name
529- ):
544+ def process_csv (csv_file , abs_folder_path , schema_context_url , protocol_name ):
530545 datas = {}
531546 order = {}
532547 compute = {}
533548 languages = []
534549
535550 # Read CSV with explicit BOM handling, and maintain original order
536- df = pd .read_csv (csv_file , encoding = "utf-8-sig" ) # utf-8-sig handles BOM automatically
537-
551+ df = pd .read_csv (
552+ csv_file , encoding = "utf-8-sig"
553+ ) # utf-8-sig handles BOM automatically
554+
538555 # Clean column names (headers)
539- df .columns = df .columns .map (lambda x : x .strip ().strip ('"' ).lstrip ("\ufeff " ))
556+ df .columns = df .columns .map (
557+ lambda x : x .strip ().strip ('"' ).lstrip ("\ufeff " )
558+ )
540559
541560 # Clean string values in the dataframe
542- object_columns = df .select_dtypes (include = [' object' ]).columns
561+ object_columns = df .select_dtypes (include = [" object" ]).columns
543562 for col in object_columns :
544- df [col ] = df [col ].astype (str ).replace (' nan' , '' )
563+ df [col ] = df [col ].astype (str ).replace (" nan" , "" )
545564
546565 # Initialize structures for each unique form
547566 unique_forms = df ["Form Name" ].unique ()
@@ -563,10 +582,10 @@ def process_csv(
563582 field_name = row ["Variable / Field Name" ]
564583 field_type = row .get ("Field Type" , "" )
565584 field_annotation = row .get ("Field Annotation" )
566-
585+
567586 # Add row data to datas dictionary
568587 datas [form_name ].append (row .to_dict ())
569-
588+
570589 if field_type in COMPUTE_LIST :
571590 condition = normalize_condition (
572591 row ["Choices, Calculations, OR Slider Labels" ],
@@ -578,7 +597,10 @@ def process_csv(
578597 "jsExpression" : condition ,
579598 }
580599 )
581- elif isinstance (field_annotation , str ) and "@CALCTEXT" in field_annotation .upper ():
600+ elif (
601+ isinstance (field_annotation , str )
602+ and "@CALCTEXT" in field_annotation .upper ()
603+ ):
582604 calc_text = field_annotation
583605 match = re .search (r"@CALCTEXT\((.*)\)" , calc_text )
584606 if match :
0 commit comments