@@ -88,18 +88,11 @@ def clean_dict_nans(obj):
 
 # TODO: normalized condition should depend on the field type, e.g., for SQL
 def normalize_condition(condition_str, field_type=None):
-    """
-    Enhanced normalization of condition strings with specific handling for calc fields.
-
-    Args:
-        condition_str: The condition string to normalize
-        field_type: The type of field (e.g., 'calc', 'sql')
-
-    Returns:
-        str: Normalized condition string, or None if invalid
-    """
+    """Normalize condition strings with specific handling for calc fields."""
     if condition_str is None or pd.isna(condition_str):
         return None
+
+    # Handle boolean values
     if isinstance(condition_str, bool):
         return condition_str
     if isinstance(condition_str, str):
@@ -108,43 +101,60 @@ def normalize_condition(condition_str, field_type=None):
         if condition_str.lower() == "false":
             return False
 
+    # Convert to string if needed
     if not isinstance(condition_str, str):
         try:
             condition_str = str(condition_str)
         except:
             return None
 
     try:
+
+        # Clean HTML
         condition_str = BeautifulSoup(condition_str, "html.parser").get_text()
+        condition_str = condition_str.strip()
+
+        if not condition_str:
+            return None
+
+        # Common operator normalizations for all types
+        operator_replacements = [
+            (r"\s*\+\s*", " + "),  # Normalize spacing around +
+            (r"\s*-\s*", " - "),  # Normalize spacing around -
+            (r"\s*\*\s*", " * "),  # Normalize spacing around *
+            (r"\s*\/\s*", " / "),  # Normalize spacing around /
+            (r"\s*\(\s*", "("),  # Remove spaces after opening parenthesis
+            (r"\s*\)\s*", ")"),  # Remove spaces before closing parenthesis
+            (r"\s*,\s*", ","),  # Normalize spaces around commas
+            (r"\s+", " "),  # Normalize multiple spaces
+        ]
 
-        # SQL/calc specific handling
+        # Apply operator normalizations first
+        for pattern, repl in operator_replacements:
+            condition_str = re.sub(pattern, repl, condition_str)
+
+        # Then apply type-specific replacements
         if field_type in ["sql", "calc"]:
-            # For calc fields, we want to preserve function calls like Math.max
-            # but normalize the spacing around operators and arguments
-            replacements = [
-                (r"\s*\(\s*", "("),  # Remove spaces after opening parenthesis
-                (r"\s*\)\s*", ")"),  # Remove spaces before closing parenthesis
-                (r"\s*,\s*", ", "),  # Normalize spaces around commas
-                (r"\s+", " "),  # Normalize multiple spaces to single space
-                (r'"', "'"),  # Standardize quotes
-            ]
+            # For calc fields, just remove brackets from field references
+            condition_str = re.sub(r"\[([^\]]+)\]", r"\1", condition_str)
         else:
-            # Standard REDCap logic replacements for non-calc fields
+            # For branching logic
             replacements = [
                 (r"\(([0-9]*)\)", r"___\1"),
-                (r"([^>|<])=", r"\1 =="),
-                (r"\[([^\]]*)\]", r" \1 "),
+                (r"([^>|<])=", r"\1=="),
+                (r"\[([^\]]*)\]", r"\1 "),  # Remove brackets and extra spaces
                 (r"\bor\b", "||"),
                 (r"\band\b", "&&"),
-                (r"\s+", " "),
-                (r'"', "'"),
+                (r'"', "'")
             ]
+            for pattern, repl in replacements:
+                condition_str = re.sub(pattern, repl, condition_str)
 
-        for pattern, repl in replacements:
-            condition_str = re.sub(pattern, repl, condition_str)
+        result = condition_str.strip()
+        return result
 
-        return condition_str.strip() or None
-    except:
+    except Exception as e:
+        print(f"Error normalizing condition: {str(e)}")
         return None
 
 
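For context, a minimal standalone sketch of what the non-calc branch above does to a typical REDCap branching-logic string; the sample input and the expected output in the comment are illustrative assumptions, not taken from this PR's tests, and the trailing whitespace collapse stands in for the operator-normalization pass that runs earlier in the function.

import re

# Sketch only: replaying the branching-logic replacements above on a sample string.
replacements = [
    (r"\(([0-9]*)\)", r"___\1"),  # checkbox choice: (1) -> ___1
    (r"([^>|<])=", r"\1=="),      # '=' (but not >=, <=) becomes '=='
    (r"\[([^\]]*)\]", r"\1 "),    # drop brackets around field names
    (r"\bor\b", "||"),
    (r"\band\b", "&&"),
    (r'"', "'"),
]

sample = "[gender] = \"1\" and [consent(1)] = '1'"
for pattern, repl in replacements:
    sample = re.sub(pattern, repl, sample)
sample = re.sub(r"\s+", " ", sample).strip()  # mirrors the earlier \s+ normalization
print(sample)  # gender == '1' && consent___1 == '1'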
@@ -184,12 +194,7 @@ def process_field_properties(data):
     ...     "Branching Logic (Show field only if...)": "[gender] = '1'"
     ... }
     >>> process_field_properties(data)
-    {
-        'variableName': 'age',
-        'isAbout': 'items/age',
-        'isVis': 'gender == 1',
-        'valueRequired': True
-    }
+    {'variableName': 'age', 'isAbout': 'items/age', 'valueRequired': True, 'isVis': "gender == '1'"}
     """
     if not isinstance(data, dict):
         return {"variableName": "unknown", "isAbout": "items/unknown"}
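Since the expected doctest output is now a single flat dict literal, doctest compares it character-for-character against the actual repr, including key insertion order and the double quotes around "gender == '1'". A quick way to confirm locally, assuming the module imports as shown (the import path is a guess, not taken from this diff):

# Sketch only; adjust the import to wherever this module actually lives.
import doctest
from reproschema import redcap2reproschema

doctest.testmod(redcap2reproschema, verbose=True)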
@@ -811,9 +816,7 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
 
     try:
         df = pd.read_csv(csv_file, encoding="utf-8-sig")
-        df.columns = df.columns.map(
-            lambda x: x.strip().strip('"').lstrip("\ufeff")
-        )
+        df.columns = df.columns.map(lambda x: x.strip().strip('"').lstrip("\ufeff"))
 
         required_columns = ["Form Name", "Variable / Field Name", "Field Type"]
         missing_columns = [
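The collapsed one-liner keeps the same header cleanup; a small sketch of what that chain does to raw column names (the sample values are assumptions):

# Sketch only: the same strip / strip('"') / lstrip chain on made-up raw headers.
def clean_header(name):
    return name.strip().strip('"').lstrip("\ufeff")

print(clean_header("\ufeffForm Name "))  # Form Name
print(clean_header('"Field Type"'))      # Field Type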
@@ -868,60 +871,49 @@ def process_csv(csv_file, abs_folder_path, protocol_name):
                     continue
 
             datas[form_name].append(row_dict)
-
-            # Always add to order list to preserve sequence
             field_path = f"items/{field_name}"
 
             field_type = row_dict.get("Field Type", "").strip().lower()
             field_annotation = row_dict.get("Field Annotation", "")
 
-            # Add to compute list if needed
-            if field_type in COMPUTE_LIST and row_dict.get(
-                "Choices, Calculations, OR Slider Labels"
-            ):
-                condition = normalize_condition(
-                    row_dict["Choices, Calculations, OR Slider Labels"],
-                    field_type=field_type,
-                )
-                if condition:
-                    compute[form_name].append(
-                        {"variableName": field_name, "jsExpression": condition}
-                    )
-            elif (
-                field_annotation
-                and "@CALCTEXT" in str(field_annotation).upper()
-            ):
+            # Handle compute fields
+            is_compute = False
+
+            # Case 1: Field is calc type
+            if field_type in COMPUTE_LIST:
+                calc_value = row_dict.get("Choices, Calculations, OR Slider Labels", "")
+                if calc_value and str(calc_value).strip():
+                    compute_expression = normalize_condition(calc_value, field_type=field_type)
+                    if compute_expression:
+                        is_compute = True
+                        compute[form_name].append({
+                            "variableName": field_name,
+                            "jsExpression": compute_expression
+                        })
+                    else:
+                        print(f"Warning: Could not normalize calc expression for {field_name}: {calc_value}")
+
+            # Case 2: Field has @CALCTEXT
+            elif field_annotation and "@CALCTEXT" in str(field_annotation).upper():
                 match = re.search(r"@CALCTEXT\((.*)\)", field_annotation)
                 if match:
-                    js_expression = normalize_condition(match.group(1))
-                    if js_expression:
-                        compute[form_name].append(
-                            {
-                                "variableName": field_name,
-                                "jsExpression": js_expression,
-                            }
-                        )
-            else:
-                order[form_name].append(f"items/{field_name}")
-
-        # Validate results
-        for form_name in datas:
-            if not datas[form_name]:
-                print(f"Warning: Form '{form_name}' has no valid fields")
-            if not order[form_name] and not compute[form_name]:
-                print(
-                    f"Warning: Form '{form_name}' has no order or compute fields"
-                )
-
-        # Create protocol directory
-        protocol_dir = Path(abs_folder_path) / protocol_name
-        protocol_dir.mkdir(parents=True, exist_ok=True)
+                    compute_expression = normalize_condition(match.group(1))
+                    if compute_expression:
+                        is_compute = True
+                        compute[form_name].append({
+                            "variableName": field_name,
+                            "jsExpression": compute_expression
+                        })
+
+            # Add to order list only if not a compute field
+            if not is_compute:
+                order[form_name].append(field_path)
 
         return datas, order, compute
 
-    except pd.errors.EmptyDataError:
-        raise ValueError("The CSV file is empty")
-
+    except Exception as e:
+        print(f"Error processing CSV: {str(e)}")
+        raise
 
 # todo adding output path
 def redcap2reproschema(
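For orientation, a hedged sketch of the three structures process_csv returns after this change, with a calc field routed into compute and left out of order; the form and field names are made up, and the jsExpression string is only roughly what normalize_condition would produce for "[weight]/([height]*[height])".

# Illustrative shapes only (hypothetical form/field names); datas would hold
# the full row dicts from the CSV, trimmed here for brevity.
datas = {
    "measurements": [
        {"Variable / Field Name": "height", "Field Type": "text"},
        {"Variable / Field Name": "weight", "Field Type": "text"},
        {"Variable / Field Name": "bmi", "Field Type": "calc"},
    ]
}
order = {"measurements": ["items/height", "items/weight"]}  # bmi is computed, so not listed
compute = {
    "measurements": [
        {"variableName": "bmi", "jsExpression": "weight /(height * height)"}
    ]
}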