File tree: 1 file changed (+14 -6 lines changed)
Original file line number | Diff line number | Diff line change
@@ -157,12 +157,20 @@ def __general_prep(records_df: pd.DataFrame) -> pd.DataFrame:
157157 if optional_field not in records_df .columns :
158158 records_df = records_df .assign (** {optional_field : "" })
159159
160- records_df = records_df .drop (
161- labels = list (records_df .columns .difference (ALL_FIELDS )),
162- axis = 1 ,
163- )
164- records_df .loc [:, CONTAINER_TITLE ] = ""
165- records_df .loc [:, ALL_FIELDS ] = records_df [ALL_FIELDS ].astype (str )
160+ # ensure the container title exists and is string-typed
161+ if CONTAINER_TITLE not in records_df .columns :
162+ records_df [CONTAINER_TITLE ] = pd .Series (
163+ "" , index = records_df .index , dtype = "string"
164+ )
165+
166+ # keep only the fields of interest
167+ records_df = records_df .loc [:, ALL_FIELDS ].copy ()
168+
169+ # cast the target columns to pandas StringDtype
170+ records_df = records_df .astype ({col : "string" for col in ALL_FIELDS }, copy = False )
171+
172+ # replace pd.NA with empty strings so regex/string ops don't see NAType
173+ records_df .loc [:, ALL_FIELDS ] = records_df .loc [:, ALL_FIELDS ].fillna ("" )
166174
167175 return records_df
168176
You can’t perform that action at this time.
0 commit comments