1+ from typing import TextIO
12import warnings
23from collections .abc import Callable
34
5+ import sys
46import numpy as np
57import pandas as pd
68from pandas .api .typing import NAType
2527
2628processing_error_logs : list = []
2729error_indices : set = set ()
28- anonymize : bool = False
2930
3031
3132class Preprocess :
@@ -203,6 +204,37 @@ def anonymize_str_column(cls, column: pd.Series, prefix: str) -> pd.Series:
203204 """
204205 return prefix + column .rank (method = "dense" ).astype (int ).astype (str ).str .zfill (2 )
205206
207+ @staticmethod
208+ def showwarning_preprocess (
209+ message : Warning | str ,
210+ category : type [Warning ],
211+ filename : str ,
212+ lineno : int ,
213+ file : TextIO | None = None ,
214+ line : str | None = None ,
215+ ) -> None :
216+ """
217+ Custom warning formatter for preprocessing warnings to avoid printing the full traceback.
218+
219+ Args:
220+ message (Warning | str): The warning message.
221+ category (type[Warning]): The warning category.
222+ filename (str): The name of the file where the warning occurred.
223+ lineno (int): The line number where the warning occurred.
224+ file (TextIO | None, optional): The file to write the warning message to.
225+ line (str | None, optional): The line of code where the warning occurred.
226+
227+ Returns:
228+ str: The formatted warning message as a string.
229+ """
230+ parts = warnings .formatwarning (message , category , filename , lineno , line ).split (":" )[2 :]
231+ msg = ":" .join (parts ).strip ()
232+ if file :
233+ file .write (msg + "\n " )
234+ else :
235+ sys .stderr .write (msg + "\n " )
236+ return
237+
206238 @classmethod
207239 def preprocess_data (
208240 cls ,
@@ -335,6 +367,9 @@ def preprocess_data(
335367 all_categories = list (set (enum_values ) | set (unique_values ))
336368 data [col ] = pd .Categorical (data [col ], categories = all_categories , ordered = False )
337369
370+ old_sw = warnings .showwarning # store previous function...
371+ warnings .showwarning = cls .showwarning_preprocess # override showwarning function
372+
338373 # Raise warning if GPUMemUsage or CPUMemUsage having infinity values
339374 mem_usage_columns = ["CPUMemUsage" , "GPUMemUsage" ]
340375 for col_name in mem_usage_columns :
@@ -356,6 +391,9 @@ def preprocess_data(
356391 ) # Sort by SubmitTime to keep the latest entry
357392 data = data_sorted .drop_duplicates (subset = ["JobID" ], keep = "first" ) # Keep the latest entry for each JobID
358393
394+ # Restore the original showwarning function
395+ warnings .showwarning = old_sw
396+
359397 # Save preprocessing error logs to a file.
360398 cls ._write_preprocessing_error_logs (processing_error_logs )
361399
0 commit comments