Skip to content

Commit bfb003d

Browse files
committed
Monkeypatch showwarning in preprocessing to not include the path where the warning is raised in the output
1 parent 248d36f commit bfb003d

File tree

2 files changed

+40
-4
lines changed

2 files changed

+40
-4
lines changed

notebooks/analysis/No VRAM Use Analysis.ipynb

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,7 @@
108108
"source": [
109109
"# Load the jobs DataFrame from DuckDB\n",
110110
"preprocessed_jobs_df = ea.load_preprocessed_jobs_dataframe_from_duckdb(\n",
111-
" db_path=Path(project_root) / \"data/slurm_data.db\",\n",
112-
" table_name=\"Jobs\",\n",
113-
" anonymize=True\n",
111+
" db_path=Path(project_root) / \"data/slurm_data_small.db\", table_name=\"Jobs\", anonymize=True\n",
114112
")\n",
115113
"display(preprocessed_jobs_df.head(10))\n",
116114
"print(preprocessed_jobs_df.shape)"

src/preprocess/preprocess.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
from typing import TextIO
12
import warnings
23
from collections.abc import Callable
34

5+
import sys
46
import numpy as np
57
import pandas as pd
68
from pandas.api.typing import NAType
@@ -25,7 +27,6 @@
2527

2628
processing_error_logs: list = []
2729
error_indices: set = set()
28-
anonymize: bool = False
2930

3031

3132
class Preprocess:
@@ -203,6 +204,37 @@ def anonymize_str_column(cls, column: pd.Series, prefix: str) -> pd.Series:
203204
"""
204205
return prefix + column.rank(method="dense").astype(int).astype(str).str.zfill(2)
205206

207+
@staticmethod
208+
def showwarning_preprocess(
209+
message: Warning | str,
210+
category: type[Warning],
211+
filename: str,
212+
lineno: int,
213+
file: TextIO | None = None,
214+
line: str | None = None,
215+
) -> None:
216+
"""
217+
Custom warning formatter for preprocessing warnings to avoid printing the full traceback.
218+
219+
Args:
220+
message (Warning | str): The warning message.
221+
category (type[Warning]): The warning category.
222+
filename (str): The name of the file where the warning occurred.
223+
lineno (int): The line number where the warning occurred.
224+
file (TextIO | None, optional): The file to write the warning message to.
225+
line (str | None, optional): The line of code where the warning occurred.
226+
227+
Returns:
228+
str: The formatted warning message as a string.
229+
"""
230+
parts = warnings.formatwarning(message, category, filename, lineno, line).split(":")[2:]
231+
msg = ":".join(parts).strip()
232+
if file:
233+
file.write(msg + "\n")
234+
else:
235+
sys.stderr.write(msg + "\n")
236+
return
237+
206238
@classmethod
207239
def preprocess_data(
208240
cls,
@@ -335,6 +367,9 @@ def preprocess_data(
335367
all_categories = list(set(enum_values) | set(unique_values))
336368
data[col] = pd.Categorical(data[col], categories=all_categories, ordered=False)
337369

370+
old_sw = warnings.showwarning # store previous function...
371+
warnings.showwarning = cls.showwarning_preprocess # override showwarning function
372+
338373
# Raise warning if GPUMemUsage or CPUMemUsage having infinity values
339374
mem_usage_columns = ["CPUMemUsage", "GPUMemUsage"]
340375
for col_name in mem_usage_columns:
@@ -356,6 +391,9 @@ def preprocess_data(
356391
) # Sort by SubmitTime to keep the latest entry
357392
data = data_sorted.drop_duplicates(subset=["JobID"], keep="first") # Keep the latest entry for each JobID
358393

394+
# Restore the original showwarning function
395+
warnings.showwarning = old_sw
396+
359397
# Save preprocessing error logs to a file.
360398
cls._write_preprocessing_error_logs(processing_error_logs)
361399

0 commit comments

Comments
 (0)