File tree Expand file tree Collapse file tree 1 file changed +11
-0
lines changed
Expand file tree Collapse file tree 1 file changed +11
-0
lines changed Original file line number Diff line number Diff line change @@ -803,6 +803,17 @@ def preprocess_data(
803803 message = f"Some entries in { col_name } having infinity values. This may be caused by an overflow."
804804 warnings .warn (message = message , stacklevel = 2 , category = UserWarning )
805805
806+ # Identify and handle duplicate JobIDs
807+ duplicate_rows = data [data ["JobID" ].duplicated (keep = False )]
808+ if not duplicate_rows .empty :
809+ duplicate_message = (
810+ f"{ len (duplicate_rows ['JobID' ].unique ().tolist ())} duplicate JobIDs detected. "
811+ "Keeping only the latest entry for each JobID."
812+ )
813+ warnings .warn (message = duplicate_message , stacklevel = 2 , category = UserWarning )
814+ data_sorted = data .sort_values (by = "SubmitTime" , ascending = False ) # Sort by SubmitTime to keep the latest entry
815+ data = data_sorted .drop_duplicates (subset = ["JobID" ], keep = "first" ) # Keep the latest entry for each JobID
816+
806817 # Save preprocessing error logs to a file.
807818 _write_preprocessing_error_logs (processing_error_logs )
808819
You can’t perform that action at this time.
0 commit comments