@@ -507,105 +507,6 @@ def calculate_user_efficiency_metrics(self) -> pd.DataFrame:
507507 self .users_with_efficiency_metrics = users_w_efficiency_metrics
508508 return self .users_with_efficiency_metrics
509509
510- def find_inefficient_users_by_alloc_vram_efficiency (
511- self , alloc_vram_efficiency_filter : int | float | dict | None , min_jobs : int = 5
512- ) -> pd .DataFrame :
513- """
514- Identify users with low expected allocated VRAM efficiency across their jobs compared to others
515-
516- Args:
517- alloc_vram_efficiency_filter:
518- - int | float : select rows where expected_value_alloc_vram_efficiency == value
519- - dict with 'min'/'max' and required 'inclusive' (bool): select rows in the range
520- min_jobs (int): Minimum number of jobs a user must have to be included in the analysis
521-
522- Returns:
523- pd.DataFrame: DataFrame with users and their average VRAM efficiency
524-
525- Raises:
526- ValueError: If the filter for expected_value_alloc_vram_efficiency is invalid.
527- """
528- if self .users_with_efficiency_metrics is None :
529- self .calculate_user_efficiency_metrics ()
530- print (
531- "Users DataFrame with efficiency metrics was not available. "
532- "Calculated it using the DataFrame of jobs with efficiency metrics."
533- )
534-
535- mask = pd .Series (
536- [True ] * len (self .users_with_efficiency_metrics ), index = self .users_with_efficiency_metrics .index
537- )
538-
539- if alloc_vram_efficiency_filter is not None :
540- try :
541- mask &= EfficiencyAnalysis .apply_numeric_filter (
542- self .users_with_efficiency_metrics ["expected_value_alloc_vram_efficiency" ],
543- alloc_vram_efficiency_filter ,
544- {FilterTypeEnum .NUMERIC_SCALAR , FilterTypeEnum .DICTIONARY },
545- filter_name = "expected_value_alloc_vram_efficiency" ,
546- )
547- except ValueError as e :
548- raise ValueError ("Invalid filter for expected_value_alloc_vram_efficiency." ) from e
549-
550- col = self .users_with_efficiency_metrics ["job_count" ]
551- mask &= col .ge (min_jobs )
552-
553- inefficient_users = self .users_with_efficiency_metrics [mask ]
554-
555- # Sort by the metric ascending (lower is worse)
556- inefficient_users = inefficient_users .sort_values ("expected_value_alloc_vram_efficiency" , ascending = True )
557- return inefficient_users
558-
559- def find_inefficient_users_by_vram_hours (
560- self , vram_hours_filter : int | float | dict = 200 , min_jobs : int = 5
561- ) -> pd .DataFrame :
562- """
563- Identify users with high VRAM-hours across their jobs compared to others.
564-
565- Args:
566- vram_hours_filter:
567- - None: no filtering on vram_hours
568- - int | float: select rows where vram_hours == value
569- - dict with 'min'/'max' and required 'inclusive' (bool): select rows in the range
570- min_jobs (int): Minimum number of jobs a user must have to be included in the analysis
571-
572- Returns:
573- pd.DataFrame: DataFrame with users and their total VRAM hours
574-
575- Raises:
576- ValueError: If the filter is invalid
577- """
578- if self .users_with_efficiency_metrics is None :
579- self .calculate_user_efficiency_metrics ()
580- print (
581- "Users DataFrame with efficiency metrics was not available. "
582- "Calculated it using the DataFrame of jobs with efficiency metrics."
583- )
584-
585- mask = pd .Series (
586- [True ] * len (self .users_with_efficiency_metrics ), index = self .users_with_efficiency_metrics .index
587- )
588-
589- if vram_hours_filter is not None :
590- try :
591- mask &= EfficiencyAnalysis .apply_numeric_filter (
592- self .users_with_efficiency_metrics ["vram_hours" ],
593- vram_hours_filter ,
594- {FilterTypeEnum .NUMERIC_SCALAR , FilterTypeEnum .DICTIONARY },
595- filter_name = "vram_hours_filter" ,
596- )
597- except ValueError as e :
598- raise ValueError ("Invalid filter for vram_hours." ) from e
599-
600- col = self .users_with_efficiency_metrics ["job_count" ]
601- mask &= col .ge (min_jobs )
602-
603- inefficient_users = self .users_with_efficiency_metrics [mask ]
604-
605- # Sort by the metric descending (higher is worse)
606- inefficient_users = inefficient_users .sort_values ("vram_hours" , ascending = False )
607- return inefficient_users
608-
609510 def calculate_all_efficiency_metrics (
610511 self ,
611512 filtered_jobs : pd .DataFrame ,
@@ -722,57 +623,6 @@ def calculate_pi_account_efficiency_metrics(self) -> pd.DataFrame:
722623 self .pi_accounts_with_efficiency_metrics = pi_efficiency_metrics
723624 return self .pi_accounts_with_efficiency_metrics
724625
725- def find_inefficient_pis_by_vram_hours (
726- self , vram_hours_filter : int | float | dict = 200 , min_jobs : int = 5
727- ) -> pd .DataFrame :
728- """
729- Identify inefficient PI accounts based on VRAM hours.
730-
731- Args:
732- vram_hours_filter:
733- - None: no filtering on vram_hours
734- - int | float: select rows where pi_acc_vram_hours == value
735- - dict with 'min'/'max' and required 'inclusive' (bool): select rows in the range
736- min_jobs (int): Minimum number of jobs a PI account must have to be included in the analysis
737-
738- Returns:
739- pd.DataFrame: DataFrame with PI accounts and their VRAM hours
740-
741- Raises:
742- ValueError: If the filter is invalid
743- """
744- if self .pi_accounts_with_efficiency_metrics is None :
745- self .calculate_pi_account_efficiency_metrics ()
746- print (
747- "PI accounts with efficiency metrics DataFrame was not available. "
748- "Calculated it using the DataFrame of users with efficiency metrics."
749- )
750-
751- mask = pd .Series (
752- [True ] * len (self .pi_accounts_with_efficiency_metrics ),
753- index = self .pi_accounts_with_efficiency_metrics .index ,
754- )
755-
756- if vram_hours_filter is not None :
757- try :
758- mask &= EfficiencyAnalysis .apply_numeric_filter (
759- self .pi_accounts_with_efficiency_metrics ["pi_acc_vram_hours" ],
760- vram_hours_filter ,
761- {FilterTypeEnum .NUMERIC_SCALAR , FilterTypeEnum .DICTIONARY },
762- filter_name = "pi_acc_vram_hours_filter" ,
763- )
764- except ValueError as e :
765- raise ValueError ("Invalid filter for pi_acc_vram_hours." ) from e
766-
767- col = self .pi_accounts_with_efficiency_metrics ["job_count" ]
768- mask &= col .ge (min_jobs )
769-
770- inefficient_pi_accounts = self .pi_accounts_with_efficiency_metrics [mask ]
771-
772- # Sort by the metric descending (higher is worse)
773- inefficient_pi_accounts = inefficient_pi_accounts .sort_values ("pi_acc_vram_hours" , ascending = False )
774- return inefficient_pi_accounts
775-
776626 def sort_and_filter_records_with_metrics (
777627 self ,
778628 metrics_df_name_enum : MetricsDFNameEnumT ,
0 commit comments