@@ -95,24 +95,40 @@ def view_gpus(run: dict[str, t.Any]) -> None:
9595 first_gpu_stage = gpu_stages [0 ]
9696 num_gpus = len (run ["gpu" ][first_gpu_stage ])
9797 if num_gpus :
98- print ( "[bold green]GPU:[/]" )
99-
98+ # check for any change in memory usage for GPUs
99+ changes = []
100100 for i in range (num_gpus ):
101- dev_name = run ["gpu" ][first_gpu_stage ][i ]["device_name" ]
102- dev_total = run ["gpu" ][first_gpu_stage ][i ]["total_memory" ]
103-
104- print (f" [green]{ dev_name } [/] [dim]|[/] { sizeof_fmt (dev_total )} " )
105-
106- prev_stage = None
101+ prev = None
102+ change = False
107103 for stage in gpu_stages :
108- used = run ["gpu" ][stage ][i ]["total_memory" ] - run ["gpu" ][stage ][i ]["free_memory" ]
109- if prev_stage is None :
110- print (f" * { stage } : { sizeof_fmt (used )} " )
111- else :
112- print (f" * { stage } : { delta_fmt (prev_stage , used )} " )
113- prev_stage = used
114-
115- print ()
104+ if prev is not None :
105+ if run ["gpu" ][stage ][i ]["free_memory" ] != prev :
106+ change = True
107+ break
108+ prev = run ["gpu" ][stage ][i ]["free_memory" ]
109+ changes .append (change )
110+
111+ if any (changes ):
112+ print ("[bold green]GPU:[/]" )
113+ for i in range (num_gpus ):
114+ if not changes [i ]:
115+ continue
116+
117+ dev_name = run ["gpu" ][first_gpu_stage ][i ]["device_name" ]
118+ dev_total = run ["gpu" ][first_gpu_stage ][i ]["total_memory" ]
119+
120+ print (f" [green]{ dev_name } [/] [dim]|[/] { sizeof_fmt (dev_total )} " )
121+
122+ prev_stage = None
123+ for stage in gpu_stages :
124+ used = run ["gpu" ][stage ][i ]["total_memory" ] - run ["gpu" ][stage ][i ]["free_memory" ]
125+ if prev_stage is None :
126+ print (f" * { stage } : { sizeof_fmt (used )} " )
127+ else :
128+ print (f" * { stage } : { delta_fmt (prev_stage , used )} " )
129+ prev_stage = used
130+
131+ print ()
116132
117133
118134def view_process_executions (trace : dict [str , t .Any ]) -> None :
0 commit comments