Skip to content

Commit 743b664

Browse files
committed
enable counting write time
1 parent 0d7600e commit 743b664

File tree

1 file changed

+34
-29
lines changed

1 file changed

+34
-29
lines changed

benchmarks/encoders/benchmark_encoders.py

Lines changed: 34 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -84,45 +84,41 @@ def report_stats(
8484
min_time = unit_times.min().item()
8585
max_time = unit_times.max().item()
8686
print(
87-
f"\n{prefix}: {med = :.2f}, {mean = :.2f} +- {std:.2f}, {min_time = :.2f}, {max_time = :.2f} - in {unit}"
87+
f"\n{prefix} {med = :.2f}, {mean = :.2f} +- {std:.2f}, {min_time = :.2f}, {max_time = :.2f} - in {unit}"
8888
)
89-
fps = num_frames / (times * 1e-9)
90-
std = fps.std().item()
91-
med = fps.median().item()
92-
max_fps = fps.max().item()
93-
print(f"{med = :.1f} fps +- {std:.1f}, {max_fps = :.1f}")
94-
9589
if cpu_utils is not None:
9690
cpu_avg = cpu_utils.mean().item()
9791
cpu_peak = cpu_utils.max().item()
98-
print(f"CPU utilization: avg = {cpu_avg:.1f}%, peak = {cpu_peak:.1f}%")
92+
print(f"CPU utilization: avg = {cpu_avg:.1f}%, peak = {cpu_peak:.1f}%")
9993

10094
if gpu_utils is not None and gpu_utils.numel() > 0:
10195
gpu_avg = gpu_utils.mean().item()
10296
gpu_peak = gpu_utils.max().item()
103-
print(f"GPU utilization: avg = {gpu_avg:.1f}%, peak = {gpu_peak:.1f}%")
97+
print(f"GPU utilization: avg = {gpu_avg:.1f}%, peak = {gpu_peak:.1f}%")
10498

10599

106100
def encode_torchcodec(frames, output_path, device="cpu"):
101+
encoder = VideoEncoder(frames=frames, frame_rate=30)
107102
if device == "cuda":
108-
# Move frames to GPU
109-
gpu_frames = frames.cuda() if frames.device.type == "cpu" else frames
110-
encoder = VideoEncoder(frames=gpu_frames, frame_rate=30, device="cuda")
111103
encoder.to_file(dest=output_path, codec="h264_nvenc", extra_options={"qp": 1})
112104
else:
113-
encoder = VideoEncoder(frames=frames, frame_rate=30, device="cpu")
114105
encoder.to_file(dest=output_path, codec="libx264", crf=0)
115106

116107

117-
def write_raw_frames(frames, raw_path):
108+
def write_raw_frames(frames, num_frames, raw_path):
118109
# Convert NCHW to NHWC for raw video format
119-
raw_frames = frames.permute(0, 2, 3, 1).contiguous()
110+
raw_frames = frames.permute(0, 2, 3, 1).contiguous()[:num_frames]
120111
with open(raw_path, "wb") as f:
121112
f.write(raw_frames.cpu().numpy().tobytes())
122113

123114

124-
def encode_ffmpeg_cli(raw_path, frames_shape, output_path, device="cpu", codec=None):
125-
height, width = frames_shape[2], frames_shape[3]
115+
def write_and_encode_ffmpeg_cli(
116+
frames, num_frames, raw_path, output_path, device="cpu", write_frames=False
117+
):
118+
# When the write_frames flag is set, rewrite the raw frames inside the benchmarked function so that the write time is included in the measurement
119+
if write_frames:
120+
write_raw_frames(frames, num_frames, raw_path)
121+
height, width = frames.shape[2], frames.shape[3]
126122

127123
if device == "cuda":
128124
codec = "h264_nvenc"
@@ -152,6 +148,7 @@ def encode_ffmpeg_cli(raw_path, frames_shape, output_path, device="cpu", codec=N
152148
ffmpeg_cmd.extend(quality_params)
153149
# By not setting threads, allow FFmpeg to choose.
154150
# ffmpeg_cmd.extend(["-threads", "1"])
151+
# try setting threads on VideoEncoder too?
155152
ffmpeg_cmd.extend([str(output_path)])
156153

157154
subprocess.run(ffmpeg_cmd, check=True, capture_output=True)
@@ -174,6 +171,11 @@ def main():
174171
default=DEFAULT_MAX_FRAMES,
175172
help="Maximum number of frames to decode for benchmarking",
176173
)
174+
parser.add_argument(
175+
"--write-frames",
176+
action="store_true",
177+
help="Include raw frame writing time in FFmpeg CLI benchmarks for fairer comparison with tensor-based workflows",
178+
)
177179

178180
args = parser.parse_args()
179181

@@ -189,21 +191,22 @@ def main():
189191
frames = decoder.get_frames_in_range(
190192
start=0, stop=min(args.max_frames, len(decoder))
191193
).data
194+
gpu_frames = frames.cuda()
192195
print(
193196
f"Loaded {frames.shape[0]} frames of size {frames.shape[2]}x{frames.shape[3]}"
194197
)
195198

196199
with tempfile.TemporaryDirectory() as temp_dir:
197200
temp_dir = Path(temp_dir)
198201
raw_frames_path = temp_dir / "input_frames.raw"
199-
write_raw_frames(frames, str(raw_frames_path))
202+
write_raw_frames(frames, args.max_frames, str(raw_frames_path))
200203

201204
# Benchmark torchcodec on GPU
202205
if cuda_available:
203206
gpu_output = temp_dir / "torchcodec_gpu.mp4"
204207
times, _cpu_utils, gpu_utils = bench(
205208
encode_torchcodec,
206-
frames=frames,
209+
frames=gpu_frames,
207210
output_path=str(gpu_output),
208211
device="cuda",
209212
average_over=args.average_over,
@@ -219,17 +222,18 @@ def main():
219222
if cuda_available:
220223
ffmpeg_gpu_output = temp_dir / "ffmpeg_gpu.mp4"
221224
times, _cpu_utils, gpu_utils = bench(
222-
encode_ffmpeg_cli,
225+
write_and_encode_ffmpeg_cli,
226+
frames=gpu_frames,
227+
num_frames=args.max_frames,
223228
raw_path=str(raw_frames_path),
224-
frames_shape=frames.shape,
225229
output_path=str(ffmpeg_gpu_output),
226230
device="cuda",
231+
write_frames=args.write_frames,
227232
average_over=args.average_over,
228233
warmup=1,
229234
)
230-
report_stats(
231-
times, frames.shape[0], None, gpu_utils, prefix="FFmpeg CLI on GPU"
232-
)
235+
prefix = "FFmpeg CLI on GPU "
236+
report_stats(times, frames.shape[0], None, gpu_utils, prefix=prefix)
233237
else:
234238
print("Skipping FFmpeg CLI GPU benchmark (CUDA not available)")
235239

@@ -250,17 +254,18 @@ def main():
250254
# Benchmark FFmpeg CLI on CPU
251255
ffmpeg_cpu_output = temp_dir / "ffmpeg_cpu.mp4"
252256
times, cpu_utils, _gpu_utils = bench(
253-
encode_ffmpeg_cli,
257+
write_and_encode_ffmpeg_cli,
258+
frames=frames,
259+
num_frames=args.max_frames,
254260
raw_path=str(raw_frames_path),
255-
frames_shape=frames.shape,
256261
output_path=str(ffmpeg_cpu_output),
257262
device="cpu",
263+
write_frames=args.write_frames,
258264
average_over=args.average_over,
259265
warmup=1,
260266
)
261-
report_stats(
262-
times, frames.shape[0], cpu_utils, None, prefix="FFmpeg CLI on CPU"
263-
)
267+
prefix = "FFmpeg CLI on CPU "
268+
report_stats(times, frames.shape[0], cpu_utils, None, prefix=prefix)
264269

265270

266271
if __name__ == "__main__":

0 commit comments

Comments
 (0)