Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/wild-hounds-laugh.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"gradio": patch
---

feat:Profile the upload route
50 changes: 28 additions & 22 deletions gradio/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1771,11 +1771,14 @@ async def preprocess_data(
inputs[i].get("value", None) if is_prop_input else inputs[i]
)

inputs_cached = await processing_utils.async_move_files_to_cache(
value_to_process,
block,
check_in_upload_folder=not explicit_call,
)
from gradio.profiling import trace_phase

async with trace_phase("preprocess_move_to_cache"):
inputs_cached = await processing_utils.async_move_files_to_cache(
value_to_process,
block,
check_in_upload_folder=not explicit_call,
)
if getattr(block, "data_model", None) and inputs_cached is not None:
data_model = cast(
Union[GradioModel, GradioRootModel], block.data_model
Expand Down Expand Up @@ -1855,6 +1858,8 @@ async def postprocess_data(
predictions: list | dict,
state: SessionState | None,
) -> list[Any]:
from gradio.profiling import trace_phase

state = state or SessionState(self)
if (
isinstance(predictions, dict)
Expand Down Expand Up @@ -1948,28 +1953,29 @@ async def postprocess_data(
prediction_value_serialized = prediction_value.model_dump()
else:
prediction_value_serialized = prediction_value
prediction_value_serialized = (
await processing_utils.async_move_files_to_cache(
prediction_value_serialized,
block,
postprocess=True,
async with trace_phase("postprocess_update_state_in_config"):
prediction_value_serialized = (
await processing_utils.async_move_files_to_cache(
prediction_value_serialized,
block,
postprocess=True,
)
)
if block._id not in state:
state[block._id] = block
state._update_value_in_config(
block._id, prediction_value_serialized
)
)
if block._id not in state:
state[block._id] = block
state._update_value_in_config(
block._id, prediction_value_serialized
)
elif not block_fn.postprocess:
if block._id not in state:
state[block._id] = block
state._update_value_in_config(block._id, prediction_value)

outputs_cached = await processing_utils.async_move_files_to_cache(
prediction_value,
block,
postprocess=True,
)
async with trace_phase("postprocess_move_to_cache"):
outputs_cached = await processing_utils.async_move_files_to_cache(
prediction_value,
block,
postprocess=True,
)
output.append(outputs_cached)

return output
Expand Down
111 changes: 61 additions & 50 deletions gradio/components/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,59 +196,67 @@ def preprocess(self, payload: FileData | None) -> str | None:
Returns:
Passes the uploaded video as a `str` filepath or URL whose extension can be modified by `format`.
"""
if payload is None:
return None
if not payload.path:
raise ValueError("Payload path missing")
file_name = Path(payload.path)
uploaded_format = file_name.suffix.replace(".", "")
needs_formatting = self.format is not None and uploaded_format != self.format
flip = self.sources == ["webcam"] and self.webcam_options.mirror
# TODO: Check other image extensions to see if they work.
valid_watermark_extensions = [".png", ".jpg", ".jpeg"]
if self.watermark.watermark is not None:
if not isinstance(self.watermark.watermark, (str, Path)):
raise ValueError(
f"Provided watermark file not an expected file type. "
f"Received: {self.watermark.watermark}"
from gradio.profiling import trace_phase_sync

with trace_phase_sync("preprocess_video"):
if payload is None:
return None
if not payload.path:
raise ValueError("Payload path missing")
file_name = Path(payload.path)
uploaded_format = file_name.suffix.replace(".", "")
needs_formatting = (
self.format is not None and uploaded_format != self.format
)
flip = self.sources == ["webcam"] and self.webcam_options.mirror
# TODO: Check other image extensions to see if they work.
valid_watermark_extensions = [".png", ".jpg", ".jpeg"]
if self.watermark.watermark is not None:
if not isinstance(self.watermark.watermark, (str, Path)):
raise ValueError(
f"Provided watermark file not an expected file type. "
f"Received: {self.watermark.watermark}"
)
if (
Path(self.watermark.watermark).suffix
not in valid_watermark_extensions
):
raise ValueError(
f"Watermark file does not have a supported extension. "
f"Expected one of {','.join(valid_watermark_extensions)}. "
f"Received: {Path(self.watermark.watermark).suffix}."
)
if needs_formatting or flip:
format = f".{self.format if needs_formatting else uploaded_format}"
output_options = ["-vf", "hflip", "-c:a", "copy"] if flip else []
output_options += ["-an"] if not self.include_audio else []
flip_suffix = "_flip" if flip else ""
output_file_name = str(
file_name.with_name(f"{file_name.stem}{flip_suffix}{format}")
)
if Path(self.watermark.watermark).suffix not in valid_watermark_extensions:
raise ValueError(
f"Watermark file does not have a supported extension. "
f"Expected one of {','.join(valid_watermark_extensions)}. "
f"Received: {Path(self.watermark.watermark).suffix}."
output_filepath = Path(output_file_name)
if output_filepath.exists():
return str(output_filepath.resolve())

ff = FFmpeg( # type: ignore
inputs={str(file_name): None},
outputs={output_file_name: output_options},
)
if needs_formatting or flip:
format = f".{self.format if needs_formatting else uploaded_format}"
output_options = ["-vf", "hflip", "-c:a", "copy"] if flip else []
output_options += ["-an"] if not self.include_audio else []
flip_suffix = "_flip" if flip else ""
output_file_name = str(
file_name.with_name(f"{file_name.stem}{flip_suffix}{format}")
)
output_filepath = Path(output_file_name)
if output_filepath.exists():
ff.run()
return str(output_filepath.resolve())

ff = FFmpeg( # type: ignore
inputs={str(file_name): None},
outputs={output_file_name: output_options},
)
ff.run()
return str(output_filepath.resolve())
elif not self.include_audio:
output_file_name = str(file_name.with_name(f"muted_{file_name.name}"))
if Path(output_file_name).exists():
elif not self.include_audio:
output_file_name = str(file_name.with_name(f"muted_{file_name.name}"))
if Path(output_file_name).exists():
return output_file_name

ff = FFmpeg( # type: ignore
inputs={str(file_name): None},
outputs={output_file_name: ["-an"]},
)
ff.run()
return output_file_name

ff = FFmpeg( # type: ignore
inputs={str(file_name): None},
outputs={output_file_name: ["-an"]},
)
ff.run()
return output_file_name
else:
return str(file_name)
else:
return str(file_name)

def postprocess(self, value: str | Path | None) -> FileData | None:
"""
Expand All @@ -272,6 +280,8 @@ def _format_video(self, video: str | Path | None) -> FileData | None:
Processes a video to ensure that it is in the correct format
and adds a watermark if requested.
"""
from gradio.profiling import trace_phase_sync

if video is None:
return None
video = str(video)
Expand Down Expand Up @@ -301,7 +311,8 @@ def _format_video(self, video: str | Path | None) -> FileData | None:
warnings.warn(
"Video does not have browser-compatible container or codec. Converting to mp4."
)
video = processing_utils.convert_video_to_playable_mp4(video)
with trace_phase_sync("postprocess_video_convert_video_to_playable_mp4"):
video = processing_utils.convert_video_to_playable_mp4(video)
# Recalculate the format in case convert_video_to_playable_mp4 already made it the selected format
returned_format = utils.get_extension_from_file_path_or_url(video).lower()
if (
Expand Down
17 changes: 10 additions & 7 deletions gradio/image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,16 @@ def preprocess_image(
warnings.simplefilter("ignore")
if image_mode is not None:
im = im.convert(image_mode)
return format_image(
im,
type=type,
cache_dir=cache_dir,
name=name,
format=suffix,
)
from gradio.profiling import trace_phase_sync

with trace_phase_sync("preprocess_format_image"):
return format_image(
im,
type=type,
cache_dir=cache_dir,
name=name,
format=suffix,
)


def postprocess_image(
Expand Down
67 changes: 38 additions & 29 deletions gradio/processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,24 @@ def save_pil_to_cache(
def save_img_array_to_cache(
arr: np.ndarray, cache_dir: str, format: str = "webp"
) -> str:
pil_image = Image.fromarray(_convert(arr, np.uint8, force_copy=False))
return save_pil_to_cache(pil_image, cache_dir, format=format)
from gradio.profiling import trace_phase_sync

with trace_phase_sync("postprocess_save_img_array_to_cache"):
pil_image = Image.fromarray(_convert(arr, np.uint8, force_copy=False))
return save_pil_to_cache(pil_image, cache_dir, format=format)


def save_audio_to_cache(
data: np.ndarray, sample_rate: int, format: str, cache_dir: str
) -> str:
temp_dir = Path(cache_dir) / hash_bytes(data.tobytes())
temp_dir.mkdir(exist_ok=True, parents=True)
filename = str((temp_dir / f"audio.{format}").resolve())
audio_to_file(sample_rate, data, filename, format=format)
return filename
from gradio.profiling import trace_phase_sync

with trace_phase_sync("postprocess_save_audio_to_cache"):
temp_dir = Path(cache_dir) / hash_bytes(data.tobytes())
temp_dir.mkdir(exist_ok=True, parents=True)
filename = str((temp_dir / f"audio.{format}").resolve())
audio_to_file(sample_rate, data, filename, format=format)
return filename


def detect_audio_format(data: bytes) -> str:
Expand Down Expand Up @@ -644,28 +650,31 @@ def resize_and_crop(img, size, crop_type="center"):
def audio_from_file(
filename: str, crop_min: float = 0, crop_max: float = 100
) -> tuple[int, np.ndarray]:
try:
audio = AudioSegment.from_file(filename)
except FileNotFoundError as e:
isfile = Path(filename).is_file()
msg = (
f"Cannot load audio from file: `{'ffprobe' if isfile else filename}` not found."
+ " Please install `ffmpeg` in your system to use non-WAV audio file formats"
" and make sure `ffprobe` is in your PATH."
if isfile
else ""
)
raise RuntimeError(msg) from e
except OSError as e:
raise e
if crop_min != 0 or crop_max != 100:
audio_start = len(audio) * crop_min / 100
audio_end = len(audio) * crop_max / 100
audio = audio[audio_start:audio_end]
data = np.array(audio.get_array_of_samples())
if audio.channels > 1:
data = data.reshape(-1, audio.channels)
return audio.frame_rate, data
from gradio.profiling import trace_phase_sync

with trace_phase_sync("preprocess_audio_from_file"):
try:
audio = AudioSegment.from_file(filename)
except FileNotFoundError as e:
isfile = Path(filename).is_file()
msg = (
f"Cannot load audio from file: `{'ffprobe' if isfile else filename}` not found."
+ " Please install `ffmpeg` in your system to use non-WAV audio file formats"
" and make sure `ffprobe` is in your PATH."
if isfile
else ""
)
raise RuntimeError(msg) from e
except OSError as e:
raise e
if crop_min != 0 or crop_max != 100:
audio_start = len(audio) * crop_min / 100
audio_end = len(audio) * crop_max / 100
audio = audio[audio_start:audio_end]
data = np.array(audio.get_array_of_samples())
if audio.channels > 1:
data = data.reshape(-1, audio.channels)
return audio.frame_rate, data


def audio_to_file(sample_rate, data, filename, format="wav"):
Expand Down
40 changes: 39 additions & 1 deletion gradio/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import time
from collections import deque
from contextlib import asynccontextmanager
from contextlib import asynccontextmanager, contextmanager
from dataclasses import dataclass, field
from typing import Any

Expand All @@ -25,6 +25,16 @@ class RequestTrace:
streaming_diff_ms: float = 0.0
total_ms: float = 0.0
n_iterations: int = 0
upload_ms: float = 0.0
preprocess_move_to_cache_ms: float = 0.0
preprocess_format_image_ms: float = 0.0
postprocess_save_img_array_to_cache_ms: float = 0.0
preprocess_audio_from_file_ms: float = 0.0
postprocess_save_audio_to_cache_ms: float = 0.0
preprocess_video_ms: float = 0.0
postprocess_video_convert_video_to_playable_mp4_ms: float = 0.0
    postprocess_update_state_in_config_ms: float = 0.0
postprocess_move_to_cache_ms: float = 0.0

def set_phase(self, name: str, duration_ms: float):
attr = f"{name}_ms"
Expand All @@ -47,6 +57,15 @@ def to_dict(self) -> dict[str, Any]:
"streaming_diff_ms": self.streaming_diff_ms,
"total_ms": self.total_ms,
"n_iterations": self.n_iterations,
"preprocess_move_to_cache_ms": self.preprocess_move_to_cache_ms,
"preprocess_format_image_ms": self.preprocess_format_image_ms,
"postprocess_save_img_array_to_cache_ms": self.postprocess_save_img_array_to_cache_ms,
"preprocess_audio_from_file_ms": self.preprocess_audio_from_file_ms,
"postprocess_save_audio_to_cache_ms": self.postprocess_save_audio_to_cache_ms,
"preprocess_video_ms": self.preprocess_video_ms,
"postprocess_video_convert_video_to_playable_mp4_ms": self.postprocess_video_convert_video_to_playable_mp4_ms,
"postprocess_update_state_in_config_ms": self.postprocess_update_state_in_config_ms,
"postprocess_move_to_cache_ms": self.postprocess_move_to_cache_ms,
}


Expand Down Expand Up @@ -78,6 +97,21 @@ async def trace_phase(name: str):
trace.set_phase(name, duration_ms)


@contextmanager
def trace_phase_sync(name: str):
    """Time the wrapped block and record it as phase *name* on the active trace.

    Synchronous counterpart of ``trace_phase``. When no request trace is
    active in the current context (e.g. profiling is not enabled for this
    request), the body runs untimed and the only overhead is the
    context-variable lookup.
    """
    active = _current_trace.get()
    if active is None:
        # No trace bound to this context: behave as a transparent no-op.
        yield
        return
    started = time.monotonic()
    try:
        yield
    finally:
        # Record in the finally-clause so the phase is captured even when
        # the wrapped block raises.
        elapsed_ms = (time.monotonic() - started) * 1000
        active.set_phase(name, elapsed_ms)


class TraceCollector:
def __init__(self, maxlen: int = 100_000):
self._traces: deque[RequestTrace] = deque(maxlen=maxlen)
Expand Down Expand Up @@ -134,3 +168,7 @@ def clear(self):
@asynccontextmanager
async def trace_phase(name: str):  # noqa: ARG001
    """No-op fallback: run the wrapped block untimed, ignoring *name*.

    Presumably selected when profiling is disabled — confirm against the
    enclosing conditional (not visible in this hunk).
    """
    yield

@contextmanager
def trace_phase_sync(name: str):  # noqa: ARG001
    """No-op fallback: run the wrapped block untimed, ignoring *name*.

    Synchronous twin of the no-op ``trace_phase`` — presumably selected when
    profiling is disabled (enclosing conditional not visible in this hunk).
    """
    yield
Loading
Loading