6 changes: 6 additions & 0 deletions CHANGELOGS.rst
@@ -1,6 +1,12 @@
Change Logs
===========

0.4.4
+++++

* :pr:`79`: implements task ``object-detection``
* :pr:`78`: uses *onnx-weekly* instead of *onnx* to avoid conflicts with *onnxscript*

0.4.3
+++++

1 change: 1 addition & 0 deletions _doc/api/tasks/index.rst
@@ -41,6 +41,7 @@ Or:
    image_classification
    image_text_to_text
    mixture_of_expert
    object_detection
    sentence_similarity
    text_classification
    text_generation
7 changes: 7 additions & 0 deletions _doc/api/tasks/object_detection.rst
@@ -0,0 +1,7 @@

onnx_diagnostic.tasks.object_detection
======================================

.. automodule:: onnx_diagnostic.tasks.object_detection
    :members:
    :no-undoc-members:
28 changes: 28 additions & 0 deletions _unittests/ut_tasks/test_tasks_object_detection.py
@@ -0,0 +1,28 @@
import unittest
import torch
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, has_transformers
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str


class TestTasks(ExtTestCase):
    @hide_stdout()
    def test_object_detection(self):
        mid = "hustvl/yolos-tiny"
        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
        self.assertEqual(data["task"], "object-detection")
        self.assertIn((data["size"], data["n_weights"]), [(8160384, 2040096)])
        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
        model(**inputs)
        model(**data["inputs2"])
        if not has_transformers("4.51.999"):
            raise unittest.SkipTest("Requires transformers>=4.52")
        with torch_export_patches(patch_transformers=True, verbose=10):
            torch.export.export(
                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
            )


if __name__ == "__main__":
    unittest.main(verbosity=2)
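
For context, the ``dynamic_shapes`` mapping passed to ``torch.export.export`` marks which tensor dimensions may vary at runtime. Below is a minimal, self-contained sketch, independent of onnx_diagnostic, using a hypothetical ``TinyConv`` module to show the same export pattern with a dynamic batch dimension:

import torch


class TinyConv(torch.nn.Module):
    # Hypothetical stand-in for a detection backbone: a single conv layer.
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 8, kernel_size=3, padding=1)

    def forward(self, pixel_values):
        return self.conv(pixel_values)


model = TinyConv()
inputs = {"pixel_values": torch.randn(2, 3, 224, 224)}
# Mark the batch dimension as dynamic, mirroring the shapes used by the
# object-detection task (dims 2 and 3 could be declared dynamic as well).
dyn = {"pixel_values": {0: torch.export.Dim("batch", min=1, max=1024)}}
ep = torch.export.export(model, (), kwargs=inputs, dynamic_shapes=dyn, strict=False)
print(ep.graph)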
38 changes: 38 additions & 0 deletions _unittests/ut_tasks/try_tasks.py
@@ -502,6 +502,44 @@ def test_falcon_mamba_7b(self):
        for seq in sequences:
            print(f"Result: {seq['generated_text']}")

    @never_test()
    def test_object_detection(self):
        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k object_
        # https://huggingface.co/hustvl/yolos-tiny

        from transformers import YolosImageProcessor, YolosForObjectDetection
        from PIL import Image
        import torch
        import requests

        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        image = Image.open(requests.get(url, stream=True).raw)

        model = YolosForObjectDetection.from_pretrained("hustvl/yolos-tiny")
        image_processor = YolosImageProcessor.from_pretrained("hustvl/yolos-tiny")

        inputs = image_processor(images=image, return_tensors="pt")
        print()
        print("-- inputs", string_type(inputs, with_shape=True, with_min_max=True))
        outputs = model(**inputs)
        print("-- outputs", string_type(outputs, with_shape=True, with_min_max=True))

        # model predicts bounding boxes and corresponding COCO classes
        # logits = outputs.logits
        # bboxes = outputs.pred_boxes

        # print results
        target_sizes = torch.tensor([image.size[::-1]])
        results = image_processor.post_process_object_detection(
            outputs, threshold=0.9, target_sizes=target_sizes
        )[0]
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
            box = [round(i, 2) for i in box.tolist()]
            print(
                f"Detected {model.config.id2label[label.item()]} with confidence "
                f"{round(score.item(), 3)} at location {box}"
            )


if __name__ == "__main__":
    unittest.main(verbosity=2)
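
The ``post_process_object_detection`` call above converts the model's normalized ``(cx, cy, w, h)`` box predictions into absolute ``(x0, y0, x1, y1)`` corner coordinates and filters them by score. A rough sketch of that conversion, assuming DETR/YOLOS-style boxes normalized to ``[0, 1]`` (simplified, not the exact transformers implementation):

import torch


def cxcywh_to_xyxy(boxes: torch.Tensor, height: int, width: int) -> torch.Tensor:
    # boxes: (..., 4) tensor of normalized (cx, cy, w, h) values in [0, 1].
    cx, cy, bw, bh = boxes.unbind(-1)
    x0 = (cx - bw / 2) * width
    y0 = (cy - bh / 2) * height
    x1 = (cx + bw / 2) * width
    y1 = (cy + bh / 2) * height
    return torch.stack([x0, y0, x1, y1], dim=-1)


# Scores come from a softmax over the logits; the last class is the
# "no object" background class and is dropped before thresholding.
# probs = outputs.logits.softmax(-1)[..., :-1]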
2 changes: 2 additions & 0 deletions onnx_diagnostic/tasks/__init__.py
@@ -6,6 +6,7 @@
    image_classification,
    image_text_to_text,
    mixture_of_expert,
    object_detection,
    sentence_similarity,
    text_classification,
    text_generation,
@@ -20,6 +21,7 @@
    image_classification,
    image_text_to_text,
    mixture_of_expert,
    object_detection,
    sentence_similarity,
    text_classification,
    text_generation,
123 changes: 123 additions & 0 deletions onnx_diagnostic/tasks/object_detection.py
@@ -0,0 +1,123 @@
from typing import Any, Callable, Dict, Optional, Tuple
import torch
from ..helpers.config_helper import update_config, check_hasattr

__TASK__ = "object-detection"


def reduce_model_config(config: Any) -> Dict[str, Any]:
    """Reduces the model size."""
    check_hasattr(config, ("num_hidden_layers", "hidden_sizes"))
    kwargs = dict(
        num_hidden_layers=(
            min(config.num_hidden_layers, 2)
            if hasattr(config, "num_hidden_layers")
            else len(config.hidden_sizes)
        )
    )
    update_config(config, kwargs)
    return kwargs


def get_inputs(
    model: torch.nn.Module,
    config: Optional[Any],
    input_width: int,
    input_height: int,
    input_channels: int,
    batch_size: int = 2,
    dynamic_rope: bool = False,
    add_second_input: bool = False,
    **kwargs,  # unused
):
    """
    Generates inputs for task ``object-detection``.

    :param model: model used to get any missing information
    :param config: configuration used to generate the model
    :param input_width: input width
    :param input_height: input height
    :param input_channels: number of input channels
    :param batch_size: batch size
    :param dynamic_rope: unused for this task, forwarded when building the second set
    :param add_second_input: if True, adds ``inputs2``, a second set of inputs
        with different dimensions
    :return: dictionary
    """
    assert isinstance(
        input_width, int
    ), f"Unexpected type for input_width {type(input_width)}{config}"
    assert isinstance(
        input_height, int
    ), f"Unexpected type for input_height {type(input_height)}{config}"

    shapes = {
        "pixel_values": {
            0: torch.export.Dim("batch", min=1, max=1024),
            2: "width",
            3: "height",
        }
    }
    inputs = dict(
        pixel_values=torch.randn(batch_size, input_channels, input_width, input_height).clamp(
            -1, 1
        ),
    )
    res = dict(inputs=inputs, dynamic_shapes=shapes)
    if add_second_input:
        res["inputs2"] = get_inputs(
            model=model,
            config=config,
            input_width=input_width + 1,
            input_height=input_height + 1,
            input_channels=input_channels,
            batch_size=batch_size + 1,
            dynamic_rope=dynamic_rope,
            **kwargs,
        )["inputs"]
    return res


def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
    """
    Inputs kwargs.

    If the configuration is None, the function selects typical dimensions.
    """
    if config is not None:
        if (
            hasattr(config, "model_type")
            and config.model_type == "timm_wrapper"
            and not hasattr(config, "num_hidden_layers")
        ):
            input_size = config.pretrained_cfg["input_size"]
            kwargs = dict(
                batch_size=2,
                input_width=input_size[-2],
                input_height=input_size[-1],
                input_channels=input_size[-3],
            )
            return kwargs, get_inputs

        check_hasattr(config, ("image_size", "architectures"), "num_channels")
        if hasattr(config, "image_size"):
            image_size = config.image_size
        else:
            assert config.architectures, f"empty architecture in {config}"
            from ..torch_models.hghub.hub_api import get_architecture_default_values

            default_values = get_architecture_default_values(config.architectures[0])
            image_size = default_values["image_size"]
    if config is None or isinstance(image_size, int):
        kwargs = dict(
            batch_size=2,
            input_width=224 if config is None else image_size,
            input_height=224 if config is None else image_size,
            input_channels=3 if config is None else config.num_channels,
        )
    else:
        kwargs = dict(
            batch_size=2,
            input_width=config.image_size[0],
            input_height=config.image_size[1],
            input_channels=config.num_channels,
        )
    return kwargs, get_inputs
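
Putting the two entry points together, a minimal usage sketch (with ``config=None``, so the typical 224x224, 3-channel defaults apply):

from onnx_diagnostic.tasks import object_detection

# config=None selects the typical dimensions documented above.
kwargs, fct = object_detection.random_input_kwargs(None)
data = fct(model=None, config=None, add_second_input=True, **kwargs)
print(data["inputs"]["pixel_values"].shape)   # torch.Size([2, 3, 224, 224])
print(data["inputs2"]["pixel_values"].shape)  # torch.Size([3, 3, 225, 225])
print(data["dynamic_shapes"])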