Skip to content

Commit 8e12641

Browse files
authored
Merge branch 'main' into new-upload-format
2 parents 0c1312f + 664e444 commit 8e12641

File tree

11 files changed

+754
-800
lines changed

11 files changed

+754
-800
lines changed

inference_experimental/dockerfiles/x86.cpu.base.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ WORKDIR /build
1414

1515
COPY uv.lock uv.lock
1616
COPY pyproject.toml pyproject.toml
17-
RUN UV_PROJECT_ENVIRONMENT=/usr/local $HOME/.local/bin/uv sync --locked --extra torch-cpu --extra onnx-cpu --extra mediapipe --extra grounding-dino
17+
RUN UV_PROJECT_ENVIRONMENT=/usr/local $HOME/.local/bin/uv sync --locked --extra torch-cpu --extra onnx-cpu --extra mediapipe
1818
COPY inference_exp inference_exp
1919
RUN $HOME/.local/bin/uv build
2020
RUN WHEEL=$(ls dist/inference_exp-*.whl) && $HOME/.local/bin/uv pip install --system "${WHEEL}"

inference_experimental/dockerfiles/x86.cu118.base.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ WORKDIR /build
2222
COPY uv.lock uv.lock
2323
COPY pyproject.toml pyproject.toml
2424

25-
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu118 --extra onnx-cu118 --extra mediapipe --extra grounding-dino --extra trt10
25+
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu118 --extra onnx-cu118 --extra mediapipe --extra trt10
2626
COPY inference_exp inference_exp
2727
RUN $HOME/.local/bin/uv build
2828
RUN WHEEL=$(ls dist/inference_exp-*.whl) && $HOME/.local/bin/uv pip install --system "${WHEEL}"

inference_experimental/dockerfiles/x86.cu124.base.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ WORKDIR /build
2222
COPY uv.lock uv.lock
2323
COPY pyproject.toml pyproject.toml
2424

25-
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu124 --extra onnx-cu12 --extra mediapipe --extra grounding-dino --extra trt10
25+
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu124 --extra onnx-cu12 --extra mediapipe --extra trt10
2626

2727
COPY inference_exp inference_exp
2828
RUN $HOME/.local/bin/uv build

inference_experimental/dockerfiles/x86.cu126.base.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ WORKDIR /build
2222
COPY uv.lock uv.lock
2323
COPY pyproject.toml pyproject.toml
2424

25-
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu126 --extra onnx-cu12 --extra mediapipe --extra grounding-dino --extra trt10
25+
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu126 --extra onnx-cu12 --extra mediapipe --extra trt10
2626
COPY inference_exp inference_exp
2727
RUN $HOME/.local/bin/uv build
2828
RUN WHEEL=$(ls dist/inference_exp-*.whl) && $HOME/.local/bin/uv pip install --system "${WHEEL}"

inference_experimental/dockerfiles/x86.cu128.base.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ WORKDIR /build
2020
COPY uv.lock uv.lock
2121
COPY pyproject.toml pyproject.toml
2222

23-
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu128 --extra onnx-cu12 --extra mediapipe --extra grounding-dino --extra trt10
23+
RUN UV_PROJECT_ENVIRONMENT=/usr $HOME/.local/bin/uv sync --locked --extra torch-cu128 --extra onnx-cu12 --extra mediapipe --extra trt10
2424
COPY inference_exp inference_exp
2525
RUN $HOME/.local/bin/uv build
2626
RUN WHEEL=$(ls dist/inference_exp-*.whl) && $HOME/.local/bin/uv pip install --system "${WHEEL}"

inference_experimental/inference_exp/models/auto_loaders/models_registry.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
STRUCTURED_OCR_TASK = "structured-ocr"
1919
TEXT_ONLY_OCR_TASK = "text-only-ocr"
2020
GAZE_DETECTION_TASK = "gaze-detection"
21+
OPEN_VOCABULARY_OBJECT_DETECTION_TASK = "open-vocabulary-object-detection"
2122

2223

2324
@dataclass(frozen=True)
@@ -384,6 +385,14 @@ class RegistryEntry:
384385
module_name="inference_exp.models.l2cs.l2cs_onnx",
385386
class_name="L2CSNetOnnx",
386387
),
388+
(
389+
"grounding-dino",
390+
OPEN_VOCABULARY_OBJECT_DETECTION_TASK,
391+
BackendType.TORCH,
392+
): LazyClass(
393+
module_name="inference_exp.models.grounding_dino.grounding_dino_torch",
394+
class_name="GroundingDinoForObjectDetectionTorch",
395+
),
387396
}
388397

389398

inference_experimental/inference_exp/models/grounding_dino/grounding_dino_torch.py

Lines changed: 25 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,22 @@
1-
import os
1+
import os.path
22
from typing import List, Optional, Tuple, Union
33

44
import numpy as np
55
import torch
66
import torchvision
7+
from groundingdino.util.inference import load_model, predict
78
from inference_exp import Detections
89
from inference_exp.configuration import DEFAULT_DEVICE
910
from inference_exp.entities import ColorFormat, ImageDimensions
10-
from inference_exp.errors import MissingDependencyError, ModelRuntimeError
11+
from inference_exp.errors import ModelRuntimeError
1112
from inference_exp.models.base.object_detection import (
1213
OpenVocabularyObjectDetectionModel,
1314
)
1415
from inference_exp.models.common.model_packages import get_model_package_contents
15-
from inference_exp.utils.download import download_files_to_directory
1616
from torch import nn
1717
from torchvision import transforms
1818
from torchvision.ops import box_convert
1919

20-
try:
21-
from groundingdino.util.inference import load_model, predict
22-
except ImportError as import_error:
23-
raise MissingDependencyError(
24-
message=f"Could not import GroundingDino model - this error means that some additional dependencies "
25-
f"are not installed in the environment. If you run the `inference-exp` library directly in your Python "
26-
f"program, make sure the following extras of the package are installed: `grounding-dino`."
27-
f"If you see this error using Roboflow infrastructure, make sure the service you use does support the model. "
28-
f"You can also contact Roboflow to get support.",
29-
help_url="https://todo",
30-
) from import_error
31-
32-
33-
DEFAULT_CONFIG_URL = "https://raw.githubusercontent.com/roboflow/GroundingDINO/main/groundingdino/config/GroundingDINO_SwinT_OGC.py"
34-
DEFAULT_CONFIG_MD5 = "bdb07fc17b611d622633d133d2cf873a"
35-
3620

3721
class GroundingDinoForObjectDetectionTorch(
3822
OpenVocabularyObjectDetectionModel[
@@ -50,23 +34,16 @@ def from_pretrained(
5034
) -> "GroundingDinoForObjectDetectionTorch":
5135
model_package_content = get_model_package_contents(
5236
model_package_dir=model_name_or_path,
53-
elements=["groundingdino_swint_ogc.pth"],
37+
elements=["weights.pth", "config.py"],
5438
)
55-
config_path = os.path.join(model_name_or_path, "GroundingDINO_SwinT_OGC.py")
56-
if not os.path.exists(config_path):
57-
download_files_to_directory(
58-
target_dir=model_name_or_path,
59-
files_specs=[
60-
(
61-
"GroundingDINO_SwinT_OGC.py",
62-
DEFAULT_CONFIG_URL,
63-
DEFAULT_CONFIG_MD5,
64-
)
65-
],
66-
)
39+
text_encoder_dir = os.path.join(model_name_or_path, "text_encoder")
40+
loader_kwargs = {}
41+
if os.path.isdir(text_encoder_dir):
42+
loader_kwargs["text_encoder_type"] = text_encoder_dir
6743
model = load_model(
68-
model_config_path=config_path,
69-
model_checkpoint_path=model_package_content["groundingdino_swint_ogc.pth"],
44+
model_config_path=model_package_content["config.py"],
45+
model_checkpoint_path=model_package_content["weights.pth"],
46+
**loader_kwargs,
7047
).to(device)
7148
return cls(model=model, device=device)
7249

@@ -176,19 +153,20 @@ def forward(
176153
text_threshold = conf_thresh
177154
caption = ". ".join(classes)
178155
all_boxes, all_logits, all_phrases = [], [], []
179-
for image in pre_processed_images:
180-
boxes, logits, phrases = predict(
181-
model=self._model,
182-
image=image,
183-
caption=caption,
184-
box_threshold=conf_thresh,
185-
text_threshold=text_threshold,
186-
device=self._device,
187-
remove_combined=True,
188-
)
189-
all_boxes.append(boxes)
190-
all_logits.append(logits)
191-
all_phrases.append(phrases)
156+
with torch.inference_mode():
157+
for image in pre_processed_images:
158+
boxes, logits, phrases = predict(
159+
model=self._model,
160+
image=image,
161+
caption=caption,
162+
box_threshold=conf_thresh,
163+
text_threshold=text_threshold,
164+
device=self._device,
165+
remove_combined=True,
166+
)
167+
all_boxes.append(boxes)
168+
all_logits.append(logits)
169+
all_phrases.append(phrases)
192170
return all_boxes, all_logits, all_phrases, classes
193171

194172
def post_process(

inference_experimental/pyproject.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ dependencies = [
3131
"scikit-image>=0.24.0,<0.26.0",
3232
"easyocr~=1.7.2",
3333
"sentencepiece>=0.2.0,<0.3.0",
34+
"rf_groundingdino==0.3.0"
3435
]
3536

3637
[project.optional-dependencies]
@@ -83,9 +84,6 @@ onnx-jp6-cu126 = [
8384
mediapipe = [
8485
"rf-mediapipe>=0.9,<0.11.0"
8586
]
86-
grounding-dino = [
87-
"rf_groundingdino==0.2.0"
88-
]
8987
trt10 = [
9088
"tensorrt-cu12>=10.0.0,<11.0.0; platform_system == 'Linux' or platform_system == 'Windows'",
9189
"tensorrt-lean-cu12>=10.0.0,<11.0.0; platform_system == 'Linux' or platform_system == 'Windows'",
(new file — filename not captured in this page extract) Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import numpy as np
2+
import pytest
3+
from inference_exp import AutoModel
4+
5+
6+
@pytest.mark.e2e_model_inference
7+
def test_grounding_dino(dog_image_numpy: np.ndarray, roboflow_api_key: str) -> None:
8+
# given
9+
model = AutoModel.from_pretrained("grounding-dino", api_key=roboflow_api_key)
10+
11+
# when
12+
predictions = model(dog_image_numpy, ["dog", "person", "bagpack"], conf_thresh=0.33)
13+
14+
# then
15+
assert len(predictions[0].xyxy) == 3
16+
assert set(predictions[0].class_id.tolist()) == {0, 1, 2}

0 commit comments

Comments (0)