Skip to content
Open

Sam3d #1784

Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6aa2fcb
logs added
Matvezy Nov 25, 2025
5d9fab4
style
Matvezy Nov 25, 2025
d9f272a
init
Matvezy Nov 25, 2025
f296739
Merge branch 'main' of https://github.com/roboflow/inference into sam3d
Matvezy Nov 25, 2025
b8e4ce4
configs
Matvezy Nov 26, 2025
4f1a3f0
Merge branch 'main' of https://github.com/roboflow/inference into sam3d
Matvezy Nov 26, 2025
69b5a8d
update trimmed
Matvezy Nov 27, 2025
5ebfeb6
Merge branch 'main' of https://github.com/roboflow/inference into sam3d
Matvezy Dec 1, 2025
088087f
expand cuda arch list
Matvezy Dec 1, 2025
2cbeb6c
don't return request
Matvezy Dec 1, 2025
a0b7436
cleanup
Matvezy Dec 1, 2025
69967a2
api
Matvezy Dec 7, 2025
b95b9bb
Merge branch 'main' of https://github.com/roboflow/inference into sam3d
Matvezy Dec 7, 2025
5296419
multi-obiect
Matvezy Dec 8, 2025
fce6e48
multi-obiect
Matvezy Dec 8, 2025
98d1798
style
Matvezy Dec 8, 2025
c3a0f24
fix block
Matvezy Dec 8, 2025
fc0e554
fix block
Matvezy Dec 8, 2025
1530eec
fix block
Matvezy Dec 8, 2025
498fbc4
add license
Matvezy Dec 8, 2025
e02270c
add license
Matvezy Dec 8, 2025
7f86aa5
fix block
Matvezy Dec 8, 2025
71ad804
style
Matvezy Dec 8, 2025
8fd1c80
Merge branch 'main' into sam3d
Matvezy Dec 8, 2025
8db5b6d
readme
Matvezy Dec 10, 2025
25ddad8
Merge branch 'sam3d' of https://github.com/roboflow/inference into sam3d
Matvezy Dec 10, 2025
8091f52
Merge branch 'main' of https://github.com/roboflow/inference into sam3d
Matvezy Dec 10, 2025
a053976
download
Matvezy Dec 10, 2025
e998f2c
style
Matvezy Dec 10, 2025
e25d374
Merge branch 'main' of https://github.com/roboflow/inference into sam3d
Matvezy Dec 11, 2025
f175ba1
have universal conversion
Matvezy Dec 11, 2025
ae98950
style
Matvezy Dec 11, 2025
afb92f6
update typing
Matvezy Dec 11, 2025
e7fa1e6
update typing
Matvezy Dec 11, 2025
174b16e
update typing
Matvezy Dec 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/dockerfiles/Dockerfile.onnx.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,4 @@ ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True
ENV STREAM_API_PRELOADED_PROCESSES=2

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
140 changes: 140 additions & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.3d
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Builder stage: CUDA 12.4.1 cuDNN "devel" image in which every Python requirement set
# (including the SAM3 3D one) is installed. The runtime stage later copies
# /usr/local/lib/python3.10 and /usr/local/bin out of this stage.
FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 as builder

WORKDIR /app

# Install system packages in a single layer; the apt lists are removed before and after
# so they are not stored in the layer.
RUN rm -rf /var/lib/apt/lists/* && apt-get clean && apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libxext6 \
libopencv-dev \
uvicorn \
python3-pip \
git \
libgdal-dev \
libvips-dev \
wget \
rustc \
cargo \
&& rm -rf /var/lib/apt/lists/*

# Copy only the requirement files into /app (not the whole repository); the installs
# below read them from the working directory.
COPY requirements/requirements.sam.txt \
requirements/requirements.sam3.txt \
requirements/requirements.clip.txt \
requirements/requirements.http.txt \
requirements/requirements.gpu.txt \
requirements/requirements.gaze.txt \
requirements/requirements.doctr.txt \
requirements/requirements.groundingdino.txt \
requirements/requirements.yolo_world.txt \
requirements/_requirements.txt \
requirements/requirements.transformers.txt \
requirements/requirements.pali.flash_attn.txt \
requirements/requirements.easyocr.txt \
requirements/requirements.modal.txt \
requirements/requirements.sam3_3d.txt \
./

# Upgrade pip and install uv, then install the main requirement sets with one
# `uv pip install --system` call. requirements.pali.flash_attn.txt and
# requirements.sam3_3d.txt are deliberately left out here and installed by the
# separate pip steps further down.
# NOTE(review): `rm -rf ~/.cache/pip` only removes pip's cache directory; uv presumably
# keeps its own cache elsewhere, so this may not free anything after a uv install - confirm.
RUN python3 -m pip install -U pip uv
RUN uv pip install --system \
-r _requirements.txt \
-r requirements.doctr.txt \
-r requirements.sam.txt \
-r requirements.sam3.txt \
-r requirements.clip.txt \
-r requirements.http.txt \
-r requirements.gpu.txt \
-r requirements.gaze.txt \
-r requirements.groundingdino.txt \
-r requirements.yolo_world.txt \
-r requirements.transformers.txt \
-r requirements.easyocr.txt \
-r requirements.modal.txt \
jupyterlab \
"setuptools<=75.5.0" \
--upgrade \
&& rm -rf ~/.cache/pip

# Install setup.py requirements for flash_attn
RUN python3 -m pip install packaging==24.1 && rm -rf ~/.cache/pip

# Install flash_attn required for Paligemma and Florence2
RUN python3 -m pip install -r requirements.pali.flash_attn.txt --no-dependencies --no-build-isolation && rm -rf ~/.cache/pip

# CUDA compute capabilities (7.5 through 9.0) exported before the SAM3 3D requirements
# are installed without build isolation - presumably picked up by CUDA extension builds
# triggered by requirements.sam3_3d.txt; confirm against that file.
ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0"
RUN python3 -m pip install --no-cache-dir --no-build-isolation -r requirements.sam3_3d.txt && rm -rf ~/.cache/pip
# Start runtime stage
# Based on the cuDNN "runtime" variant of the same CUDA 12.4.1 / Ubuntu 22.04 image
# that the builder stage uses in its "devel" variant.
FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 as runtime

WORKDIR /app

# Copy Python and installed packages from builder
COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
COPY --from=builder /usr/local/bin /usr/local/bin

# Install runtime dependencies
# The cuda-keyring package is downloaded and installed first - presumably so that the
# versioned CUDA 12.4 apt packages listed below can be resolved from NVIDIA's repository.
# NOTE(review): `dpkg -i ... || true` tolerates a failed keyring install; if it fails,
# the later `apt-get install` is where the build would stop (set -eux is active).
# Besides the packages also installed in the builder, this adds GL/EGL/GLES libraries and
# the CUDA compiler plus development packages (nvcc, cudart, cusparse, cublas, cusolver,
# curand, cufft) - presumably for compiling torch extensions at runtime (see
# TORCH_EXTENSIONS_DIR below); confirm.
ADD https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb /tmp/cuda-keyring.deb
RUN set -eux; \
rm -rf /var/lib/apt/lists/*; apt-get clean; \
dpkg -i /tmp/cuda-keyring.deb || true; \
rm -f /tmp/cuda-keyring.deb; \
apt-get update -y; \
DEBIAN_FRONTEND=noninteractive apt-get install -y \
libxext6 \
libopencv-dev \
uvicorn \
python3-pip \
git \
libgdal-dev \
libvips-dev \
wget \
rustc \
cargo \
libgl1 \
libegl1 \
libgles2 \
libglvnd0 \
libglx0 \
cuda-nvcc-12-4 \
cuda-cudart-dev-12-4 \
libcusparse-dev-12-4 \
libcublas-dev-12-4 \
libcusolver-dev-12-4 \
libcurand-dev-12-4 \
libcufft-dev-12-4; \
rm -rf /var/lib/apt/lists/*

# Copy the whole build context into /build, build the wheels with the
# create_wheels_for_gpu_notebook make target, and install the resulting
# inference_cli / inference_core / inference_gpu / inference_sdk wheels.
# The `python` symlink is created before make runs - presumably because the
# make target invokes `python`; confirm against the Makefile.
WORKDIR /build
COPY . .
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN /bin/make create_wheels_for_gpu_notebook
RUN pip3 install --no-cache-dir dist/inference_cli*.whl dist/inference_core*.whl dist/inference_gpu*.whl dist/inference_sdk*.whl "setuptools<=75.5.0"


# Example notebooks are shipped in the image under /notebooks.
WORKDIR /notebooks
COPY examples/notebooks .

# Application directory: the inference package sources plus the gpu_http module that
# the ENTRYPOINT below serves with uvicorn.
WORKDIR /app/
COPY inference inference
COPY docker/config/gpu_http.py gpu_http.py

# Default configuration baked into the image; every value can be overridden when the
# container is started.
ENV VERSION_CHECK_MODE=continuous
ENV PROJECT=roboflow-platform
ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_SAM3_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True
ENV STREAM_API_PRELOADED_PROCESSES=2
# The "graphics" driver capability is requested in addition to compute and utility,
# alongside the GL/EGL libraries installed above - presumably for rendering; confirm.
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics
ENV NVIDIA_VISIBLE_DEVICES=all
ENV TORCH_EXTENSIONS_DIR=/tmp/torch_extensions
ENV SPARSE_ATTN_BACKEND="flash_attn"
ENV MODEL_LOCK_ACQUIRE_TIMEOUT="300"
# Sets the SAM3 3D objects feature flag to True for this image.
ENV SAM3_3D_OBJECTS_ENABLED=True

# Shell-form ENTRYPOINT: $NUM_WORKERS, $HOST and $PORT are expanded by the shell at
# container start, so overriding those variables changes how uvicorn is launched.
ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
47 changes: 47 additions & 0 deletions inference/core/entities/requests/sam3_3d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from typing import List, Optional, Union

from pydantic import Field, validator

from inference.core.entities.requests.inference import (
BaseRequest,
InferenceRequestImage,
)


class Sam3_3D_Objects_InferenceRequest(BaseRequest):
    """Request payload for SAM3D 3D object generation.

    Attributes:
        api_key (Optional[str]): Roboflow API Key.
        image (InferenceRequestImage): Image from which the 3D output is generated.
        mask_input: Region(s) of the image to process, given as either:
            - one mask, a flat list [x1, y1, x2, y2, x3, y3, ...] (COCO polygon format)
            - several masks, a list of such flat lists [[x1, y1, ...], [x1, y1, ...], ...]
        model_id (Optional[str]): Model identifier; "sam3-3d-objects" is used
            when the field is omitted or explicitly set to None.
    """

    image: InferenceRequestImage = Field(
        description="The input image to be used for 3D generation.",
    )

    mask_input: Union[List[float], List[List[float]]] = Field(
        description="The mask input defining object region(s). Can be either a single mask "
        "as a flat list of polygon coordinates [x1, y1, x2, y2, ...] (COCO polygon format), "
        "or multiple masks as a list of flat lists [[x1, y1, ...], [x1, y1, ...], ...].",
        examples=[
            [100.0, 100.0, 200.0, 100.0, 200.0, 200.0, 100.0, 200.0],
            [
                [100.0, 100.0, 200.0, 100.0, 200.0, 200.0],
                [300.0, 300.0, 400.0, 300.0, 400.0, 400.0],
            ],
        ],
    )

    model_id: Optional[str] = Field(
        default="sam3-3d-objects", description="The model ID for SAM3_3D."
    )

    @validator("model_id", always=True)
    def validate_model_id(cls, value):
        """Return ``value`` unchanged, or the default model ID when it is None."""
        # `always=True` makes this run even when the field was not supplied.
        return "sam3-3d-objects" if value is None else value
54 changes: 54 additions & 0 deletions inference/core/entities/responses/sam3_3d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class Sam3_3D_Objects_Metadata(BaseModel):
    """3D transformation metadata; every field is optional and defaults to None.

    Attributes:
        rotation: Rotation parameters (quaternion, 4 floats).
        translation: Translation parameters (x, y, z).
        scale: Scale parameters (x, y, z).
    """

    rotation: Optional[List[float]] = Field(
        description="Rotation transformation parameters (quaternion, 4 floats)",
        default=None,
    )
    translation: Optional[List[float]] = Field(
        description="Translation transformation parameters (x, y, z)",
        default=None,
    )
    scale: Optional[List[float]] = Field(
        description="Scale transformation parameters (x, y, z)",
        default=None,
    )


class Sam3_3D_Object_Item(BaseModel):
    """One 3D object result: its mesh, its gaussian output and its transformation metadata.

    Attributes:
        mesh_glb: The object's mesh as GLB bytes, or None.
        gaussian_ply: The object's Gaussian splatting as PLY bytes, or None.
        metadata: Rotation / translation / scale; a fresh, empty
            ``Sam3_3D_Objects_Metadata`` is created when not supplied.
    """

    mesh_glb: Optional[bytes] = Field(
        description="The 3D mesh in GLB format (binary)",
        default=None,
    )
    gaussian_ply: Optional[bytes] = Field(
        description="The Gaussian splatting in PLY format (binary)",
        default=None,
    )
    metadata: Sam3_3D_Objects_Metadata = Field(
        description="3D transformation metadata (rotation, translation, scale)",
        default_factory=Sam3_3D_Objects_Metadata,
    )

    class Config:
        arbitrary_types_allowed = True


class Sam3_3D_Objects_Response(BaseModel):
    """Response model for SAM3 3D object generation.

    Attributes:
        mesh_glb: The 3D scene mesh as GLB bytes, or None.
        gaussian_ply: The combined Gaussian splatting as PLY bytes, or None.
        objects: Individual 3D objects with their meshes, gaussians and
            metadata; defaults to an empty list.
        time: Seconds taken to produce the 3D outputs, including preprocessing.
            This field is required.
    """

    mesh_glb: Optional[bytes] = Field(
        default=None, description="The 3D scene mesh in GLB format (binary)"
    )
    gaussian_ply: Optional[bytes] = Field(
        default=None,
        description="The combined Gaussian splatting in PLY format (binary)",
    )
    # default_factory gives every instance its own list without relying on a shared
    # mutable literal, matching how `metadata` is declared on Sam3_3D_Object_Item.
    objects: List[Sam3_3D_Object_Item] = Field(
        default_factory=list,
        description="List of individual 3D objects with their meshes, gaussians, and metadata",
    )
    time: float = Field(
        description="The time in seconds it took to produce the 3D outputs including preprocessing"
    )

    class Config:
        arbitrary_types_allowed = True
2 changes: 2 additions & 0 deletions inference/core/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@

FLORENCE2_ENABLED = str2bool(os.getenv("FLORENCE2_ENABLED", True))

SAM3_3D_OBJECTS_ENABLED = str2bool(os.getenv("SAM3_3D_OBJECTS_ENABLED", False))

# Flag to enable YOLO-World core model, default is True
CORE_MODEL_YOLO_WORLD_ENABLED = str2bool(
os.getenv("CORE_MODEL_YOLO_WORLD_ENABLED", True)
Expand Down
5 changes: 5 additions & 0 deletions inference/core/registries/roboflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"sam2": ("embed", "sam2"),
"sam3": ("embed", "sam3"),
"sam3/sam3_interactive": ("interactive-segmentation", "sam3"),
"sam3-3d-objects": ("3d-reconstruction", "sam3-3d-objects"),
"gaze": ("gaze", "l2cs"),
"doctr": ("ocr", "doctr"),
"easy_ocr": ("ocr", "easy_ocr"),
Expand Down Expand Up @@ -158,8 +159,12 @@ def get_model_type(
MissingDefaultModelError: If default model is not configured and API does not provide this info
MalformedRoboflowAPIResponseError: Roboflow API responds in invalid format.
"""

model_id = resolve_roboflow_model_alias(model_id=model_id)
dataset_id, version_id = get_model_id_chunks(model_id=model_id)
print(
f"Resolved model_id: {model_id}, dataset_id: {dataset_id}, version_id: {version_id}"
)

# first check if the model id as a whole is in the GENERIC_MODELS dictionary
if model_id in GENERIC_MODELS:
Expand Down
1 change: 1 addition & 0 deletions inference/core/utils/roboflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def get_model_id_chunks(
"moondream2",
"depth-anything-v2",
"perception_encoder",
"sam3-3d-objects",
}:
return dataset_id, version_id

Expand Down
7 changes: 7 additions & 0 deletions inference/core/workflows/core_steps/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES,
ALLOW_WORKFLOW_BLOCKS_ACCESSING_LOCAL_STORAGE,
API_KEY,
SAM3_3D_OBJECTS_ENABLED,
WORKFLOW_BLOCKS_WRITE_DIRECTORY,
WORKFLOWS_STEP_EXECUTION_MODE,
)
Expand Down Expand Up @@ -509,6 +510,11 @@
)
from inference.core.workflows.prototypes.block import WorkflowBlock

if SAM3_3D_OBJECTS_ENABLED:
from inference.core.workflows.core_steps.models.foundation.segment_anything3_3d.v1 import (
SegmentAnything3_3D_ObjectsBlockV1,
)

REGISTERED_INITIALIZERS = {
"api_key": API_KEY,
"cache": cache,
Expand Down Expand Up @@ -670,6 +676,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
SegmentAnything2BlockV1,
SegmentAnything3BlockV1,
SegmentAnything3BlockV2,
SegmentAnything3_3D_ObjectsBlockV1,
SegPreviewBlockV1,
StabilityAIInpaintingBlockV1,
StabilityAIImageGenBlockV1,
Expand Down
Loading
Loading