roboflow · Matvezy · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025
@@ -115,4 +115,4 @@ ENV ENABLE_STREAM_API=True
 ENV ENABLE_PROMETHEUS=True
 ENV STREAM_API_PRELOADED_PROCESSES=2
 
-ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
+ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
@@ -0,0 +1,140 @@
+# Builder stage: CUDA devel image in which all Python requirements are installed and compiled
+FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 AS builder
+WORKDIR /app
+
+RUN rm -rf /var/lib/apt/lists/* && apt-get clean && apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    libxext6 \
+    libopencv-dev \
+    uvicorn \
+    python3-pip \
+    git \
+    libgdal-dev \
+    libvips-dev \
+    wget \
+    rustc \
+    cargo \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements/requirements.sam.txt \
+    requirements/requirements.sam3.txt \
+    requirements/requirements.clip.txt \
+    requirements/requirements.http.txt \
+    requirements/requirements.gpu.txt \
+    requirements/requirements.gaze.txt \
+    requirements/requirements.doctr.txt \
+    requirements/requirements.groundingdino.txt \
+    requirements/requirements.yolo_world.txt \
+    requirements/_requirements.txt \
+    requirements/requirements.transformers.txt \
+    requirements/requirements.pali.flash_attn.txt \
+    requirements/requirements.easyocr.txt \
+    requirements/requirements.modal.txt \
+    requirements/requirements.sam3_3d.txt \
+    ./
+
+RUN python3 -m pip install -U pip uv
+RUN uv pip install --system \
+    -r _requirements.txt \
+    -r requirements.doctr.txt \
+    -r requirements.sam.txt \
+    -r requirements.sam3.txt \
+    -r requirements.clip.txt \
+    -r requirements.http.txt \
+    -r requirements.gpu.txt \
+    -r requirements.gaze.txt \
+    -r requirements.groundingdino.txt \
+    -r requirements.yolo_world.txt \
+    -r requirements.transformers.txt \
+    -r requirements.easyocr.txt \
+    -r requirements.modal.txt \
+    jupyterlab \
+    "setuptools<=75.5.0" \
+    --upgrade \
+    && rm -rf ~/.cache/pip ~/.cache/uv
+
+# flash_attn is built without build isolation below, so its setup.py requirement (packaging) goes in first
+RUN python3 -m pip install packaging==24.1 && rm -rf ~/.cache/pip
+# Install flash_attn required for Paligemma and Florence2 (no dependency resolution, no build isolation)
+RUN python3 -m pip install -r requirements.pali.flash_attn.txt --no-dependencies --no-build-isolation && rm -rf ~/.cache/pip
+
+# CUDA compute capabilities targeted when the SAM3 3D requirements compile their GPU extensions
+ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0"
+RUN python3 -m pip install --no-cache-dir --no-build-isolation -r requirements.sam3_3d.txt && rm -rf ~/.cache/pip
+# Runtime stage: CUDA runtime base image; Python packages are copied in from the builder stage
+FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS runtime
+WORKDIR /app
+
+# Copy installed Python packages and their console scripts from builder
+COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Install runtime dependencies; the CUDA keyring install is best-effort (a dpkg failure is ignored)
+ADD https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb /tmp/cuda-keyring.deb
+RUN set -eux; \
+    rm -rf /var/lib/apt/lists/*; apt-get clean; \
+    dpkg -i /tmp/cuda-keyring.deb || true; \
+    rm -f /tmp/cuda-keyring.deb; \
+    apt-get update -y; \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        libxext6 \
+        libopencv-dev \
+        uvicorn \
+        python3-pip \
+        git \
+        libgdal-dev \
+        libvips-dev \
+        wget \
+        rustc \
+        cargo \
+        libgl1 \
+        libegl1 \
+        libgles2 \
+        libglvnd0 \
+        libglx0 \
+        cuda-nvcc-12-4 \
+        cuda-cudart-dev-12-4 \
+        libcusparse-dev-12-4 \
+        libcublas-dev-12-4 \
+        libcusolver-dev-12-4 \
+        libcurand-dev-12-4 \
+        libcufft-dev-12-4; \
+    rm -rf /var/lib/apt/lists/*
+
+# Build the inference wheels from the repository sources and install them
+WORKDIR /build
+COPY . .
+RUN ln -sf /usr/bin/python3 /usr/bin/python
+RUN /bin/make create_wheels_for_gpu_notebook
+RUN pip3 install --no-cache-dir dist/inference_cli*.whl dist/inference_core*.whl dist/inference_gpu*.whl dist/inference_sdk*.whl "setuptools<=75.5.0"
+
+# Ship the example notebooks inside the image
+WORKDIR /notebooks
+COPY examples/notebooks .
+
+WORKDIR /app/
+COPY inference inference
+COPY docker/config/gpu_http.py gpu_http.py
+
+ENV VERSION_CHECK_MODE=continuous
+ENV PROJECT=roboflow-platform
+ENV NUM_WORKERS=1
+ENV HOST=0.0.0.0
+ENV PORT=9001
+ENV WORKFLOWS_STEP_EXECUTION_MODE=local
+ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
+ENV API_LOGGING_ENABLED=True
+ENV LMM_ENABLED=True
+ENV CORE_MODEL_SAM2_ENABLED=True
+ENV CORE_MODEL_SAM3_ENABLED=True
+ENV CORE_MODEL_OWLV2_ENABLED=True
+ENV ENABLE_STREAM_API=True
+ENV ENABLE_PROMETHEUS=True
+ENV STREAM_API_PRELOADED_PROCESSES=2
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV TORCH_EXTENSIONS_DIR=/tmp/torch_extensions
+ENV SPARSE_ATTN_BACKEND="flash_attn"
+ENV MODEL_LOCK_ACQUIRE_TIMEOUT="300"
+ENV SAM3_3D_OBJECTS_ENABLED=True
+
+ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
@@ -0,0 +1,47 @@
+from typing import List, Optional, Union
+
+from pydantic import Field, validator
+
+from inference.core.entities.requests.inference import (
+    BaseRequest,
+    InferenceRequestImage,
+)
+
+
+class Sam3_3D_Objects_InferenceRequest(BaseRequest):
+    """SAM3D inference request for 3D object generation.
+
+    Attributes:
+        api_key (Optional[str]): Roboflow API Key.
+        image (InferenceRequestImage): The input image to be used for 3D generation.
+        mask_input: The mask input defining object region(s). Can be either:
+            - A single mask as flat list: [x1, y1, x2, y2, x3, y3, ...] (COCO polygon format)
+            - Multiple masks as list of flat lists: [[x1, y1, ...], [x1, y1, ...], ...]
+    """
+
+    image: InferenceRequestImage = Field(
+        description="The input image to be used for 3D generation.",
+    )
+
+    mask_input: Union[List[float], List[List[float]]] = Field(
+        description="The mask input defining object region(s). Can be either a single mask "
+        "as a flat list of polygon coordinates [x1, y1, x2, y2, ...] (COCO polygon format), "
+        "or multiple masks as a list of flat lists [[x1, y1, ...], [x1, y1, ...], ...].",
+        examples=[
+            [100.0, 100.0, 200.0, 100.0, 200.0, 200.0, 100.0, 200.0],
+            [
+                [100.0, 100.0, 200.0, 100.0, 200.0, 200.0],
+                [300.0, 300.0, 400.0, 300.0, 400.0, 400.0],
+            ],
+        ],
+    )
+
+    model_id: Optional[str] = Field(
+        default="sam3-3d-objects", description="The model ID for SAM3_3D."
+    )
+
+    @validator("model_id", always=True)
+    def validate_model_id(cls, value):
+        if value is not None:
+            return value
+        return "sam3-3d-objects"
@@ -0,0 +1,54 @@
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class Sam3_3D_Objects_Metadata(BaseModel):
+    rotation: Optional[List[float]] = Field(
+        default=None,
+        description="Rotation transformation parameters (quaternion, 4 floats)",
+    )
+    translation: Optional[List[float]] = Field(
+        default=None, description="Translation transformation parameters (x, y, z)"
+    )
+    scale: Optional[List[float]] = Field(
+        default=None, description="Scale transformation parameters (x, y, z)"
+    )
+
+
+class Sam3_3D_Object_Item(BaseModel):
+    """Individual 3D object output with mesh, gaussian, and transformation metadata."""
+
+    mesh_glb: Optional[bytes] = Field(
+        default=None, description="The 3D mesh in GLB format (binary)"
+    )
+    gaussian_ply: Optional[bytes] = Field(
+        default=None, description="The Gaussian splatting in PLY format (binary)"
+    )
+    metadata: Sam3_3D_Objects_Metadata = Field(
+        default_factory=Sam3_3D_Objects_Metadata,
+        description="3D transformation metadata (rotation, translation, scale)",
+    )
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class Sam3_3D_Objects_Response(BaseModel):
+    mesh_glb: Optional[bytes] = Field(
+        default=None, description="The 3D scene mesh in GLB format (binary)"
+    )
+    gaussian_ply: Optional[bytes] = Field(
+        default=None,
+        description="The combined Gaussian splatting in PLY format (binary)",
+    )
+    objects: List[Sam3_3D_Object_Item] = Field(
+        default=[],
+        description="List of individual 3D objects with their meshes, gaussians, and metadata",
+    )
+    time: float = Field(
+        description="The time in seconds it took to produce the 3D outputs including preprocessing"
+    )
+
+    class Config:
+        arbitrary_types_allowed = True
@@ -201,6 +201,8 @@
 
 FLORENCE2_ENABLED = str2bool(os.getenv("FLORENCE2_ENABLED", True))
 
+SAM3_3D_OBJECTS_ENABLED = str2bool(os.getenv("SAM3_3D_OBJECTS_ENABLED", False))  # opt-in: getenv falls back to False
+
 # Flag to enable YOLO-World core model, default is True
 CORE_MODEL_YOLO_WORLD_ENABLED = str2bool(
     os.getenv("CORE_MODEL_YOLO_WORLD_ENABLED", True)

@@ -49,6 +49,7 @@
     "sam2": ("embed", "sam2"),
     "sam3": ("embed", "sam3"),
     "sam3/sam3_interactive": ("interactive-segmentation", "sam3"),
+    "sam3-3d-objects": ("3d-reconstruction", "sam3-3d-objects"),
     "gaze": ("gaze", "l2cs"),
     "doctr": ("ocr", "doctr"),
     "easy_ocr": ("ocr", "easy_ocr"),
@@ -158,8 +159,12 @@ def get_model_type(
         MissingDefaultModelError: If default model is not configured and API does not provide this info
         MalformedRoboflowAPIResponseError: Roboflow API responds in invalid format.
     """
+
     model_id = resolve_roboflow_model_alias(model_id=model_id)
     dataset_id, version_id = get_model_id_chunks(model_id=model_id)
+    print(
+        f"Resolved model_id: {model_id}, dataset_id: {dataset_id}, version_id: {version_id}"
+    )
 
     # first check if the model id as a whole is in the GENERIC_MODELS dictionary
     if model_id in GENERIC_MODELS:

@@ -34,6 +34,7 @@ def get_model_id_chunks(
         "moondream2",
         "depth-anything-v2",
         "perception_encoder",
+        "sam3-3d-objects",
     }:
         return dataset_id, version_id
 

@@ -5,6 +5,7 @@
     ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES,
     ALLOW_WORKFLOW_BLOCKS_ACCESSING_LOCAL_STORAGE,
     API_KEY,
+    SAM3_3D_OBJECTS_ENABLED,
     WORKFLOW_BLOCKS_WRITE_DIRECTORY,
     WORKFLOWS_STEP_EXECUTION_MODE,
 )
@@ -509,6 +510,11 @@
 )
 from inference.core.workflows.prototypes.block import WorkflowBlock
 
+if SAM3_3D_OBJECTS_ENABLED:
+    from inference.core.workflows.core_steps.models.foundation.segment_anything3_3d.v1 import (
+        SegmentAnything3_3D_ObjectsBlockV1,
+    )
+
 REGISTERED_INITIALIZERS = {
     "api_key": API_KEY,
     "cache": cache,
@@ -670,6 +676,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
         SegmentAnything2BlockV1,
         SegmentAnything3BlockV1,
         SegmentAnything3BlockV2,
+        SegmentAnything3_3D_ObjectsBlockV1,
         SegPreviewBlockV1,
         StabilityAIInpaintingBlockV1,
         StabilityAIImageGenBlockV1,