1717 process_transformers_v4_segmentation_result ,
1818 process_transformers_v5_segmentation_result ,
1919)
20- from supervision .detection .utils .converters import mask_to_xyxy , xywh_to_xyxy
20+ from supervision .detection .utils .converters import (
21+ mask_to_xyxy ,
22+ polygon_to_mask ,
23+ xywh_to_xyxy ,
24+ )
2125from supervision .detection .utils .internal import (
2226 extract_ultralytics_masks ,
2327 get_data_item ,
5256)
5357from supervision .geometry .core import Position
5458from supervision .utils .internal import deprecated , get_instance_variables
55- from supervision .validators import validate_detections_fields
59+ from supervision .validators import validate_detections_fields , validate_resolution
5660
5761
5862@dataclass
@@ -280,9 +284,11 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
280284 xyxy = ultralytics_results .obb .xyxy .cpu ().numpy (),
281285 confidence = ultralytics_results .obb .conf .cpu ().numpy (),
282286 class_id = class_id ,
283- tracker_id = ultralytics_results .obb .id .int ().cpu ().numpy ()
284- if ultralytics_results .obb .id is not None
285- else None ,
287+ tracker_id = (
288+ ultralytics_results .obb .id .int ().cpu ().numpy ()
289+ if ultralytics_results .obb .id is not None
290+ else None
291+ ),
286292 data = {
287293 ORIENTED_BOX_COORDINATES : oriented_box_coordinates ,
288294 CLASS_NAME_DATA_FIELD : class_names ,
@@ -308,9 +314,11 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
308314 confidence = ultralytics_results .boxes .conf .cpu ().numpy (),
309315 class_id = class_id ,
310316 mask = extract_ultralytics_masks (ultralytics_results ),
311- tracker_id = ultralytics_results .boxes .id .int ().cpu ().numpy ()
312- if ultralytics_results .boxes .id is not None
313- else None ,
317+ tracker_id = (
318+ ultralytics_results .boxes .id .int ().cpu ().numpy ()
319+ if ultralytics_results .boxes .id is not None
320+ else None
321+ ),
314322 data = {CLASS_NAME_DATA_FIELD : class_names },
315323 )
316324
@@ -464,9 +472,11 @@ def from_mmdetection(cls, mmdet_results) -> Detections:
464472 xyxy = mmdet_results .pred_instances .bboxes .cpu ().numpy (),
465473 confidence = mmdet_results .pred_instances .scores .cpu ().numpy (),
466474 class_id = mmdet_results .pred_instances .labels .cpu ().numpy ().astype (int ),
467- mask = mmdet_results .pred_instances .masks .cpu ().numpy ()
468- if "masks" in mmdet_results .pred_instances
469- else None ,
475+ mask = (
476+ mmdet_results .pred_instances .masks .cpu ().numpy ()
477+ if "masks" in mmdet_results .pred_instances
478+ else None
479+ ),
470480 )
471481
472482 @classmethod
@@ -584,9 +594,11 @@ class IDs, and confidences of the predictions.
584594 return cls (
585595 xyxy = detectron2_results ["instances" ].pred_boxes .tensor .cpu ().numpy (),
586596 confidence = detectron2_results ["instances" ].scores .cpu ().numpy (),
587- mask = detectron2_results ["instances" ].pred_masks .cpu ().numpy ()
588- if hasattr (detectron2_results ["instances" ], "pred_masks" )
589- else None ,
597+ mask = (
598+ detectron2_results ["instances" ].pred_masks .cpu ().numpy ()
599+ if hasattr (detectron2_results ["instances" ], "pred_masks" )
600+ else None
601+ ),
590602 class_id = detectron2_results ["instances" ]
591603 .pred_classes .cpu ()
592604 .numpy ()
@@ -687,6 +699,119 @@ def from_sam(cls, sam_result: list[dict]) -> Detections:
687699 xyxy = xywh_to_xyxy (xywh = xywh )
688700 return cls (xyxy = xyxy , mask = mask )
689701
@classmethod
def from_sam3(
    cls, sam3_result: dict | Any, resolution_wh: tuple[int, int]
) -> Detections:
    """
    Build a Detections instance from a
    [SAM 3](https://github.com/facebookresearch/sam3) inference result.

    Each prediction that carries at least one polygon becomes one detection:
    all of its polygons are rasterized and merged into a single boolean mask,
    and the bounding box is derived from that mask.

    Args:
        sam3_result (dict | Any): The SAM 3 inference output. Either a dict
            or an object exposing `prompt_results`; every prompt result in
            turn exposes `predictions` and, optionally, `prompt_index`.
            Dict-style and attribute-style access are both supported at
            every level.
        resolution_wh (tuple[int, int]): The width and height of the image
            the masks are rendered for.

    Returns:
        Detections: A new Detections object with `xyxy`, `mask`,
            `confidence` and `class_id` populated. `class_id` holds the
            prompt index of each prediction (falling back to the position
            of the prompt result when `prompt_index` is absent). An empty
            Detections object is returned when no prediction yields a mask.

    Example:
        ```python
        import cv2
        import supervision as sv
        from inference.models.sam3 import SegmentAnything3
        from inference.core.entities.requests.sam3 import Sam3Prompt

        image = cv2.imread("<SOURCE_IMAGE_PATH>")
        model = SegmentAnything3(
            model_id="sam3/sam3_final",
            api_key="<ROBOFLOW_API_KEY>"
        )

        prompts = [
            Sam3Prompt(type="text", text="car"),
            Sam3Prompt(type="text", text="tire"),
        ]

        result = model.segment_image(
            image=image,
            prompts=prompts,
            output_prob_thresh=0.5,
            format="polygon"
        )

        height, width = image.shape[:2]
        detections = sv.Detections.from_sam3(
            sam3_result=result,
            resolution_wh=(width, height)
        )
        ```
    """

    def _read(container, key, fallback=None):
        # Results may be plain dicts or attribute-style objects; read both
        # the same way so the parsing below has a single code path.
        if isinstance(container, dict):
            return container.get(key, fallback)
        return getattr(container, key, fallback)

    width, height = validate_resolution(resolution_wh)
    frame_wh = (width, height)

    instance_masks = []
    scores = []
    prompt_ids = []

    prompt_results = _read(sam3_result, "prompt_results", [])
    for position, prompt_result in enumerate(prompt_results):
        predictions = _read(prompt_result, "predictions", [])
        # Fall back to the enumeration position when no explicit index exists.
        prompt_index = _read(prompt_result, "prompt_index", position)

        for prediction in predictions:
            # A missing/empty format is accepted; any explicit format other
            # than "polygon" is skipped.
            declared_format = _read(prediction, "format")
            if declared_format and declared_format != "polygon":
                continue

            polygons = _read(prediction, "masks", [])
            score = _read(prediction, "confidence", 1.0)
            if not polygons:
                continue

            # Union of every polygon belonging to this prediction.
            merged = np.zeros((height, width), dtype=bool)
            for points in polygons:
                rasterized = polygon_to_mask(
                    polygon=np.array(points, dtype=np.int32),
                    resolution_wh=frame_wh,
                )
                merged |= rasterized.astype(bool, copy=False)

            instance_masks.append(merged)
            scores.append(score)
            prompt_ids.append(prompt_index)

    if not instance_masks:
        return cls.empty()

    stacked_masks = np.stack(instance_masks, axis=0)
    boxes = mask_to_xyxy(stacked_masks)

    return cls(
        xyxy=boxes.astype(np.float32),
        mask=stacked_masks,
        confidence=np.array(scores, dtype=np.float32),
        class_id=np.array(prompt_ids, dtype=int),
    )
690815 @classmethod
691816 def from_azure_analyze_image (
692817 cls , azure_result : dict , class_map : dict [int , str ] | None = None
0 commit comments