11import base64
2- import io
32from pathlib import Path
4- from typing import Optional , Tuple , Union
3+ from typing import List , Optional , Tuple , Union
54
65import cv2
76import numpy as np
@@ -129,22 +128,40 @@ def scale_detections(detections: sv.Detections, in_shape: tuple, out_shape: tupl
129128
130129
def base64mask_to_mask(base64mask: str) -> np.ndarray:
    """
    Convert a base64-encoded mask image to a boolean mask using OpenCV.

    Args:
        base64mask (str): Base64-encoded bytes of an encoded image (for example
            the PNG payload produced by `binary_mask_to_base64`).

    Returns:
        np.ndarray: Boolean array that is True wherever the decoded grayscale
            pixel value is greater than zero.

    Raises:
        ValueError: If the decoded payload is empty or OpenCV cannot decode it
            as an image. Malformed base64 is reported by `base64.b64decode` as
            `binascii.Error`, which is a `ValueError` subclass.
    """
    raw_bytes = base64.b64decode(base64mask)
    if not raw_bytes:
        # Fail early with a clear message instead of handing OpenCV an empty buffer.
        raise ValueError("Empty base64 mask payload")

    # View the bytes as a flat uint8 buffer, which is what cv2.imdecode expects.
    encoded = np.frombuffer(raw_bytes, dtype=np.uint8)
    gray = cv2.imdecode(encoded, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imdecode signals undecodable data by returning None; comparing
        # None with 0 would otherwise raise an unrelated TypeError.
        raise ValueError("Failed to decode mask image")

    # The comparison already yields a bool array, no extra cast needed.
    return gray > 0
134145
135- def focoos_detections_to_supervision (
136- inference_output : FocoosDetections ,
137- ) -> sv .Detections :
146+
147+ def fai_detections_to_sv (inference_output : FocoosDetections , im0_shape : tuple ) -> sv .Detections :
138148 xyxy = np .array ([d .bbox if d .bbox is not None else np .empty (4 ) for d in inference_output .detections ])
139149 class_id = np .array ([d .cls_id for d in inference_output .detections ])
140150 confidence = np .array ([d .conf for d in inference_output .detections ])
141151 if xyxy .shape [0 ] == 0 :
142152 xyxy = np .empty ((0 , 4 ))
143153 _masks = []
144- for det in inference_output .detections :
145- if det .mask :
146- mask = base64mask_to_mask (det .mask )
147- _masks .append (mask )
154+ if len (inference_output .detections ) > 0 and inference_output .detections [0 ].mask :
155+ _masks = [np .zeros (im0_shape , dtype = bool ) for _ in inference_output .detections ]
156+ for i , det in enumerate (inference_output .detections ):
157+ if det .mask :
158+ mask = base64mask_to_mask (det .mask )
159+ if det .bbox is not None and not np .array_equal (det .bbox , [0 , 0 , 0 , 0 ]):
160+ x1 , y1 , x2 , y2 = map (int , det .bbox )
161+ y2 , x2 = min (y2 , _masks [i ].shape [0 ]), min (x2 , _masks [i ].shape [1 ])
162+ _masks [i ][y1 :y2 , x1 :x2 ] = mask [: y2 - y1 , : x2 - x1 ]
163+ else :
164+ _masks [i ] = mask
148165 masks = np .array (_masks ).astype (bool ) if len (_masks ) > 0 else None
149166 return sv .Detections (
150167 xyxy = xyxy ,
@@ -156,7 +173,7 @@ def focoos_detections_to_supervision(
156173
def binary_mask_to_base64(binary_mask: np.ndarray) -> str:
    """
    Converts a binary mask (NumPy array) to a base64-encoded PNG image using OpenCV.

    Every non-zero (truthy) element of the mask is written as 255 (white) and every
    zero element as 0 (black); the resulting uint8 image is PNG-encoded and the PNG
    bytes are base64-encoded.

    Args:
        binary_mask (np.ndarray): Mask to encode; boolean or numeric.

    Returns:
        str: A base64-encoded string representing the PNG image of the binary mask.

    Raises:
        ValueError: If OpenCV reports that the PNG encoding failed.
    """
    # Normalise through bool first: multiplying a mask that is already uint8 and
    # holds 255 by 255 would wrap around instead of staying white.
    pixels = binary_mask.astype(bool).astype(np.uint8) * 255

    # Use OpenCV to encode the image as PNG
    success, encoded_image = cv2.imencode(".png", pixels)
    if not success:
        raise ValueError("Failed to encode image")

    # Encode the PNG bytes to base64 text
    return base64.b64encode(encoded_image).decode("utf-8")
185198
186199
def sv_to_fai_detections(detections: sv.Detections, classes: Optional[list[str]] = None) -> List[FocoosDet]:
    """
    Convert supervision detections into a list of Focoos detection objects.

    Each detection's box is truncated to integer coordinates. When a mask is
    present, only the region inside that box is kept and stored as a
    base64-encoded PNG; otherwise the mask stays None.

    Args:
        detections (sv.Detections): Detections to convert; iterated item by item.
        classes (Optional[list[str]]): Optional class names indexed by class id,
            used to fill in each detection's label.

    Returns:
        List[FocoosDet]: One `FocoosDet` per input detection, in iteration order.
    """
    converted = []
    for box, mask_arr, score, class_idx, _, _ in detections:
        int_box = [int(coord) for coord in box]

        encoded_mask = None
        if mask_arr is not None:
            # Keep only the mask pixels inside the (integer) box before encoding.
            left, top, right, bottom = int_box[0], int_box[1], int_box[2], int_box[3]
            encoded_mask = binary_mask_to_base64(mask_arr[top:bottom, left:right])

        if classes is not None and class_idx is not None:
            label = classes[class_idx]
        else:
            label = None

        converted.append(
            FocoosDet(
                cls_id=None if class_idx is None else int(class_idx),
                bbox=int_box,
                mask=encoded_mask,
                conf=None if score is None else round(float(score), 2),
                label=label,
            )
        )
    return converted
241+
242+
def mask_to_xyxy(masks: np.ndarray) -> np.ndarray:
    """
    Converts a 3D `np.array` of 2D bool masks into a 2D `np.array` of bounding boxes.

    Parameters:
        masks (np.ndarray): A 3D `np.array` of shape `(N, H, W)` containing 2D
            bool masks (axis 1 indexes rows / y, axis 2 indexes columns / x).

    Returns:
        np.ndarray: A 2D integer `np.array` of shape `(N, 4)` containing the bounding
            boxes `(x_min, y_min, x_max, y_max)` for each mask. The max values are
            the indices of the last True row/column (inclusive). A mask with no
            True element yields `[0, 0, 0, 0]`.
    """
    boxes = np.zeros((masks.shape[0], 4), dtype=int)

    # One pass per mask; the per-mask reductions themselves run inside NumPy.
    for idx, single_mask in enumerate(masks):
        row_hits = np.flatnonzero(single_mask.any(axis=1))  # y indices containing True
        col_hits = np.flatnonzero(single_mask.any(axis=0))  # x indices containing True

        if row_hits.size == 0 or col_hits.size == 0:
            # Empty mask: keep the all-zero box.
            continue

        boxes[idx] = (col_hits[0], row_hits[0], col_hits[-1], row_hits[-1])

    return boxes
0 commit comments