Cast mime type to str always in SDK tests; Fix failing tests and vision logic

hexbabe · hexbabe · commit 0669c03364e2 · 2025-08-28T12:02:29.000-04:00
diff --git a/docs/examples/example.ipynb b/docs/examples/example.ipynb
@@ -151,7 +151,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
     "tags": [
      "hide-output"
@@ -165,7 +165,7 @@
     "\n",
     "robot = await connect_with_channel()\n",
     "camera = Camera.from_robot(robot, \"camera0\")\n",
-    "image = await camera.get_image(CameraMimeType.JPEG)\n",
+    "image = await camera.get_image(CameraMimeType.JPEG.value)\n",
     "pil = viam_to_pil_image(image)\n",
     "pil.save(\"foo.png\")\n",
     "\n",
diff --git a/examples/server/v1/client.py b/examples/server/v1/client.py
@@ -34,7 +34,7 @@ async def client():
 
         print("\n#### CAMERA ####")
         camera = Camera.from_robot(robot, "camera0")
-        img = await camera.get_image(mime_type=CameraMimeType.PNG)
+        img = await camera.get_image(mime_type=CameraMimeType.PNG.value)
         assert isinstance(img, Image)
         img.show()
         await asyncio.sleep(1)
diff --git a/examples/server/v1/components.py b/examples/server/v1/components.py
@@ -322,7 +322,7 @@ def __init__(self, name: str):
         img = Image.open(p.parent.absolute().joinpath("viam.jpeg"))
         buf = BytesIO()
         img.copy().save(buf, format="JPEG")
-        self.image = ViamImage(buf.getvalue(), CameraMimeType.JPEG)
+        self.image = ViamImage(buf.getvalue(), CameraMimeType.JPEG.value)
         img.close()
         super().__init__(name)
 
diff --git a/src/viam/components/camera/camera.py b/src/viam/components/camera/camera.py
@@ -67,6 +67,9 @@ async def get_images(self, *, timeout: Optional[float] = None, **kwargs) -> Tupl
         """Get simultaneous images from different imagers, along with associated metadata.
         This should not be used for getting a time series of images from the same imager.
 
+        The extra parameter can be used to pass additional options to the camera resource. The filter_source_names parameter can be used to
+        return only the images from the specified source names. When filter_source_names is unspecified, all images are returned.
+
         ::
 
             my_camera = Camera.from_robot(robot=machine, name="my_camera")
diff --git a/src/viam/components/camera/client.py b/src/viam/components/camera/client.py
@@ -54,7 +54,11 @@ async def get_images(
         response: GetImagesResponse = await self.client.GetImages(request, timeout=timeout, metadata=md)
         imgs = []
         for img_data in response.images:
-            mime_type = CameraMimeType.from_proto(img_data.format)
+            if img_data.mime_type:
+                mime_type = img_data.mime_type
+            else:
+                # TODO(RSDK-11728): remove this fallback once the format field has been deleted
+                mime_type = CameraMimeType.from_proto(img_data.format).value
             img = NamedImage(img_data.source_name, img_data.image, mime_type)
             imgs.append(img)
         resp_metadata: ResponseMetadata = response.response_metadata
@@ -68,7 +72,7 @@ async def get_point_cloud(
         **kwargs,
     ) -> Tuple[bytes, str]:
         md = kwargs.get("metadata", self.Metadata()).proto
-        request = GetPointCloudRequest(name=self.name, mime_type=CameraMimeType.PCD, extra=dict_to_struct(extra))
+        request = GetPointCloudRequest(name=self.name, mime_type=CameraMimeType.PCD.value, extra=dict_to_struct(extra))
         response: GetPointCloudResponse = await self.client.GetPointCloud(request, timeout=timeout, metadata=md)
         return (response.point_cloud, response.mime_type)
 
diff --git a/src/viam/components/camera/service.py b/src/viam/components/camera/service.py
@@ -6,6 +6,7 @@
 from viam.proto.common import DoCommandRequest, DoCommandResponse, GetGeometriesRequest, GetGeometriesResponse
 from viam.proto.component.camera import (
     CameraServiceBase,
+    Format,
     GetImageRequest,
     GetImageResponse,
     GetImagesRequest,
@@ -20,6 +21,8 @@
 from viam.resource.rpc_service_base import ResourceRPCServiceBase
 from viam.utils import dict_to_struct, struct_to_dict
 
+from viam.media.video import CameraMimeType
+
 from . import Camera
 
 
@@ -51,9 +54,16 @@ async def GetImages(self, stream: Stream[GetImagesRequest, GetImagesResponse]) -
         images, metadata = await camera.get_images(timeout=timeout, metadata=stream.metadata)
         img_bytes_lst = []
         for img in images:
-            fmt = img.mime_type.to_proto()
+            # TODO(RSDK-11728): remove this try/except logic once the format field has been deleted
+            try:
+                mime_type = CameraMimeType.from_string(img.mime_type)  # this can ValueError if the mime_type is not a CameraMimeType
+                fmt = mime_type.to_proto()
+            except ValueError:
+                mime_type = img.mime_type
+                fmt = Format.FORMAT_UNSPECIFIED
+
             img_bytes = img.data
-            img_bytes_lst.append(Image(source_name=name, format=fmt, image=img_bytes))
+            img_bytes_lst.append(Image(source_name=name, format=fmt, mime_type=img.mime_type, image=img_bytes))
         response = GetImagesResponse(images=img_bytes_lst, response_metadata=metadata)
         await stream.send_message(response)
 
diff --git a/src/viam/media/utils/pil/__init__.py b/src/viam/media/utils/pil/__init__.py
@@ -25,7 +25,7 @@ def viam_to_pil_image(image: ViamImage) -> Image.Image:
     return Image.open(BytesIO(image.data), formats=LIBRARY_SUPPORTED_FORMATS)
 
 
-def pil_to_viam_image(image: Image.Image, mime_type: CameraMimeType) -> ViamImage:
+def pil_to_viam_image(image: Image.Image, mime_type: str) -> ViamImage:
     """
     Convert a PIL.Image to a ViamImage.
 
@@ -34,11 +34,16 @@ def pil_to_viam_image(image: Image.Image, mime_type: CameraMimeType) -> ViamImag
 
     Args:
         image (Image.Image): The image to convert.
-        mime_type (CameraMimeType): The mime type to convert the image to.
+        mime_type (str): The mime type to convert the image to. Must be a string accepted by `CameraMimeType.from_string`; otherwise a `ValueError` is raised.
 
     Returns:
         ViamImage: The resulting ViamImage
     """
+    try:
+        mime_type = CameraMimeType.from_string(mime_type)
+    except ValueError as e:
+        raise ValueError(f"Unsupported mimetype str: {mime_type}") from e
+
     if mime_type.name in LIBRARY_SUPPORTED_FORMATS:
         buf = BytesIO()
         if image.mode == "RGBA" and mime_type == CameraMimeType.JPEG:
@@ -48,4 +53,4 @@ def pil_to_viam_image(image: Image.Image, mime_type: CameraMimeType) -> ViamImag
     else:
         raise ValueError(f"Cannot encode image to {mime_type}")
 
-    return ViamImage(data, mime_type)
+    return ViamImage(data, mime_type.value)
diff --git a/src/viam/media/video.py b/src/viam/media/video.py
@@ -17,6 +17,9 @@ class CameraMimeType(str, Enum):
     PNG = "image/png"
     PCD = "pointcloud/pcd"
 
+    def __str__(self) -> str:
+        return self.value
+
     @classmethod
     def from_string(cls, value: str) -> Self:
         """Return the mimetype from a string.
diff --git a/src/viam/services/vision/client.py b/src/viam/services/vision/client.py
@@ -69,7 +69,11 @@ async def capture_all_from_camera(
         result = CaptureAllResult()
         result.extra = struct_to_dict(response.extra)
         if return_image:
-            mime_type = CameraMimeType.from_proto(response.image.format)
+            # TODO(RSDK-11728): remove this branching logic once the format field has been deleted
+            if response.image.mime_type:
+                mime_type = response.image.mime_type
+            else:
+                mime_type = CameraMimeType.from_proto(response.image.format).value
             img = ViamImage(response.image.image, mime_type)
             result.image = img
         if return_classifications:
@@ -102,7 +106,7 @@ async def get_detections(
         **kwargs,
     ) -> List[Detection]:
         md = kwargs.get("metadata", self.Metadata()).proto
-        mime_type = CameraMimeType.JPEG
+        mime_type = CameraMimeType.JPEG.value
 
         if image.width is None or image.height is None:
             raise ViamError(f"image {image} needs to have a specified width and height")
@@ -145,7 +149,7 @@ async def get_classifications(
     ) -> List[Classification]:
         md = kwargs.get("metadata", self.Metadata()).proto
 
-        mime_type = CameraMimeType.JPEG
+        mime_type = CameraMimeType.JPEG.value
         if image.width is None or image.height is None:
             raise ViamError(f"image {image} needs to have a specified width and height")
         request = GetClassificationsRequest(
@@ -172,7 +176,7 @@ async def get_object_point_clouds(
         request = GetObjectPointCloudsRequest(
             name=self.name,
             camera_name=camera_name,
-            mime_type=CameraMimeType.PCD,
+            mime_type=CameraMimeType.PCD.value,
             extra=dict_to_struct(extra),
         )
         response: GetObjectPointCloudsResponse = await self.client.GetObjectPointClouds(request, timeout=timeout, metadata=md)
diff --git a/src/viam/services/vision/service.py b/src/viam/services/vision/service.py
@@ -2,7 +2,7 @@
 
 from viam.media.video import CameraMimeType, ViamImage
 from viam.proto.common import DoCommandRequest, DoCommandResponse
-from viam.proto.component.camera import Image
+from viam.proto.component.camera import Format, Image
 from viam.proto.service.vision import (
     CaptureAllFromCameraRequest,
     CaptureAllFromCameraResponse,
@@ -36,7 +36,7 @@ class VisionRPCService(UnimplementedVisionServiceBase, ResourceRPCServiceBase):
     async def CaptureAllFromCamera(self, stream: Stream[CaptureAllFromCameraRequest, CaptureAllFromCameraResponse]) -> None:
         request = await stream.recv_message()
         assert request is not None
-        vision = self.get_resource(request.name)
+        vision: Vision = self.get_resource(request.name)
         extra = struct_to_dict(request.extra)
         timeout = stream.deadline.time_remaining() if stream.deadline else None
         result = await vision.capture_all_from_camera(
@@ -50,9 +50,15 @@ async def CaptureAllFromCamera(self, stream: Stream[CaptureAllFromCameraRequest,
         )
         img = None
         if result.image is not None:
-            fmt = result.image.mime_type.to_proto()
+            # TODO(RSDK-11728): remove this try/except logic once the format field has been deleted
+            try:
+                mime_type = CameraMimeType.from_string(result.image.mime_type)  # this can ValueError if mime_type is not a CameraMimeType
+                fmt = mime_type.to_proto()
+            except ValueError:
+                mime_type = result.image.mime_type
+                fmt = Format.FORMAT_UNSPECIFIED
             img_bytes = result.image.data
-            img = Image(source_name=request.camera_name, format=fmt, image=img_bytes)
+            img = Image(source_name=request.camera_name, mime_type=result.image.mime_type, format=fmt, image=img_bytes)
         response = CaptureAllFromCameraResponse(
             image=img,
             detections=result.detections,
diff --git a/tests/mocks/components.py b/tests/mocks/components.py
@@ -353,15 +353,15 @@ async def read() -> AsyncIterator[Tick]:
 
 class MockCamera(Camera):
     def __init__(self, name: str):
-        self.image = ViamImage(b"data", CameraMimeType.PNG)
+        self.image = ViamImage(b"data", CameraMimeType.PNG.value)
         self.geometries = GEOMETRIES
         self.point_cloud = b"THIS IS A POINT CLOUD"
         self.extra = None
         self.props = Camera.Properties(
             supports_pcd=False,
             intrinsic_parameters=IntrinsicParameters(width_px=1, height_px=2, focal_x_px=3, focal_y_px=4, center_x_px=5, center_y_px=6),
             distortion_parameters=DistortionParameters(model="no_distortion"),
-            mime_types=[CameraMimeType.PNG, CameraMimeType.JPEG],
+            mime_types=[CameraMimeType.PNG.value, CameraMimeType.JPEG.value],
             frame_rate=10.0,
         )
         self.timeout: Optional[float] = None
@@ -386,7 +386,7 @@ async def get_point_cloud(
     ) -> Tuple[bytes, str]:
         self.extra = extra
         self.timeout = timeout
-        return self.point_cloud, CameraMimeType.PCD
+        return self.point_cloud, CameraMimeType.PCD.value
 
     async def get_properties(self, *, timeout: Optional[float] = None, **kwargs) -> Camera.Properties:
         self.timeout = timeout
diff --git a/tests/test_camera.py b/tests/test_camera.py
@@ -40,7 +40,7 @@
 
 @pytest.fixture(scope="function")
 def image() -> ViamImage:
-    return ViamImage(b"data", CameraMimeType.PNG)
+    return ViamImage(b"data", CameraMimeType.PNG.value)
 
 
 @pytest.fixture(scope="function")
@@ -61,7 +61,7 @@ def properties() -> Camera.Properties:
         supports_pcd=False,
         intrinsic_parameters=IntrinsicParameters(width_px=1, height_px=2, focal_x_px=3, focal_y_px=4, center_x_px=5, center_y_px=6),
         distortion_parameters=DistortionParameters(model="no_distortion"),
-        mime_types=[CameraMimeType.PNG, CameraMimeType.JPEG],
+        mime_types=[CameraMimeType.PNG.value, CameraMimeType.JPEG.value],
         frame_rate=10.0,
     )
 
@@ -85,11 +85,11 @@ def generic_service(camera: Camera) -> GenericRPCService:
 
 class TestCamera:
     async def test_get_image(self, camera: MockCamera, image: ViamImage):
-        img = await camera.get_image(CameraMimeType.PNG)
+        img = await camera.get_image(CameraMimeType.PNG.value)
         assert img.data == image.data
         assert img.mime_type == image.mime_type
 
-        img = await camera.get_image(CameraMimeType.PNG, {"1": 1})
+        img = await camera.get_image(CameraMimeType.PNG.value, {"1": 1})
         assert camera.extra == {"1": 1}
 
     async def test_get_images(self, camera: Camera, image: ViamImage, metadata: ResponseMetadata):
@@ -139,9 +139,10 @@ async def test_get_image(self, camera: MockCamera, service: CameraRPCService, im
             client = CameraServiceStub(channel)
 
             # Test known mime type
-            request = GetImageRequest(name="camera", mime_type=CameraMimeType.PNG)
+            request = GetImageRequest(name="camera", mime_type=CameraMimeType.PNG.value)
             response: GetImageResponse = await client.GetImage(request, timeout=18.1)
             assert response.image == image.data
+            assert response.mime_type == CameraMimeType.PNG.value
             assert camera.timeout == loose_approx(18.1)
 
             # Test empty mime type. Empty mime type should default to response mime type
@@ -159,6 +160,7 @@ async def test_get_images(self, camera: MockCamera, service: CameraRPCService, m
             response: GetImagesResponse = await client.GetImages(request, timeout=18.1)
             raw_img = response.images[0]
             assert raw_img.format == Format.FORMAT_PNG
+            assert raw_img.mime_type == CameraMimeType.PNG.value
             assert raw_img.source_name == camera.name
             assert response.response_metadata == metadata
             assert camera.timeout == loose_approx(18.1)
@@ -167,17 +169,17 @@ async def test_render_frame(self, camera: MockCamera, service: CameraRPCService,
         assert camera.timeout is None
         async with ChannelFor([service]) as channel:
             client = CameraServiceStub(channel)
-            request = RenderFrameRequest(name="camera", mime_type=CameraMimeType.PNG)
+            request = RenderFrameRequest(name="camera", mime_type=CameraMimeType.PNG.value)
             response: HttpBody = await client.RenderFrame(request, timeout=4.4)
-            assert response.content_type == CameraMimeType.PNG
+            assert response.content_type == CameraMimeType.PNG.value
             assert response.data == image.data
             assert camera.timeout == loose_approx(4.4)
 
     async def test_get_point_cloud(self, camera: MockCamera, service: CameraRPCService, point_cloud: bytes):
         assert camera.timeout is None
         async with ChannelFor([service]) as channel:
             client = CameraServiceStub(channel)
-            request = GetPointCloudRequest(name="camera", mime_type=CameraMimeType.PCD)
+            request = GetPointCloudRequest(name="camera", mime_type=CameraMimeType.PCD.value)
             response: GetPointCloudResponse = await client.GetPointCloud(request, timeout=7.86)
             assert response.point_cloud == point_cloud
             assert camera.timeout == loose_approx(7.86)
@@ -217,7 +219,7 @@ async def test_get_image(self, camera: MockCamera, service: CameraRPCService, im
         async with ChannelFor([service]) as channel:
             client = CameraClient("camera", channel)
 
-            img = await client.get_image(timeout=1.82, mime_type=CameraMimeType.PNG)
+            img = await client.get_image(timeout=1.82, mime_type=CameraMimeType.PNG.value)
             assert img.data == image.data
             assert img.mime_type == image.mime_type
 
diff --git a/tests/test_media.py b/tests/test_media.py
diff --git a/tests/test_vision_service.py b/tests/test_vision_service.py