Skip to content

Commit 89c7d4e

Browse files
Merge branch 'main' into feat/dg-154-expose-sam3-visual-segment-endpoint
2 parents c4ed280 + d6d7061 commit 89c7d4e

File tree

3 files changed

+59
-5
lines changed
  • inference/core

3 files changed

+59
-5
lines changed

inference/core/interfaces/webrtc_worker/utils.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ def detect_image_output(
4141
def process_frame(
4242
frame: VideoFrame,
4343
frame_id: int,
44+
declared_fps: float,
45+
measured_fps: float,
46+
comes_from_video_file: bool,
4447
inference_pipeline: InferencePipeline,
4548
stream_output: Optional[str] = None,
4649
render_output: bool = True,
@@ -59,9 +62,9 @@ def process_frame(
5962
image=np_image,
6063
frame_id=frame_id,
6164
frame_timestamp=datetime.datetime.now(),
62-
comes_from_video_file=False,
63-
fps=30, # placeholder
64-
measured_fps=30, # placeholder
65+
comes_from_video_file=comes_from_video_file,
66+
fps=declared_fps,
67+
measured_fps=measured_fps,
6568
)
6669
workflow_output = inference_pipeline._on_video_frame([video_frame])[0]
6770
except Exception as e:

inference/core/interfaces/webrtc_worker/webrtc.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ def __init__(
231231
realtime_processing: bool = True,
232232
is_preview: bool = False,
233233
):
234+
self._file_processing = False
234235
self._loop = asyncio_loop
235236
self._termination_date = termination_date
236237
self._terminate_event = terminate_event
@@ -620,6 +621,9 @@ async def _process_frame_async(
620621
process_frame,
621622
frame,
622623
frame_id,
624+
self._declared_fps,
625+
self._declared_fps, # TODO: measure fps
626+
self._file_processing,
623627
self._inference_pipeline,
624628
stream_output,
625629
render_output,
@@ -1140,6 +1144,7 @@ async def on_upload_message(message):
11401144
None, process_video_upload_message, message, video_processor
11411145
)
11421146
if video_path:
1147+
video_processor._file_processing = True
11431148
logger.info(
11441149
"Video upload complete, processing: realtime=%s, path=%s",
11451150
webrtc_request.webrtc_realtime_processing,

inference/core/workflows/core_steps/transformations/camera_calibration/v1.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
WorkflowImageData,
1010
)
1111
from inference.core.workflows.execution_engine.entities.types import (
12+
BOOLEAN_KIND,
1213
FLOAT_KIND,
1314
IMAGE_KIND,
1415
Selector,
@@ -153,6 +154,14 @@ class BlockManifest(WorkflowBlockManifest):
153154
description="Second tangential distortion coefficient. Part of the camera's distortion parameters used to correct additional tangential distortion effects. p2 works together with p1 to correct lens misalignment distortions. Obtained through camera calibration. For well-aligned lenses, p1 and p2 are often close to zero.",
154155
examples=[0.123, "$inputs.p2"],
155156
)
157+
use_fisheye_model: Union[
158+
Optional[bool],
159+
Selector(kind=[BOOLEAN_KIND]),
160+
] = Field(
161+
default=False,
162+
description="Enable Fisheye distortion model (Rational/Divisional). If true, uses a different mathematical model better suited for fisheye lenses. When enabled, k1 is the primary parameter, and other coefficients are typically 0.",
163+
examples=[True, "$inputs.use_fisheye_model"],
164+
)
156165

157166
@classmethod
158167
def describe_outputs(cls) -> List[OutputDefinition]:
@@ -183,6 +192,7 @@ def run(
183192
k3: float,
184193
p1: float,
185194
p2: float,
195+
use_fisheye_model: bool = False,
186196
) -> BlockResult:
187197
return {
188198
OUTPUT_CALIBRATED_IMAGE_KEY: remove_distortions(
@@ -196,6 +206,7 @@ def run(
196206
k3=k3,
197207
p1=p1,
198208
p2=p2,
209+
use_fisheye_model=use_fisheye_model,
199210
)
200211
}
201212

@@ -211,12 +222,47 @@ def remove_distortions(
211222
k3: float,
212223
p1: float,
213224
p2: float,
225+
use_fisheye_model: bool = False,
214226
) -> Optional[WorkflowImageData]:
215227
img = image.numpy_image
216228
h, w = img.shape[:2]
217229

218-
cameraMatrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float64)
219-
distCoeffs = np.array([k1, k2, p1, p2, k3], dtype=np.float64)
230+
if use_fisheye_model:
231+
# 1. Generate grid for the destination (undistorted) image
232+
grid_y, grid_x = np.mgrid[0:h, 0:w].astype(np.float32)
233+
234+
# 2. Normalize coordinates using original matrix (results in a center crop)
235+
x = (grid_x - cx) / fx
236+
y = (grid_y - cy) / fy
237+
r2 = x**2 + y**2
238+
239+
# 3. Apply the SimpleDivisional Distortion formula (matches GeoCalib)
240+
if abs(k1) < 1e-8:
241+
scale = 1.0
242+
else:
243+
# Formula: p_dist = p_undist * (1 - sqrt(1 - 4*k1*r2)) / (2*k1*r2)
244+
discriminant = 1 - 4 * k1 * r2
245+
# Clamp to 0 to avoid NaNs
246+
discriminant[discriminant < 0] = 0
247+
scale = (1 - np.sqrt(discriminant)) / (2 * k1 * r2)
248+
# Handle center pixel or very small r2 to avoid division by zero / instability
249+
scale[np.abs(r2) < 1e-8] = 1.0
250+
251+
# 4. Map back to Source (distorted) pixels
252+
map_x = (x * scale * fx) + cx
253+
map_y = (y * scale * fy) + cy
254+
255+
# 5. Remap using OpenCV
256+
dst = cv.remap(
257+
img, map_x.astype(np.float32), map_y.astype(np.float32), cv.INTER_LINEAR
258+
)
259+
return WorkflowImageData(
260+
parent_metadata=image.parent_metadata,
261+
numpy_image=dst,
262+
)
263+
264+
cameraMatrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
265+
distCoeffs = np.array([k1, k2, p1, p2, k3], dtype=np.float32)
220266

221267
# https://docs.opencv.org/4.11.0/d9/d0c/group__calib3d.html#ga7a6c4e032c97f03ba747966e6ad862b1
222268
newcameramtx, roi = cv.getOptimalNewCameraMatrix(

0 commit comments

Comments (0)