Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions doc/references/predictions/nutrient-extraction.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ Postprocessed entities contain the following fields:
- `char_start`: the character start index of the entity, with respect to the original OCR JSON
- `char_end` : the character end index of the entity, with respect to the original OCR JSON
- `valid`: whether the extracted entity is valid. We consider an entity invalid if we couldn't extract nutrient value from the `text` field, or if there is more than one entity for a single nutrient. For example, two `proteins_100g` entities are both considered invalid, but one `proteins_100g` and one `proteins_serving` are considered valid.
- `bounding_box_absolute`: the bounding box coordinates of the entity in absolute pixel values (y_min, x_min, y_max, x_max).

In addition to these entity fields, the prediction also includes a `bounding_box` field, which represents the bounding box coordinates for the whole nutrition area calculated from all the entities.

### Integration

Expand All @@ -133,6 +136,16 @@ If some entities were detected, we create a `Prediction` in DB using the usual i

We only create an insight if we detected at least one nutrient value that is not in the product nutrients [^nutrient_extraction_import].

### Validation

When validating nutrition extraction insights, users can:

1. Accept or reject the extracted nutrient values
2. Modify the extracted values if needed
3. Propose a new bounding box for the nutrition area, which will be used for cropping the image

This bounding box information is stored with the insight data and used for visualization and future image cropping.

[^other_nutrient_detection]: Using a fixed set of classes is not the best approach when we have many classes. It however allows us to use LayoutLM architecture, which is very performant for this task, even when the nutrition table is hard to read due to packaging deformations or alterations. To detect the long-tail of nutrients, graph-based approaches, where we would map a nutrient mention to its value, could be explored in the future.

[^predict_function]: In `robotoff.prediction.nutrition_extraction` module
Expand Down
99 changes: 98 additions & 1 deletion robotoff/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,9 +372,29 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
)

update = req.get_param_as_bool("update", default=True)
# This field is only needed for nutritional table structure insights.
# This field is only needed for nutritional table structure insights
# or nutrient extraction insights.
data: JSONType | None = req.get_param_as_json("data")

# Check if user is providing a new bounding box
bounding_box = req.get_param_as_json("bounding_box")

# Validate bounding box format if provided
if bounding_box is not None:
required_keys = ["y_min", "x_min", "y_max", "x_max"]
if not all(key in bounding_box for key in required_keys):
raise falcon.HTTPBadRequest(
description=f"bounding_box must contain all of: {', '.join(required_keys)}"
)
# Validate that coordinates make sense
if (
bounding_box["y_min"] >= bounding_box["y_max"]
or bounding_box["x_min"] >= bounding_box["x_max"]
):
raise falcon.HTTPBadRequest(
description="Invalid bounding box coordinates: min values must be less than max values"
)

if annotation == 2:
if data is None:
raise falcon.HTTPBadRequest(
Expand Down Expand Up @@ -410,6 +430,7 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
insight_id,
)

# If bounding box is provided, we'll pass it separately
annotation_result = save_annotation(
insight_id,
annotation,
Expand All @@ -418,6 +439,7 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
auth=auth,
device_id=device_id,
trusted_annotator=trusted_annotator,
bounding_box=bounding_box,
)

resp.media = {
Expand Down Expand Up @@ -1851,6 +1873,80 @@ def custom_handle_uncaught_exception(
raise falcon.HTTPInternalServerError(description=str(ex))


class NutritionImageCropResource:
    def on_get(self, req: falcon.Request, resp: falcon.Response):
        """Return a cropped nutrition image using the bounding box information.

        This endpoint can be used to get cropped nutrition images for
        visualization during validation or for display purposes.

        Query parameters:

        - ``image_url`` (required): URL of the image to crop
        - ``bounding_box``: JSON object with ``x_min``, ``y_min``, ``x_max``,
          ``y_max`` keys
        - ``insight_id``: UUID of an insight whose stored bounding box is used

        Exactly one of ``bounding_box`` or ``insight_id`` must be provided.

        :raises falcon.HTTPBadRequest: if the bounding box is missing, has
            missing keys or degenerate coordinates, or if the image download
            fails
        :raises falcon.HTTPNotFound: if ``insight_id`` refers to no insight
        :raises falcon.HTTPInternalServerError: if cropping fails unexpectedly
        """
        image_url = req.get_param("image_url", required=True)

        # Get bounding box directly from parameters or from insight
        bounding_box = req.get_param_as_json("bounding_box")
        insight_id = req.get_param_as_uuid("insight_id")

        if bounding_box is None and insight_id is None:
            raise falcon.HTTPBadRequest(
                description="Either bounding_box or insight_id must be provided"
            )

        if bounding_box is not None and insight_id is not None:
            raise falcon.HTTPBadRequest(
                description="Only one of bounding_box or insight_id should be provided"
            )

        # Get bounding box from insight if insight_id is provided.
        # Keep the try body minimal: only the DB lookup can raise DoesNotExist.
        if insight_id is not None:
            try:
                insight = ProductInsight.get_by_id(insight_id)
            except ProductInsight.DoesNotExist:
                raise falcon.HTTPNotFound(
                    description=f"Insight with ID {insight_id} not found"
                )
            if insight.bounding_box is None:
                raise falcon.HTTPBadRequest(
                    description=f"No bounding box found for insight {insight_id}"
                )
            bounding_box = insight.bounding_box

        # Validate bounding box format
        required_keys = ["x_min", "y_min", "x_max", "y_max"]
        if not all(key in bounding_box for key in required_keys):
            raise falcon.HTTPBadRequest(
                description=f"bounding_box must contain all of: {', '.join(required_keys)}"
            )
        # Reject degenerate boxes, consistent with the validation performed
        # by the insight annotation endpoint
        if (
            bounding_box["y_min"] >= bounding_box["y_max"]
            or bounding_box["x_min"] >= bounding_box["x_max"]
        ):
            raise falcon.HTTPBadRequest(
                description="Invalid bounding box coordinates: min values must be less than max values"
            )

        try:
            # Stream the download; the timeout prevents the worker from
            # hanging indefinitely on an unresponsive host
            response = requests.get(image_url, stream=True, timeout=30)
            response.raise_for_status()

            # Buffer the image in a temporary file that is deleted on exit
            # from the `with` block
            with tempfile.NamedTemporaryFile(suffix=".jpg") as tmp_file:
                for chunk in response.iter_content(chunk_size=8192):
                    tmp_file.write(chunk)
                tmp_file.flush()

                # Imported locally, presumably to avoid an import cycle with
                # robotoff.images — TODO confirm before moving to module level
                from robotoff.images import crop_nutrition_image

                cropped_image = crop_nutrition_image(tmp_file.name, bounding_box)

                # Serve the cropped image
                image_response(cropped_image, resp)

        except requests.exceptions.RequestException as e:
            raise falcon.HTTPBadRequest(
                description=f"Error downloading image: {str(e)}"
            )
        except falcon.HTTPError:
            # Let falcon HTTP errors propagate unchanged instead of being
            # re-wrapped as 500s by the catch-all below
            raise
        except Exception as e:
            raise falcon.HTTPInternalServerError(
                description=f"Error cropping image: {str(e)}"
            )


api = falcon.App(
middleware=[
falcon.CORSMiddleware(allow_origins="*", allow_credentials="*"),
Expand Down Expand Up @@ -1888,6 +1984,7 @@ def custom_handle_uncaught_exception(
api.add_route("/api/v1/products/dataset", UpdateDatasetResource())
api.add_route("/api/v1/images", ImageCollection())
api.add_route("/api/v1/images/crop", ImageCropResource())
api.add_route("/api/v1/images/nutrition/crop", NutritionImageCropResource())
api.add_route("/api/v1/image_predictions", ImagePredictionResource())
api.add_route("/api/v1/image_predictions/import", ImagePredictionImporterResource())
api.add_route("/api/v1/images/predict", ImagePredictorResource())
Expand Down
10 changes: 8 additions & 2 deletions robotoff/app/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ def save_annotation(
data: Optional[dict] = None,
auth: Optional[OFFAuthentication] = None,
trusted_annotator: bool = False,
bounding_box: Optional[dict] = None,
) -> AnnotationResult:
"""Saves annotation either by using a single response as ground truth or
by using several responses.
Expand All @@ -394,8 +395,9 @@ def save_annotation(
:param auth: User authentication data, it is expected to be None if
`trusted_annotator=False` (=anonymous vote)
:param trusted_annotator: Defines whether the given annotation comes from
an authoritative source (e.g. a trusted user), ot whether the annotation
should be subject to the voting system.
        an authoritative source (e.g. a trusted user), or whether the annotation
        should be subject to the voting system.
:param bounding_box: Optional bounding box coordinates for nutrition insights
"""
try:
insight: Union[ProductInsight, None] = ProductInsight.get_by_id(insight_id)
Expand All @@ -408,6 +410,10 @@ def save_annotation(
if insight.annotation is not None:
return ALREADY_ANNOTATED_RESULT

# Update bounding box if provided
if bounding_box is not None and trusted_annotator:
insight.bounding_box = bounding_box

# We use AnnotationVote mechanism to save annotation = -1 (ignore) for
# authenticated users, so that it's not returned again to the user
if not trusted_annotator or annotation == -1:
Expand Down
17 changes: 17 additions & 0 deletions robotoff/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,20 @@ def delete_images(product_id: ProductIdentifier, image_ids: list[str]):
deleted_embeddings_total,
deleted_logos_total,
)


def crop_nutrition_image(image_path: str, bounding_box: dict) -> Image.Image:
    """Crop an image using the nutrition bounding box.

    :param image_path: Path to the image file to crop
    :param bounding_box: Dictionary with x_min, y_min, x_max, y_max coordinates
    :return: Cropped image
    """
    # PIL crop boxes are (left, upper, right, lower)
    left, upper, right, lower = (
        bounding_box[key] for key in ("x_min", "y_min", "x_max", "y_max")
    )
    return Image.open(image_path).crop((left, upper, right, lower))
27 changes: 20 additions & 7 deletions robotoff/insights/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,11 @@ def process_annotation(
description=str(e),
)
validated_nutrients = cls.add_default_unit(validated_nutrients)

# Save custom bounding box if provided in the validation data
if "bounding_box" in data:
insight.bounding_box = data["bounding_box"]

# We override the predicted nutrient values by the ones submitted by the
# user
insight.data["annotation"] = validated_nutrients.model_dump()
Expand Down Expand Up @@ -841,14 +846,22 @@ def select_nutrition_image(

rotation = get_image_rotation(insight.source_image)

nutrition_table_detections = get_nutrition_table_prediction(
insight.source_image, threshold=0.5
)
# Use bounding box from insight if available
bounding_box = None
# Only crop according to the model predicted bounding box if there is exactly
# one nutrition table detected
if nutrition_table_detections and len(nutrition_table_detections) == 1:
bounding_box = nutrition_table_detections[0]["bounding_box"]
if insight.bounding_box is not None:
# Convert from database format to the format expected by
# convert_crop_bounding_box
bb = insight.bounding_box
bounding_box = (bb["y_min"], bb["x_min"], bb["y_max"], bb["x_max"])
else:
# If no bounding box in insight, use nutrition table detector
nutrition_table_detections = get_nutrition_table_prediction(
insight.source_image, threshold=0.5
)
# Only crop according to the model predicted bounding box if there is
# exactly one nutrition table detected
if nutrition_table_detections and len(nutrition_table_detections) == 1:
bounding_box = nutrition_table_detections[0]["bounding_box"]

crop_bounding_box: tuple[float, float, float, float] | None = None
if bounding_box:
Expand Down
20 changes: 19 additions & 1 deletion robotoff/insights/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,25 @@ def generate_candidates(

for prediction in predictions:
if cls.keep_prediction(product, list(prediction.data["nutrients"].keys())):
yield ProductInsight(**prediction.to_dict())
# Create dictionary from prediction
prediction_data = prediction.to_dict()

# Store bounding box directly in the insight model field
bounding_box = None
if "bounding_box" in prediction.data:
# Convert tuple to dictionary format expected by the database
y_min, x_min, y_max, x_max = prediction.data["bounding_box"]
bounding_box = {
"y_min": y_min,
"x_min": x_min,
"y_max": y_max,
"x_max": x_max,
}

# Create the insight with the bounding box field
insight = ProductInsight(**prediction_data)
insight.bounding_box = bounding_box
yield insight

@staticmethod
def keep_prediction(product: Product | None, nutrients_keys: list[str]) -> bool:
Expand Down
Loading
Loading