Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions doc/references/predictions/nutrient-extraction.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ Postprocessed entities contain the following fields:
- `char_start`: the character start index of the entity, with respect to the original OCR JSON
- `char_end` : the character end index of the entity, with respect to the original OCR JSON
- `valid`: whether the extracted entity is valid. We consider an entity invalid if we couldn't extract nutrient value from the `text` field, or if there is more than one entity for a single nutrient. For example, two `proteins_100g` entities are both considered invalid, but one `proteins_100g` and one `proteins_serving` are considered valid.
- `bounding_box_absolute`: the bounding box coordinates of the entity in absolute pixel values (y_min, x_min, y_max, x_max).

In addition to these entity fields, the prediction also includes a `bounding_box` field, which represents the bounding box coordinates for the whole nutrition area calculated from all the entities.

### Integration

Expand All @@ -133,6 +136,16 @@ If some entities were detected, we create a `Prediction` in DB using the usual i

We only create an insight if we detected at least one nutrient value that is not in the product nutrients [^nutrient_extraction_import].

### Validation

When validating nutrition extraction insights, users can:

1. Accept or reject the extracted nutrient values
2. Modify the extracted values if needed
3. Propose a new bounding box for the nutrition area, which will be used for cropping the image

This bounding box information is stored with the insight data and used for visualization and future image cropping.

[^other_nutrient_detection]: Using a fixed set of classes is not the best approach when we have many classes. It however allows us to use LayoutLM architecture, which is very performant for this task, even when the nutrition table is hard to read due to packaging deformations or alterations. To detect the long-tail of nutrients, graph-based approaches, where we would map a nutrient mention to its value, could be explored in the future.

[^predict_function]: In `robotoff.prediction.nutrition_extraction` module
Expand Down
99 changes: 98 additions & 1 deletion robotoff/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,9 +372,29 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
)

update = req.get_param_as_bool("update", default=True)
# This field is only needed for nutritional table structure insights.
# This field is only needed for nutritional table structure insights
# or nutrient extraction insights.
data: JSONType | None = req.get_param_as_json("data")

# Check if user is providing a new bounding box
bounding_box = req.get_param_as_json("bounding_box")

# Validate bounding box format if provided
if bounding_box is not None:
required_keys = ["y_min", "x_min", "y_max", "x_max"]
if not all(key in bounding_box for key in required_keys):
raise falcon.HTTPBadRequest(
description=f"bounding_box must contain all of: {', '.join(required_keys)}"
)
# Validate that coordinates make sense
if (
bounding_box["y_min"] >= bounding_box["y_max"]
or bounding_box["x_min"] >= bounding_box["x_max"]
):
raise falcon.HTTPBadRequest(
description="Invalid bounding box coordinates: min values must be less than max values"
)

if annotation == 2:
if data is None:
raise falcon.HTTPBadRequest(
Expand Down Expand Up @@ -410,6 +430,7 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
insight_id,
)

# If bounding box is provided, we'll pass it separately
annotation_result = save_annotation(
insight_id,
annotation,
Expand All @@ -418,6 +439,7 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
auth=auth,
device_id=device_id,
trusted_annotator=trusted_annotator,
bounding_box=bounding_box,
)

resp.media = {
Expand Down Expand Up @@ -1851,6 +1873,80 @@ def custom_handle_uncaught_exception(
raise falcon.HTTPInternalServerError(description=str(ex))


class NutritionImageCropResource:
    def on_get(self, req: falcon.Request, resp: falcon.Response):
        """Return a cropped nutrition image using the bounding box information.

        This endpoint can be used to get cropped nutrition images for
        visualization during validation or for display purposes.

        Query parameters:

        - ``image_url`` (required): URL of the image to crop
        - ``bounding_box``: JSON object with ``x_min``, ``y_min``, ``x_max``,
          ``y_max`` keys
        - ``insight_id``: UUID of an insight whose stored bounding box is used

        Exactly one of ``bounding_box`` or ``insight_id`` must be provided.

        :raises falcon.HTTPBadRequest: if the bounding box is missing, has
            missing keys or degenerate coordinates, or if the image download
            fails
        :raises falcon.HTTPNotFound: if ``insight_id`` refers to no insight
        :raises falcon.HTTPInternalServerError: if cropping fails unexpectedly
        """
        image_url = req.get_param("image_url", required=True)

        # Get bounding box directly from parameters or from insight
        bounding_box = req.get_param_as_json("bounding_box")
        insight_id = req.get_param_as_uuid("insight_id")

        if bounding_box is None and insight_id is None:
            raise falcon.HTTPBadRequest(
                description="Either bounding_box or insight_id must be provided"
            )

        if bounding_box is not None and insight_id is not None:
            raise falcon.HTTPBadRequest(
                description="Only one of bounding_box or insight_id should be provided"
            )

        # Get bounding box from insight if insight_id is provided.
        # Keep the try body minimal: only the DB lookup can raise DoesNotExist.
        if insight_id is not None:
            try:
                insight = ProductInsight.get_by_id(insight_id)
            except ProductInsight.DoesNotExist:
                raise falcon.HTTPNotFound(
                    description=f"Insight with ID {insight_id} not found"
                )
            if insight.bounding_box is None:
                raise falcon.HTTPBadRequest(
                    description=f"No bounding box found for insight {insight_id}"
                )
            bounding_box = insight.bounding_box

        # Validate bounding box format
        required_keys = ["x_min", "y_min", "x_max", "y_max"]
        if not all(key in bounding_box for key in required_keys):
            raise falcon.HTTPBadRequest(
                description=f"bounding_box must contain all of: {', '.join(required_keys)}"
            )
        # Reject degenerate boxes, consistent with the validation performed
        # by the insight annotation endpoint
        if (
            bounding_box["y_min"] >= bounding_box["y_max"]
            or bounding_box["x_min"] >= bounding_box["x_max"]
        ):
            raise falcon.HTTPBadRequest(
                description="Invalid bounding box coordinates: min values must be less than max values"
            )

        try:
            # Stream the download; the timeout prevents the worker from
            # hanging indefinitely on an unresponsive host
            response = requests.get(image_url, stream=True, timeout=30)
            response.raise_for_status()

            # Buffer the image in a temporary file that is deleted on exit
            # from the `with` block
            with tempfile.NamedTemporaryFile(suffix=".jpg") as tmp_file:
                for chunk in response.iter_content(chunk_size=8192):
                    tmp_file.write(chunk)
                tmp_file.flush()

                # Imported locally, presumably to avoid an import cycle with
                # robotoff.images — TODO confirm before moving to module level
                from robotoff.images import crop_nutrition_image

                cropped_image = crop_nutrition_image(tmp_file.name, bounding_box)

                # Serve the cropped image
                image_response(cropped_image, resp)

        except requests.exceptions.RequestException as e:
            raise falcon.HTTPBadRequest(
                description=f"Error downloading image: {str(e)}"
            )
        except falcon.HTTPError:
            # Let falcon HTTP errors propagate unchanged instead of being
            # re-wrapped as 500s by the catch-all below
            raise
        except Exception as e:
            raise falcon.HTTPInternalServerError(
                description=f"Error cropping image: {str(e)}"
            )


api = falcon.App(
middleware=[
falcon.CORSMiddleware(allow_origins="*", allow_credentials="*"),
Expand Down Expand Up @@ -1888,6 +1984,7 @@ def custom_handle_uncaught_exception(
api.add_route("/api/v1/products/dataset", UpdateDatasetResource())
api.add_route("/api/v1/images", ImageCollection())
api.add_route("/api/v1/images/crop", ImageCropResource())
api.add_route("/api/v1/images/nutrition/crop", NutritionImageCropResource())
api.add_route("/api/v1/image_predictions", ImagePredictionResource())
api.add_route("/api/v1/image_predictions/import", ImagePredictionImporterResource())
api.add_route("/api/v1/images/predict", ImagePredictorResource())
Expand Down
10 changes: 8 additions & 2 deletions robotoff/app/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ def save_annotation(
data: Optional[dict] = None,
auth: Optional[OFFAuthentication] = None,
trusted_annotator: bool = False,
bounding_box: Optional[dict] = None,
) -> AnnotationResult:
"""Saves annotation either by using a single response as ground truth or
by using several responses.
Expand All @@ -394,8 +395,9 @@ def save_annotation(
:param auth: User authentication data, it is expected to be None if
`trusted_annotator=False` (=anonymous vote)
:param trusted_annotator: Defines whether the given annotation comes from
an authoritative source (e.g. a trusted user), ot whether the annotation
should be subject to the voting system.
        an authoritative source (e.g. a trusted user), or whether the annotation
        should be subject to the voting system.
:param bounding_box: Optional bounding box coordinates for nutrition insights
"""
try:
insight: Union[ProductInsight, None] = ProductInsight.get_by_id(insight_id)
Expand All @@ -408,6 +410,10 @@ def save_annotation(
if insight.annotation is not None:
return ALREADY_ANNOTATED_RESULT

# Update bounding box if provided
if bounding_box is not None and trusted_annotator:
insight.bounding_box = bounding_box

# We use AnnotationVote mechanism to save annotation = -1 (ignore) for
# authenticated users, so that it's not returned again to the user
if not trusted_annotator or annotation == -1:
Expand Down
17 changes: 17 additions & 0 deletions robotoff/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,20 @@ def delete_images(product_id: ProductIdentifier, image_ids: list[str]):
deleted_embeddings_total,
deleted_logos_total,
)


def crop_nutrition_image(image_path: str, bounding_box: dict) -> Image.Image:
    """Crop an image using the nutrition bounding box.

    :param image_path: Path to the image file to crop
    :param bounding_box: Dictionary with x_min, y_min, x_max, y_max coordinates
    :return: Cropped image
    """
    # PIL crop boxes are (left, upper, right, lower)
    left, upper, right, lower = (
        bounding_box[key] for key in ("x_min", "y_min", "x_max", "y_max")
    )
    return Image.open(image_path).crop((left, upper, right, lower))
27 changes: 20 additions & 7 deletions robotoff/insights/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,11 @@ def process_annotation(
description=str(e),
)
validated_nutrients = cls.add_default_unit(validated_nutrients)

# Save custom bounding box if provided in the validation data
if "bounding_box" in data:
insight.bounding_box = data["bounding_box"]

# We override the predicted nutrient values by the ones submitted by the
# user
insight.data["annotation"] = validated_nutrients.model_dump()
Expand Down Expand Up @@ -841,14 +846,22 @@ def select_nutrition_image(

rotation = get_image_rotation(insight.source_image)

nutrition_table_detections = get_nutrition_table_prediction(
insight.source_image, threshold=0.5
)
# Use bounding box from insight if available
bounding_box = None
# Only crop according to the model predicted bounding box if there is exactly
# one nutrition table detected
if nutrition_table_detections and len(nutrition_table_detections) == 1:
bounding_box = nutrition_table_detections[0]["bounding_box"]
if insight.bounding_box is not None:
# Convert from database format to the format expected by
# convert_crop_bounding_box
bb = insight.bounding_box
bounding_box = (bb["y_min"], bb["x_min"], bb["y_max"], bb["x_max"])
else:
# If no bounding box in insight, use nutrition table detector
nutrition_table_detections = get_nutrition_table_prediction(
insight.source_image, threshold=0.5
)
# Only crop according to the model predicted bounding box if there is
# exactly one nutrition table detected
if nutrition_table_detections and len(nutrition_table_detections) == 1:
bounding_box = nutrition_table_detections[0]["bounding_box"]

crop_bounding_box: tuple[float, float, float, float] | None = None
if bounding_box:
Expand Down
20 changes: 19 additions & 1 deletion robotoff/insights/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,25 @@ def generate_candidates(

for prediction in predictions:
if cls.keep_prediction(product, list(prediction.data["nutrients"].keys())):
yield ProductInsight(**prediction.to_dict())
# Create dictionary from prediction
prediction_data = prediction.to_dict()

# Store bounding box directly in the insight model field
bounding_box = None
if "bounding_box" in prediction.data:
# Convert tuple to dictionary format expected by the database
y_min, x_min, y_max, x_max = prediction.data["bounding_box"]
bounding_box = {
"y_min": y_min,
"x_min": x_min,
"y_max": y_max,
"x_max": x_max,
}

# Create the insight with the bounding box field
insight = ProductInsight(**prediction_data)
insight.bounding_box = bounding_box
yield insight

@staticmethod
def keep_prediction(product: Product | None, nutrients_keys: list[str]) -> bool:
Expand Down
Loading
Loading