Skip to content

Cannot find the correct attributes or methods to get the correct cropping values #4818

@AntoineFachez

Description

@AntoineFachez

Hi there!

I am trying to get the data using the script below, but I am still struggling to get the crop values of images in PDFs that were exported by Mac Pages or Mac Keynote.

Any ideas where to point me?

Goal/Original:
Image

Current state:
Image

`import fitz
import math

def rgb_to_hex(rgb_tuple):
    """Convert an (r, g, b) tuple of 0-1 floats to a ``#rrggbb`` hex string.

    Each of the first three components is scaled to 0-255, rounded to the
    nearest integer and clamped to that range; extra components are ignored.

    Returns None when the input is empty, has fewer than three components,
    or holds a component that cannot be converted (a warning is printed in
    that last case).
    """
    if not rgb_tuple or len(rgb_tuple) < 3:
        return None
    try:
        # Round instead of truncating: a channel such as 0.5019607
        # (128/255 after float storage) must map to 0x80, not 0x7f.
        r, g, b = (
            max(0, min(255, int(round(c * 255)))) for c in rgb_tuple[:3]
        )
        return f"#{r:02x}{g:02x}{b:02x}"
    except (TypeError, ValueError, OverflowError):
        # ValueError covers NaN, OverflowError covers +/-inf components.
        print(f"Warning: Could not convert color tuple {rgb_tuple} to hex.")
        return None

def get_rotation_from_matrix(matrix):
    """Return the rotation encoded in a fitz.Matrix, snapped to 0/90/180/270.

    Anything that is not a fitz.Matrix yields 0.
    """
    if isinstance(matrix, fitz.Matrix):
        # Only the first two of the six coefficients carry the rotation.
        cos_part, sin_part, *_ = matrix
        raw_degrees = math.degrees(math.atan2(sin_part, cos_part))
        # Snap to the nearest quarter turn, then wrap into [0, 360).
        quarter_turns = round(raw_degrees / 90)
        return int(quarter_turns * 90 % 360)
    return 0

def process_shapes_and_lines(page, z_counter):
    """Collect the visible vector drawings of a page as line/shape elements.

    A drawing becomes a "line" when its bounding box is thin and elongated,
    either because it is stroked or because it is a thin filled rectangle.
    Any other filled drawing becomes a "shape". Near-page-sized opaque white
    fills and entries flagged with a "clip" key are skipped.

    Returns a tuple ``(elements, z_counter)`` where ``z_counter`` has been
    advanced by one for every element emitted.
    """
    collected = []
    background_threshold = page.rect.get_area() * 0.90

    for drawing in page.get_drawings():
        box = fitz.Rect(drawing.get("rect", (0, 0, 0, 0)))
        if box.is_empty or not box.is_valid:
            continue

        # Skip opaque white rectangles covering (almost) the whole page.
        if (
            drawing.get("fill") == (1.0, 1.0, 1.0)
            and drawing.get("fill_opacity", 1.0) == 1.0
            and box.get_area() > background_threshold
        ):
            continue

        # Skip clipping paths: they are not visible elements.
        if drawing.get("clip"):
            continue

        box_w = box.width
        box_h = box.height
        attributes = None

        # 1. Stroked paths whose box is thin in one direction are lines.
        stroke_hex = rgb_to_hex(drawing.get("color"))
        stroke_w = drawing.get("width", 0)
        if stroke_hex and stroke_w > 0:
            thin_limit = max(stroke_w * 2, 2)
            lies_flat = box_h < thin_limit and box_w > box_h * 3
            stands_up = box_w < thin_limit and box_h > box_w * 3
            if lies_flat or stands_up:
                attributes = {
                    "type": "line",
                    "strokeColor": stroke_hex,
                    "strokeWidth": stroke_w,
                }

        # 2. Filled paths: thin rectangles count as lines, the rest as shapes.
        fill_hex = rgb_to_hex(drawing.get("fill"))
        fill_alpha = drawing.get("fill_opacity", 1.0)
        if attributes is None and fill_hex:
            if box_h < 2.0 and box_w > box_h * 3:
                attributes = {
                    "type": "line",
                    "strokeColor": fill_hex,
                    "strokeWidth": box_h,
                }
            elif box_w < 2.0 and box_h > box_w * 3:
                attributes = {
                    "type": "line",
                    "strokeColor": fill_hex,
                    "strokeWidth": box_w,
                }
            else:
                attributes = {
                    "type": "shape",
                    "backgroundColor": fill_hex,
                    "opacity": fill_alpha,
                }

        # 3. Emit the element only when it was classified.
        if attributes is None:
            continue
        collected.append({
            "position": {'x0': box.x0, 'y0': box.y0, 'x1': box.x1, 'y1': box.y1},
            "zIndex": z_counter,
            **attributes,
        })
        z_counter += 1

    return collected, z_counter

def _intersection(first, second):
    """Return the overlap of two rects as a NEW fitz.Rect.

    fitz.Rect.intersect() modifies the rectangle it is called on, so the
    first operand is copied before intersecting to keep both inputs intact.
    """
    return fitz.Rect(first).intersect(second)


def _collect_fill_and_clip_rects(page):
    """Gather image-fill rects (keyed by xref) and clip rects from the page drawings."""
    image_fill_rects = {}  # xref -> list of fitz.Rect
    clipping_paths = []
    try:
        # extended=True is required: the default call does not report
        # clipping paths at all, so no "clip" entries would ever be seen.
        for draw in page.get_drawings(extended=True):
            # 1. Image fills (only present if the drawing dict carries
            #    a "fill_images" key).
            for fill_xref in draw.get("fill_images") or ():
                image_fill_rects.setdefault(fill_xref, []).append(
                    fitz.Rect(draw["rect"])
                )
            # 2. Clipping paths.
            if draw.get("type") == "clip":
                clipping_paths.append(fitz.Rect(draw["rect"]))
    except Exception as e:
        print(f"Warning: Could not get drawings: {e}")
    return image_fill_rects, clipping_paths


def _read_image_bytes(doc, xref, smask):
    """Return (image_bytes, image_ext, base_image) for an image xref.

    When a soft mask is present the image and mask are merged and
    re-encoded as PNG; otherwise the embedded bytes are returned as stored.
    """
    base_image = doc.extract_image(xref)
    if smask > 0:
        mask_image = doc.extract_image(smask)
        base_pix = fitz.Pixmap(doc, xref)
        mask_pix = fitz.Pixmap(mask_image["image"])
        merged = fitz.Pixmap(base_pix, mask_pix)
        return merged.tobytes("png"), "png", base_image
    return base_image["image"], base_image["ext"], base_image


def _effective_crop_rect(full_pos_rect, fill_rects, clipping_paths, page_cropbox):
    """Pick the visible part of an image; never mutates ``full_pos_rect``."""
    # Priority 1: the image is used as a fill of a drawing; take the fill
    # rect that overlaps the reported bbox the most.
    best_fill, best_fill_area = None, 0
    for fill_rect in fill_rects:
        area = _intersection(full_pos_rect, fill_rect).get_area()
        if area > best_fill_area:
            best_fill_area = area
            best_fill = fill_rect

    if best_fill is not None and best_fill_area > 0:
        crop = fitz.Rect(best_fill)
    else:
        # Priority 2: an intersecting clipping path.
        # NOTE(review): choosing the LARGEST overlap favours a clip that
        # covers the whole image (e.g. a page-wide clip) over a tighter
        # one; confirm this heuristic against real Pages/Keynote output.
        best_clip, best_clip_area = None, 0
        for clip_rect in clipping_paths:
            if clip_rect.intersects(full_pos_rect):
                area = _intersection(full_pos_rect, clip_rect).get_area()
                if area > best_clip_area:
                    best_clip_area = area
                    best_clip = clip_rect
        if best_clip is not None:
            crop = _intersection(full_pos_rect, best_clip)
        else:
            # Fallback: limit the image to the page cropbox.
            crop = _intersection(full_pos_rect, page_cropbox)

    if crop.is_empty:
        crop = fitz.Rect(full_pos_rect)
    # Sanity check: the crop can never extend beyond the full placement.
    return _intersection(crop, full_pos_rect)


def _image_layout(info, full_pos_rect):
    """Derive rotation, horizontal-flip flag, unrotated size and centre of an image."""
    layout = {
        "rotation": 0,
        "flipped_horizontal": False,
        "width": full_pos_rect.width,
        "height": full_pos_rect.height,
        "center_x": (full_pos_rect.x0 + full_pos_rect.x1) / 2,
        "center_y": (full_pos_rect.y0 + full_pos_rect.y1) / 2,
    }
    transform_data = info.get("transform")
    if not transform_data or len(transform_data) != 6:
        return layout

    matrix = fitz.Matrix(transform_data)

    # Centre = image of (0.5, 0.5). It must come from the ORIGINAL matrix,
    # i.e. before the mirror correction below changes a and b.
    layout["center_x"] = matrix.a * 0.5 + matrix.c * 0.5 + matrix.e
    layout["center_y"] = matrix.b * 0.5 + matrix.d * 0.5 + matrix.f

    # A negative determinant means the image is mirrored.
    if matrix.a * matrix.d - matrix.b * matrix.c < 0:
        layout["flipped_horizontal"] = True
        # Un-flip so the rotation is measured on a non-mirrored matrix.
        matrix.a = -matrix.a
        matrix.b = -matrix.b

    # The matrix maps the unit square to the image quad: (a, b) is the
    # width vector and (c, d) the height vector.
    layout["width"] = math.sqrt(matrix.a ** 2 + matrix.b ** 2)
    layout["height"] = math.sqrt(matrix.c ** 2 + matrix.d ** 2)
    layout["rotation"] = get_rotation_from_matrix(matrix)
    return layout


def process_images(doc, page, image_bucket, document_id, z_counter):
    """Extract the raster images of a page, upload them and describe their placement.

    For every image reported by ``page.get_image_info(xrefs=True)`` the
    image bytes are read (merged with the soft mask when there is one),
    uploaded to ``image_bucket`` and described by a dict holding the full
    placement rect ("position"), the visible part of it ("crop"), rotation,
    flip flags, original pixel size and layout data.

    The crop is taken from, in order of priority: a drawing that uses the
    image as fill, an intersecting clipping path, the page cropbox.

    Args:
        doc: the open document the page belongs to.
        page: the page to scan.
        image_bucket: storage bucket; ``.blob(name)`` and ``.name`` are used.
        document_id: prefix for the uploaded file names.
        z_counter: z-index to assign to the first emitted element.

    Returns:
        ``(image_elements, z_counter)`` with ``z_counter`` advanced by one
        per emitted element. An image that fails to process is reported on
        stdout and skipped; it does not abort the page.
    """
    image_elements = []
    page_cropbox = page.cropbox
    image_info_list = page.get_image_info(xrefs=True)
    image_fill_rects, clipping_paths = _collect_fill_and_clip_rects(page)

    for img_index, info in enumerate(image_info_list):
        xref = info["xref"]
        if not xref:
            continue

        smask = info.get("smask", 0)

        try:
            # --- 1. Image bytes and transparency handling ---
            image_bytes, image_ext, base_image = _read_image_bytes(doc, xref, smask)
            if not image_bytes:
                print(f"Warning: Image xref {xref} has empty bytes. Skipping.")
                continue

            full_pos_rect = fitz.Rect(info["bbox"])

            # --- 2. Original pixel size and aspect-ratio check ---
            orig_w = 0
            orig_h = 0
            if base_image:
                orig_w = base_image["width"]
                orig_h = base_image["height"]
                if full_pos_rect.width > 0 and full_pos_rect.height > 0 and orig_w > 0 and orig_h > 0:
                    orig_ratio = orig_w / orig_h
                    display_ratio = full_pos_rect.width / full_pos_rect.height
                    if abs(orig_ratio - display_ratio) > 0.1:
                        print(f"    • ⚠️ DISTORTION DETECTED for Image {img_index} (Ref: {xref}):")
                        print(f"      Orig: {orig_w}x{orig_h} (Ratio: {orig_ratio:.2f})")
                        print(f"      Display: {full_pos_rect.width:.1f}x{full_pos_rect.height:.1f} (Ratio: {display_ratio:.2f})")

            # --- 3. Effective crop rectangle (visual bounds) ---
            effective_crop_rect = _effective_crop_rect(
                full_pos_rect,
                image_fill_rects.get(xref, ()),
                clipping_paths,
                page_cropbox,
            )

            # --- 4. Transformation / rotation ---
            layout = _image_layout(info, full_pos_rect)

            # --- 5. Upload ---
            image_filename = f"{document_id}/page{page.number + 1}_img{img_index}.{image_ext}"
            blob = image_bucket.blob(image_filename)
            blob.upload_from_string(image_bytes, content_type=f"image/{image_ext}")
            public_url = f"https://storage.googleapis.com/{image_bucket.name}/{image_filename}"

            # --- 6. Element data ---
            image_elements.append({
                "type": "image", "src": public_url,
                "position": {'x0': full_pos_rect.x0, 'y0': full_pos_rect.y0, 'x1': full_pos_rect.x1, 'y1': full_pos_rect.y1},
                # 'crop' holds the visible part of the placement rect.
                "crop": {'x0': effective_crop_rect.x0, 'y0': effective_crop_rect.y0, 'x1': effective_crop_rect.x1, 'y1': effective_crop_rect.y1},
                "rotation": layout["rotation"],
                "isFlippedHorizontal": layout["flipped_horizontal"],
                # Vertical flips are never detected separately.
                "isFlippedVertical": False,
                "originalWidth": orig_w,
                "originalHeight": orig_h,
                "layout": {
                    "width": layout["width"],
                    "height": layout["height"],
                    "centerX": layout["center_x"],
                    "centerY": layout["center_y"],
                },
                "zIndex": z_counter,
            })
            z_counter += 1

        except Exception as e:
            # Best effort: one broken image must not abort the whole page.
            print(f"Error processing image xref {xref} on page {page.number + 1}. Error: {e}")

    return image_elements, z_counter

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions