Cannot find the correct attributes or methods to get the correct cropping values

Hi there!

I am trying to get the data using the script below, but I am still struggling to get the crop values of images in PDFs that were exported by Pages or Keynote on macOS.

Any ideas on where to look, or could you point me in the right direction?

Goal/Original:
<img width="837" height="458" alt="Image" src="https://github.com/user-attachments/assets/80040f03-91a9-49cb-b52e-4ea585e1dae2" />

Current state:
<img width="843" height="473" alt="Image" src="https://github.com/user-attachments/assets/e1aa0956-3e69-40e2-95bc-fd4b4227258b" />

`import fitz
import math

def rgb_to_hex(rgb_tuple):
    """Convert an (r, g, b) sequence of 0-1 floats to a ``#rrggbb`` string.

    Each of the first three components is scaled to 0-255, rounded to the
    nearest integer and clamped to that range. Any further components are
    ignored.

    Returns:
        The hex string, or None when the input is empty, has fewer than
        three components, or holds a component that cannot be converted
        (a warning is printed in that last case).
    """
    if not rgb_tuple or len(rgb_tuple) < 3:
        return None
    try:
        # Round rather than truncate: a component a hair below k/255
        # (e.g. 0.5019 for 128) must still map to k, not k-1.
        r, g, b = (max(0, min(255, round(c * 255))) for c in rgb_tuple[:3])
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite components, ValueError covers NaN.
        print(f"Warning: Could not convert color tuple {rgb_tuple} to hex.")
        return None
    return f"#{r:02x}{g:02x}{b:02x}"

def get_rotation_from_matrix(matrix):
    """Return the rotation of a fitz.Matrix snapped to 0, 90, 180 or 270 degrees.

    Any argument that is not a fitz.Matrix yields 0.
    """
    if not isinstance(matrix, fitz.Matrix):
        return 0

    # Only the first two coefficients are needed to recover the angle.
    x_part, y_part = matrix.a, matrix.b
    raw_degrees = math.degrees(math.atan2(y_part, x_part))

    # Snap to the nearest quarter turn, then fold into the 0-359 range.
    quarter_turns = round(raw_degrees / 90)
    snapped = (quarter_turns * 90) % 360
    return int(snapped)

def process_shapes_and_lines(page, z_counter):
    """
    Collect the vector drawings of a page as "line" and "shape" elements.

    A drawing becomes a line when it is stroked and its bounding box is thin
    relative to the stroke width, or when it is filled and thinner than two
    units; any other filled drawing becomes a shape. Near-page-sized opaque
    white fills and drawings flagged with a truthy "clip" entry are skipped.

    Returns a tuple of (list of element dicts, updated z_counter); the
    counter is advanced once for every element that is emitted.
    """
    page_area = page.rect.get_area()
    collected = []

    for drawing in page.get_drawings():
        box = fitz.Rect(drawing.get("rect", (0, 0, 0, 0)))
        if box.is_empty or not box.is_valid:
            continue

        # An opaque white fill covering more than 90% of the page is
        # treated as a background and ignored.
        covers_page_in_white = (
            drawing.get("fill") == (1.0, 1.0, 1.0)
            and drawing.get("fill_opacity", 1.0) == 1.0
            and box.get_area() > page_area * 0.90
        )
        if covers_page_in_white:
            continue

        # Clipping paths are not visible elements.
        if drawing.get("clip"):
            continue

        box_w = box.width
        box_h = box.height
        attributes = None

        # Stroked paths: a line when one side is thin compared to the stroke.
        stroke_hex = rgb_to_hex(drawing.get("color"))
        stroke_width = drawing.get("width", 0)
        if stroke_hex and stroke_width > 0:
            thin_limit = max(stroke_width * 2, 2)
            lies_flat = box_h < thin_limit and box_w > box_h * 3
            stands_up = box_w < thin_limit and box_h > box_w * 3
            if lies_flat or stands_up:
                attributes = {
                    "type": "line",
                    "strokeColor": stroke_hex,
                    "strokeWidth": stroke_width,
                }

        # Filled paths: thin rectangles count as lines, the rest as shapes.
        fill_hex = rgb_to_hex(drawing.get("fill"))
        fill_alpha = drawing.get("fill_opacity", 1.0)
        if attributes is None and fill_hex:
            if box_h < 2.0 and box_w > box_h * 3:
                attributes = {"type": "line", "strokeColor": fill_hex, "strokeWidth": box_h}
            elif box_w < 2.0 and box_h > box_w * 3:
                attributes = {"type": "line", "strokeColor": fill_hex, "strokeWidth": box_w}
            else:
                attributes = {"type": "shape", "backgroundColor": fill_hex, "opacity": fill_alpha}

        if attributes is None:
            continue

        collected.append({
            "position": {'x0': box.x0, 'y0': box.y0, 'x1': box.x1, 'y1': box.y1},
            "zIndex": z_counter,
            **attributes,
        })
        z_counter += 1

    return collected, z_counter

def process_images(doc, page, image_bucket, document_id, z_counter):
    """
    Extracts raster images from a page, uploads them, and determines 
    the final visually cropped bounds and transformation data.
    
    The visually cropped bounds are determined by page.get_image_rects,
    which respects PDF clipping paths.
    """
    image_elements = []
    page_cropbox = page.cropbox
    image_info_list = page.get_image_info(xrefs=True)
    
    # Get the visual bounding boxes of all drawn images (clipped)
    # bboxlog returns (type, rect) tuples in drawing order.
    # We filter for 'fill-image'.
    try:
        bbox_log = page.get_bboxlog()
        # bbox_log returns (type, rect_tuple). We need to convert rect_tuple to fitz.Rect.
        visible_image_rects = [fitz.Rect(r) for t, r in bbox_log if t == "fill-image"]
    except Exception as e:
        print(f"Warning: Could not get bboxlog: {e}")
        visible_image_rects = []

    # Check for images used as fills in drawings (common in Mac Pages)
    image_fill_rects = {} # Map xref -> list of rects
    # Also collect explicit clipping paths
    clipping_paths = []
    
    try:
        drawings = page.get_drawings()
        for draw in drawings:
            # 1. Check for image fills
            if "fill_images" in draw and draw["fill_images"]:
                for xref in draw["fill_images"]:
                    if xref not in image_fill_rects:
                        image_fill_rects[xref] = []
                    image_fill_rects[xref].append(fitz.Rect(draw["rect"]))
            
            # 2. Check for clipping paths
            if draw["type"] == "clip":
                clipping_paths.append(fitz.Rect(draw["rect"]))
                
    except Exception as e:
        print(f"Warning: Could not get drawings: {e}")

    for img_index, info in enumerate(image_info_list):
        xref = info["xref"]
        if not xref: continue 

        smask = info.get("smask", 0)
        
        # --- 1. Image Bytes Extraction and Transparency Handling ---
        try:
            image_ext = "png" # Default ext if transparency is involved
            base_image = doc.extract_image(xref)
            image_bytes = None
            
            if smask > 0:
                # Handle images with transparency masks
                mask_image = doc.extract_image(smask)
                pix1 = fitz.Pixmap(doc, xref)
                mask = fitz.Pixmap(mask_image["image"])
                pix_with_mask = fitz.Pixmap(pix1, mask)
                image_bytes = pix_with_mask.tobytes("png")
                pix1 = mask = pix_with_mask = None 
            else:
                image_bytes = base_image["image"]
                image_ext = base_image["ext"]

            if not image_bytes:
                 print(f"Warning: Image xref {xref} has empty bytes. Skipping.")
                 continue
            
            # 3. Extract RAW image metadata to get original dimensions
            base_img = doc.extract_image(xref)
            orig_w = 0
            orig_h = 0
            if base_img:
                orig_w = base_img["width"]
                orig_h = base_img["height"]
                
                # Check for Aspect Ratio Distortion
                bbox_rect = fitz.Rect(info["bbox"])
                if bbox_rect.width > 0 and bbox_rect.height > 0 and orig_w > 0 and orig_h > 0:
                    orig_ratio = orig_w / orig_h
                    display_ratio = bbox_rect.width / bbox_rect.height
                    
                    if abs(orig_ratio - display_ratio) > 0.1:
                        print(f"    • ⚠️ DISTORTION DETECTED for Image {img_index} (Ref: {xref}):")
                        print(f"      Orig: {orig_w}x{orig_h} (Ratio: {orig_ratio:.2f})")
                        print(f"      Display: {bbox_rect.width:.1f}x{bbox_rect.height:.1f} (Ratio: {display_ratio:.2f})")

            # --- 2. Determine Effective Crop Rectangle (Visual Bounds) ---
            full_pos_rect = fitz.Rect(info["bbox"])
            
            # Try to find the matching clipped rect
            effective_crop_rect = full_pos_rect
            
            # Priority 1: Check if image is a fill for a drawing (Mac Pages style)
            # Handle multiple occurrences of the same image xref
            found_fill_match = False
            if xref in image_fill_rects:
                # Find the fill rect that best overlaps with the current image bbox
                best_fill = None
                best_overlap_area = 0
                
                for fill_rect in image_fill_rects[xref]:
                    # Calculate intersection with the reported bbox
                    inter = full_pos_rect.intersect(fill_rect)
                    area = inter.get_area()
                    
                    # We look for significant overlap
                    if area > best_overlap_area:
                        best_overlap_area = area
                        best_fill = fill_rect
                
                # If we found a good match (overlap > 0), use it
                if best_fill and best_overlap_area > 0:
                    effective_crop_rect = best_fill
                    found_fill_match = True
                    # print(f"  -> Found match in drawings (fill): {effective_crop_rect}")

            if not found_fill_match:
                # Priority 2: Check for intersecting clipping paths
                # Heuristic: If a clip rect intersects the image, it's likely masking it.
                best_clip = None
                best_clip_area = 0
                
                for clip_rect in clipping_paths:
                    if clip_rect.intersects(full_pos_rect):
                        # Calculate intersection area
                        inter = full_pos_rect.intersect(clip_rect)
                        area = inter.get_area()
                        if area > best_clip_area:
                            best_clip_area = area
                            best_clip = clip_rect
                
                if best_clip:
                    effective_crop_rect = full_pos_rect.intersect(best_clip)
                    # print(f"  -> Found match in clipping paths: {best_clip} -> Crop: {effective_crop_rect}")
                else:
                    # Fallback: intersect with page cropbox
                    effective_crop_rect = full_pos_rect.intersect(page_cropbox)
            
            if effective_crop_rect.is_empty:
                effective_crop_rect = full_pos_rect
            
            # Ensure crop is not larger than full (sanity check)
            effective_crop_rect = effective_crop_rect.intersect(full_pos_rect) 
            
            # --- 3. Transformation/Rotation Logic ---
            rotation = 0
            is_flipped_horizontal = False
            is_flipped_vertical = False
            
            # Default values for layout
            unrotated_w = full_pos_rect.width
            unrotated_h = full_pos_rect.height
            center_x = (full_pos_rect.x0 + full_pos_rect.x1) / 2
            center_y = (full_pos_rect.y0 + full_pos_rect.y1) / 2
            
            transform_data = info.get("transform")
            if transform_data and len(transform_data) == 6:
                transform_matrix = fitz.Matrix(transform_data)
                
                # Check determinant for mirroring/flipping (negative determinant = mirror)
                det = transform_matrix.a * transform_matrix.d - transform_matrix.b * transform_matrix.c
                if det < 0:
                    is_flipped_horizontal = True 
                    # Un-flip the matrix to get correct rotation/dims
                    transform_matrix.a = -transform_matrix.a
                    transform_matrix.b = -transform_matrix.b
                    
                # Calculate unrotated dimensions (scale factors)
                # The matrix maps the unit square (0..1, 0..1) to the image quad.
                # Width vector is (a, b), Height vector is (c, d)
                unrotated_w = math.sqrt(transform_matrix.a**2 + transform_matrix.b**2)
                unrotated_h = math.sqrt(transform_matrix.c**2 + transform_matrix.d**2)
                
                # Calculate rotation
                rotation = get_rotation_from_matrix(transform_matrix)
                
                # Calculate center point (transform of 0.5, 0.5)
                # x = a*0.5 + c*0.5 + e
                # y = b*0.5 + d*0.5 + f
                center_x = transform_matrix.a * 0.5 + transform_matrix.c * 0.5 + transform_matrix.e
                center_y = transform_matrix.b * 0.5 + transform_matrix.d * 0.5 + transform_matrix.f
                
            # --- 4. Upload Logic ---
            image_filename = f"{document_id}/page{page.number + 1}_img{img_index}.{image_ext}"
            blob = image_bucket.blob(image_filename)
            blob.upload_from_string(image_bytes, content_type=f"image/{image_ext}")
            public_url = f"https://storage.googleapis.com/{image_bucket.name}/{image_filename}"

            # --- 5. Build Element Data ---
            image_element = {
                "type": "image", "src": public_url,
                "position": {'x0': full_pos_rect.x0, 'y0': full_pos_rect.y0, 'x1': full_pos_rect.x1, 'y1': full_pos_rect.y1},
                # The 'crop' attribute now holds the precise visual dimensions.
                "crop": {'x0': effective_crop_rect.x0, 'y0': effective_crop_rect.y0, 'x1': effective_crop_rect.x1, 'y1': effective_crop_rect.y1}, 
                "rotation": rotation, 
                "isFlippedHorizontal": is_flipped_horizontal, 
                "isFlippedVertical": is_flipped_vertical, 
                "originalWidth": orig_w,
                "originalHeight": orig_h,
                # New layout data for precise positioning
                "layout": {
                    "width": unrotated_w,
                    "height": unrotated_h,
                    "centerX": center_x,
                    "centerY": center_y
                },
                "zIndex": z_counter
            }

            image_elements.append(image_element)
            z_counter += 1

        except Exception as e:
            print(f"Error processing image xref {xref} on page {page.number + 1}. Error: {e}")
        
    return image_elements, z_counter`

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Can not find the correct attributes or methods to get the correct values of croping #4818

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Can not find the correct attributes or methods to get the correct values of croping #4818

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions