-
Notifications
You must be signed in to change notification settings - Fork 678
Description
Hi there!
I am trying to extract the data using the script below, but I am still struggling to get the crop values of cropped images in PDFs that were exported by Mac Pages or Mac Keynote.
Any ideas where I should look?
`import fitz
import math
def rgb_to_hex(rgb_tuple):
    """Convert an (r, g, b) colour with 0-1 float channels to "#rrggbb".

    Args:
        rgb_tuple: Sequence whose first three items are the red, green and
            blue channels as numbers in the range 0-1. Extra items are
            ignored; out-of-range channels are clamped.

    Returns:
        The lowercase hex string, or None when ``rgb_tuple`` is falsy, has
        fewer than three items, or holds a channel that cannot be converted
        (a warning is printed in that last case).
    """
    if not rgb_tuple or len(rgb_tuple) < 3:
        return None
    try:
        # Round to the nearest 8-bit level instead of truncating, so that
        # e.g. 0.999 becomes 255 rather than 254, then clamp to 0..255.
        r, g, b = (
            max(0, min(255, int(round(c * 255)))) for c in rgb_tuple[:3]
        )
    except (TypeError, ValueError, OverflowError):
        # ValueError covers NaN channels, OverflowError infinite ones.
        print(f"Warning: Could not convert color tuple {rgb_tuple} to hex.")
        return None
    return f"#{r:02x}{g:02x}{b:02x}"
def get_rotation_from_matrix(matrix):
    """Return the rotation of a fitz.Matrix snapped to 0, 90, 180 or 270 degrees.

    Any argument that is not a ``fitz.Matrix`` yields 0.
    """
    if isinstance(matrix, fitz.Matrix):
        # Only the first two coefficients (a, b) are needed for the angle.
        coef_a, coef_b = matrix.a, matrix.b
        raw_degrees = math.degrees(math.atan2(coef_b, coef_a))
        # Snap to the closest multiple of 90 and wrap into [0, 360).
        quarter_turns = round(raw_degrees / 90)
        return int(quarter_turns * 90 % 360)
    return 0
def process_shapes_and_lines(page, z_counter):
    """Extract vector drawings (filled shapes and lines) from a page.

    Every path returned by ``page.get_drawings()`` is classified as

    * ``"line"``: a stroked path whose bounding box is thin relative to its
      stroke width, or a filled path whose bounding box is less than 2
      units thick (a rectangle drawn to look like a line);
    * ``"shape"``: any other filled path.

    Full-opacity white fills covering more than 90 % of the page are treated
    as background and skipped. Stroked paths that are neither line-like nor
    filled are not reported.

    Args:
        page: Page to inspect; ``page.rect`` and ``page.get_drawings()`` are
            used.
        z_counter: zIndex given to the first element emitted.

    Returns:
        ``(vector_elements, z_counter)``: the list of element dicts and the
        counter advanced by one per emitted element.
    """
    vector_elements = []
    page_area = page.rect.get_area()

    for path in page.get_drawings():
        bbox = fitz.Rect(path.get("rect", (0, 0, 0, 0)))
        if not bbox.is_valid:
            continue
        # Skip boxes that collapse to a single point, but keep boxes that are
        # flat in only one direction: rejecting every empty box would also
        # reject exactly horizontal/vertical stroked lines.
        # NOTE(review): assumes the reported rect is the bounding box of the
        # path points (without stroke width) - confirm against PyMuPDF.
        if bbox.width <= 0 and bbox.height <= 0:
            continue
        has_area = not bbox.is_empty

        fill = path.get("fill")
        # The key may be present with a None value; treat that as opaque.
        fill_opacity = path.get("fill_opacity")
        if fill_opacity is None:
            fill_opacity = 1.0

        # Filter out large white background rectangles
        if (fill == (1.0, 1.0, 1.0)
                and fill_opacity == 1.0
                and bbox.get_area() > page_area * 0.90):
            continue

        # Filter out clipping paths (they aren't visible elements)
        if path.get("clip"):
            continue

        element_data = {
            "position": {'x0': bbox.x0, 'y0': bbox.y0, 'x1': bbox.x1, 'y1': bbox.y1},
            "zIndex": z_counter
        }

        # 1. CHECK FOR "STROKED" LINES
        stroke_color = rgb_to_hex(path.get("color"))
        line_width = path.get("width") or 0  # None-safe
        if stroke_color and line_width > 0:
            thin_limit = max(line_width * 2, 2)
            is_horizontal = bbox.height < thin_limit and bbox.width > bbox.height * 3
            is_vertical = bbox.width < thin_limit and bbox.height > bbox.width * 3
            if is_horizontal or is_vertical:
                element_data.update({"type": "line", "strokeColor": stroke_color, "strokeWidth": line_width})

        # 2. CHECK FOR "FILLED" ELEMENTS (Shapes & Filled Rectangular Lines)
        # A fill needs real area to be visible, so flat boxes are ignored here.
        fill_color = rgb_to_hex(fill)
        if "type" not in element_data and fill_color and has_area:
            if bbox.height < 2.0 and bbox.width > bbox.height * 3:
                element_data.update({"type": "line", "strokeColor": fill_color, "strokeWidth": bbox.height})
            elif bbox.width < 2.0 and bbox.height > bbox.width * 3:
                element_data.update({"type": "line", "strokeColor": fill_color, "strokeWidth": bbox.width})
            else:
                element_data.update({"type": "shape", "backgroundColor": fill_color, "opacity": fill_opacity})

        # 3. ADD THE ELEMENT TO THE LIST
        if "type" in element_data:
            vector_elements.append(element_data)
            z_counter += 1

    return vector_elements, z_counter
def _rect_intersection(rect_a, rect_b):
    """Return the intersection of two rectangles without modifying either."""
    # Rect.intersect() replaces the rectangle it is called on, so it must be
    # run on a copy to keep the caller's rectangles intact.
    return fitz.Rect(rect_a).intersect(rect_b)


def process_images(doc, page, image_bucket, document_id, z_counter):
    """Extract raster images from a page, upload them and describe their placement.

    For every entry of ``page.get_image_info(xrefs=True)`` that has an xref:

    1. the image bytes are extracted (merged with the soft mask into a PNG
       when the image has one);
    2. a warning is printed when the displayed aspect ratio differs from the
       pixel aspect ratio by more than 0.1;
    3. the visible ("crop") rectangle is estimated from, in priority order,
       drawings that list the image under ``"fill_images"``, clipping paths
       that intersect the image, and finally the page cropbox;
    4. rotation, mirroring, unrotated size and centre are derived from the
       image transform matrix;
    5. the bytes are uploaded to ``image_bucket`` and an element dict is
       appended to the result.

    Any exception raised while handling one image is printed and that image
    is skipped; the remaining images are still processed.

    Args:
        doc: Document owning ``page``; used for ``extract_image`` and Pixmap
            creation.
        page: Page whose images are extracted.
        image_bucket: Object exposing ``blob(name)`` (whose result offers
            ``upload_from_string``) and a ``name`` attribute.
        document_id: Prefix of the uploaded object names.
        z_counter: zIndex given to the first element emitted.

    Returns:
        ``(image_elements, z_counter)``: the list of image element dicts and
        the counter advanced by one per emitted element.
    """
    image_elements = []
    page_cropbox = page.cropbox
    image_info_list = page.get_image_info(xrefs=True)

    # Rectangles of drawings that use an image as fill (xref -> rects), and
    # rectangles of explicit clipping paths.
    image_fill_rects = {}
    clipping_paths = []
    try:
        # extended=True is required: without it get_drawings() leaves out
        # clipping paths, so no entry would ever have type "clip".
        for draw in page.get_drawings(extended=True):
            draw_rect = draw.get("rect")
            if draw_rect is None:
                continue
            # 1. Check for image fills
            for fill_xref in draw.get("fill_images") or ():
                image_fill_rects.setdefault(fill_xref, []).append(fitz.Rect(draw_rect))
            # 2. Check for clipping paths
            if draw.get("type") == "clip":
                clipping_paths.append(fitz.Rect(draw_rect))
    except Exception as e:
        print(f"Warning: Could not get drawings: {e}")

    for img_index, info in enumerate(image_info_list):
        xref = info["xref"]
        if not xref:
            continue

        try:
            # --- 1. Image Bytes Extraction and Transparency Handling ---
            base_image = doc.extract_image(xref)
            # Prefer a mask xref from the image info; otherwise use the one
            # reported by extract_image().
            smask = info.get("smask") or base_image.get("smask") or 0

            image_bytes = None
            image_ext = None
            if smask > 0:
                # Merge image and soft mask into one PNG with transparency.
                try:
                    mask_image = doc.extract_image(smask)
                    pix1 = fitz.Pixmap(doc, xref)
                    mask = fitz.Pixmap(mask_image["image"])
                    pix_with_mask = fitz.Pixmap(pix1, mask)
                    image_bytes = pix_with_mask.tobytes("png")
                    image_ext = "png"
                except Exception as mask_error:
                    # Best effort: fall back to the raw image without alpha.
                    print(f"Warning: Could not apply mask {smask} to image xref {xref}: {mask_error}")
                    image_bytes = None
                finally:
                    pix1 = mask = pix_with_mask = None
            if image_bytes is None:
                image_bytes = base_image["image"]
                image_ext = base_image["ext"]

            if not image_bytes:
                print(f"Warning: Image xref {xref} has empty bytes. Skipping.")
                continue

            # Original pixel dimensions (0 when not reported).
            orig_w = base_image.get("width", 0)
            orig_h = base_image.get("height", 0)

            # --- 2. Determine Effective Crop Rectangle (Visual Bounds) ---
            full_pos_rect = fitz.Rect(info["bbox"])

            # Check for Aspect Ratio Distortion
            if full_pos_rect.width > 0 and full_pos_rect.height > 0 and orig_w > 0 and orig_h > 0:
                orig_ratio = orig_w / orig_h
                display_ratio = full_pos_rect.width / full_pos_rect.height
                if abs(orig_ratio - display_ratio) > 0.1:
                    print(f" • ⚠️ DISTORTION DETECTED for Image {img_index} (Ref: {xref}):")
                    print(f" Orig: {orig_w}x{orig_h} (Ratio: {orig_ratio:.2f})")
                    print(f" Display: {full_pos_rect.width:.1f}x{full_pos_rect.height:.1f} (Ratio: {display_ratio:.2f})")

            # Priority 1: the image is the fill of a drawing. The same xref
            # may occur several times, so take the fill rect overlapping this
            # occurrence the most.
            best_fill = None
            best_fill_area = 0
            for fill_rect in image_fill_rects.get(xref, ()):
                area = _rect_intersection(full_pos_rect, fill_rect).get_area()
                if area > best_fill_area:
                    best_fill_area = area
                    best_fill = fill_rect

            if best_fill is not None:
                effective_crop_rect = fitz.Rect(best_fill)
            else:
                # Priority 2: a clipping path intersecting the image is
                # assumed to be masking it.
                # NOTE(review): picking the largest overlap favours page-sized
                # clips over tight ones - confirm this heuristic on real files.
                best_clip = None
                best_clip_area = 0
                for clip_rect in clipping_paths:
                    if not clip_rect.intersects(full_pos_rect):
                        continue
                    area = _rect_intersection(full_pos_rect, clip_rect).get_area()
                    if area > best_clip_area:
                        best_clip_area = area
                        best_clip = clip_rect

                if best_clip is not None:
                    effective_crop_rect = _rect_intersection(full_pos_rect, best_clip)
                else:
                    # Fallback: intersect with page cropbox
                    effective_crop_rect = _rect_intersection(full_pos_rect, page_cropbox)

            if effective_crop_rect.is_empty:
                effective_crop_rect = fitz.Rect(full_pos_rect)
            # Ensure crop is not larger than full (sanity check)
            effective_crop_rect = _rect_intersection(effective_crop_rect, full_pos_rect)

            # --- 3. Transformation/Rotation Logic ---
            rotation = 0
            is_flipped_horizontal = False
            is_flipped_vertical = False
            # Defaults used when no usable transform is available.
            unrotated_w = full_pos_rect.width
            unrotated_h = full_pos_rect.height
            center_x = (full_pos_rect.x0 + full_pos_rect.x1) / 2
            center_y = (full_pos_rect.y0 + full_pos_rect.y1) / 2

            transform_data = info.get("transform")
            if transform_data and len(transform_data) == 6:
                transform_matrix = fitz.Matrix(transform_data)

                # The matrix maps the unit square to the image quad, so the
                # centre is the image of (0.5, 0.5). It must be computed from
                # the unmodified matrix, i.e. before any un-flipping below.
                center_x = transform_matrix.a * 0.5 + transform_matrix.c * 0.5 + transform_matrix.e
                center_y = transform_matrix.b * 0.5 + transform_matrix.d * 0.5 + transform_matrix.f

                # Negative determinant = mirrored image.
                det = transform_matrix.a * transform_matrix.d - transform_matrix.b * transform_matrix.c
                if det < 0:
                    is_flipped_horizontal = True
                    # Un-flip the matrix to get correct rotation/dims
                    transform_matrix.a = -transform_matrix.a
                    transform_matrix.b = -transform_matrix.b

                # Width vector is (a, b), height vector is (c, d).
                unrotated_w = math.hypot(transform_matrix.a, transform_matrix.b)
                unrotated_h = math.hypot(transform_matrix.c, transform_matrix.d)
                rotation = get_rotation_from_matrix(transform_matrix)

            # --- 4. Upload Logic ---
            image_filename = f"{document_id}/page{page.number + 1}_img{img_index}.{image_ext}"
            blob = image_bucket.blob(image_filename)
            blob.upload_from_string(image_bytes, content_type=f"image/{image_ext}")
            public_url = f"https://storage.googleapis.com/{image_bucket.name}/{image_filename}"

            # --- 5. Build Element Data ---
            image_element = {
                "type": "image", "src": public_url,
                # Full (unclipped) placement of the image on the page.
                "position": {'x0': full_pos_rect.x0, 'y0': full_pos_rect.y0, 'x1': full_pos_rect.x1, 'y1': full_pos_rect.y1},
                # Estimated visible part of the image.
                "crop": {'x0': effective_crop_rect.x0, 'y0': effective_crop_rect.y0, 'x1': effective_crop_rect.x1, 'y1': effective_crop_rect.y1},
                "rotation": rotation,
                "isFlippedHorizontal": is_flipped_horizontal,
                "isFlippedVertical": is_flipped_vertical,
                "originalWidth": orig_w,
                "originalHeight": orig_h,
                # Layout data for precise positioning
                "layout": {
                    "width": unrotated_w,
                    "height": unrotated_h,
                    "centerX": center_x,
                    "centerY": center_y
                },
                "zIndex": z_counter
            }
            image_elements.append(image_element)
            z_counter += 1
        except Exception as e:
            print(f"Error processing image xref {xref} on page {page.number + 1}. Error: {e}")

    return image_elements, z_counter

