Skip to content

Commit 9e28f00

Browse files
committed
feat: add and smart-crop dinesh image, finalize all team photos
1 parent 1df7e60 commit 9e28f00

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed
42.4 KB
Loading

backend/scripts/process_team_images.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import re
66
import cv2
77
import numpy as np
8+
import fitz # PyMuPDF
89

910
# Face detection uses OpenCV's built-in Haar cascade classifier: it is lightweight and is usually bundled with opencv-python
1011
def download_image(url, output_path):
@@ -24,8 +25,26 @@ def download_image(url, output_path):
2425
return False
2526

2627
def smart_crop_face(image_path, output_path, target_size=(400, 400)):
27-
# Load image
28-
img = cv2.imread(image_path)
28+
# Check if this is a PDF
29+
try:
30+
if image_path.lower().endswith('.pdf'):
31+
doc = fitz.open(image_path)
32+
for page in doc:
33+
pix = page.get_pixmap()
34+
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
35+
if pix.n == 4:
36+
img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
37+
elif pix.n == 1:
38+
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
39+
else:
40+
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
41+
break # Just get first page image
42+
else:
43+
img = cv2.imread(image_path)
44+
except Exception as e:
45+
print(f"Error loading image or PDF {image_path}: {e}")
46+
return False
47+
2948
if img is None:
3049
print(f"Could not read image {image_path}")
3150
return False
@@ -145,17 +164,34 @@ def main():
145164
filename = filename + ".jpg"
146165

147166
output_file = os.path.join(target_dir, filename)
167+
168+
# The download may turn out to be a PDF rather than an image, so download it first and inspect the file's leading bytes afterwards
148169
temp_file = os.path.join(target_dir, f"temp_{filename}")
149170

150171
print(f"Processing {name}...")
151172

152173
if download_image(img_url, temp_file):
153-
if smart_crop_face(temp_file, output_file):
174+
# Try to detect if it's a PDF by reading the first few bytes
175+
is_pdf = False
176+
with open(temp_file, 'rb') as tf:
177+
header = tf.read(4)
178+
if header == b'%PDF':
179+
is_pdf = True
180+
181+
# If it's a PDF, rename the temp file with a ".pdf" suffix so smart_crop_face's extension check routes it through the PyMuPDF loading branch
182+
proc_file = temp_file
183+
if is_pdf:
184+
proc_file = temp_file + ".pdf"
185+
os.rename(temp_file, proc_file)
186+
187+
if smart_crop_face(proc_file, output_file):
154188
print(f" -> Saved smart cropped image to {output_file}")
155189
else:
156190
print(f" -> Failed to process image")
157191

158192
# Clean up temp files: the renamed ".pdf" copy (if one was created) and the original download
193+
if os.path.exists(proc_file):
194+
os.remove(proc_file)
159195
if os.path.exists(temp_file):
160196
os.remove(temp_file)
161197
else:

0 commit comments

Comments
 (0)