Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 213 additions & 1 deletion iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class MaxLimitException(Exception):
valid_filetypes = ['jpg', 'jpeg', 'png', 'gif', 'tif', 'jp2', 'pdf', 'tiff']
AUDIO_FORMATS = ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']
VIDEO_FORMATS = ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime']
IMAGE_FORMATS = ['JPEG', 'PNG', 'GIF', 'TIFF']

class IsCollection(Exception):
# Used for when we need to raise to the route handler from inside the manifest function
Expand Down Expand Up @@ -737,6 +738,178 @@ def create_canvas_from_br(br_page, zipFile, identifier, pageCount, uri):

return canvas

def check_mixed_media(metadata):
"""
Check if an item contains both original images and videos.
Returns (has_images, has_videos) tuple.
Excludes thumbnail images to avoid false positives.
"""
has_images = False
has_videos = False

for file in metadata.get('files', []):
if file.get('source') != 'original':
continue

file_format = file.get('format', '')
file_name = file.get('name', '')

# Check for video formats
if file_format in VIDEO_FORMATS:
has_videos = True

# Check for image formats (excluding thumbnails)
if (file_format in IMAGE_FORMATS and
'thumb' not in file_name.lower() and
file_format not in ['JPEG Thumb', 'Thumbnail']):
Comment on lines +762 to +764
Copy link

Copilot AI Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thumbnail exclusion logic is duplicated between check_mixed_media (lines 762-764) and the mixed-media processing loop (lines 957-959). This creates a maintenance risk if the criteria for identifying thumbnails changes. Consider extracting this into a helper function like is_thumbnail_image(file_format, file_name) to ensure consistency.

Copilot uses AI. Check for mistakes.
has_images = True

# Early exit if we've found both
if has_images and has_videos:
return (True, True)

return (has_images, has_videos)

def create_image_canvas(identifier, file, metadata, domain, canvas_number):
Copy link

Copilot AI Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The canvas_number parameter is defined but never used in the function body. This parameter appears to be unnecessary since the canvas ID is generated from the file name, not from a sequential number.

Suggested change
def create_image_canvas(identifier, file, metadata, domain, canvas_number):
def create_image_canvas(identifier, file, metadata, domain, _canvas_number):

Copilot uses AI. Check for mistakes.
"""
Create a canvas for an image file.
Returns a Canvas object.
"""
file_name = file.get('name')
normalised_id = file_name.rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")

# Create canvas ID
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Copy link

Copilot AI Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Canvas IDs for images in mixed-media items don't include a sequential number (line 783 generates {identifier}/{slugged_id}/canvas), but the existing multi-file image handling includes pageCount in the canvas ID (line 1079: {identifier}${pageCount}/canvas). This inconsistency could cause issues if canvas IDs are expected to follow a specific pattern. Consider using a consistent ID pattern, either including the canvas number or ensuring the slugged filename is sufficient for uniqueness.

Copilot uses AI. Check for mistakes.

# Get image info
imgId = f"{identifier}/{file_name}".replace('/', '%2f')
imgURL = f"{IMG_SRV}/3/{imgId}"

# Get dimensions if available, otherwise use defaults
width = int(file.get('width', 1200))
height = int(file.get('height', 1800))

# Create canvas
canvas = Canvas(id=c_id, label=file_name, height=height, width=width)

# Create annotation page and annotation
ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation",
motivation="painting",
target=canvas.id)

# Create body with IIIF image service
body = AnnotationBody(
id=f"{imgURL}/full/max/0/default.jpg",
type='Image',
format='image/jpeg',
height=height,
width=width
)
body.service = [ServiceV3(id=imgURL, profile="level2", type="ImageService3")]

anno.body = body
ap.add_item(anno)
canvas.add_item(ap)

return canvas

def create_video_canvas(identifier, file, metadata, domain, derivatives=None, vttfiles=None):
"""
Create a canvas for a video file.
Returns a Canvas object.
"""
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"

# Get video dimensions and duration
duration = float(file.get('length', 0))
height = int(file.get('height', 480))
width = int(file.get('width', 640))

# Create canvas
c = Canvas(id=c_id, label=normalised_id, duration=duration, height=height, width=width)

# Add vtt if present
if vttfiles and normalised_id in vttfiles:
vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt"
vttNo = 1
for vttFile in vttfiles[normalised_id]:
vtAnno = c.make_annotation(
id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}",
motivation="supplementing",
target=c.id,
anno_page_id=vttAPId,
body={
"id": f"{domain}resource/{identifier}/{vttFile['name']}",
"type": "Text",
"format": "text/vtt",
}
)
# add label and language
if vttFile['name'].endswith("autogenerated.vtt"):
vtAnno.body.label = {'en': ['autogenerated']}
else:
# Assume language
splitName = vttFile['name'].split(".")
lang = splitName[-2]
vtAnno.body.add_label(lang, language="none")
vtAnno.body.language = lang
vttNo += 1

# Create annotation page and annotation
ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation",
motivation="painting",
target=c.id)

# Create body based on whether there are derivatives or not
if derivatives and file['name'] in derivatives:
body = Choice(items=[])
# Add the choices in order
for format in VIDEO_FORMATS:
if format in derivatives[file['name']]:
r = AnnotationBody(
id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}",
type='Video',
format=to_mimetype(derivatives[file['name']][format]['name'], format),
label={"none": [format]},
duration=duration,
height=height,
width=width
)
body.items.append(r)
elif file['format'] == format:
r = AnnotationBody(
id=f"https://archive.org/download/{identifier}/{file['name'].replace(' ', '%20')}",
type='Video',
format=to_mimetype(file['name'], format),
label={"none": [format]},
duration=duration,
height=height,
width=width
)
body.items.append(r)
else:
# No derivatives, use the original file
body = AnnotationBody(
id=f"https://archive.org/download/{identifier}/{file['name'].replace(' ', '%20')}",
type='Video',
format=to_mimetype(file['name'], file['format']),
label={"none": [file['format']]},
duration=duration,
height=height,
width=width
)

anno.body = body
ap.add_item(anno)
c.add_item(ap)

return c

def create_manifest3(identifier, domain=None, page=None):
# Get item metadata
metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
Expand All @@ -761,7 +934,46 @@ def create_manifest3(identifier, domain=None, page=None):
addThumbnails(manifest, identifier, metadata['files'])
addPartOfCollection(manifest, metadata.get('metadata').get('collection', []), domain)

if mediatype == 'texts':
# Check for mixed-media items (both images and videos)
(has_images, has_videos) = check_mixed_media(metadata)

if has_images and has_videos:
# Handle mixed media: items with both original images and videos
# Sort files to ensure consistent ordering
sorted_files = sorted(metadata.get('files', []), key=lambda x: x.get('name', ''))

# Get derivatives and vtt files for video processing
(originals, derivatives, vttfiles) = sortDerivatives(metadata, includeVtt=True)

canvas_number = 0
for file in sorted_files:
if file.get('source') != 'original':
continue

file_format = file.get('format', '')
file_name = file.get('name', '')

# Add image canvas
if (file_format in IMAGE_FORMATS and
'thumb' not in file_name.lower() and
file_format not in ['JPEG Thumb', 'Thumbnail']):
try:
canvas = create_image_canvas(identifier, file, metadata, domain, canvas_number)
manifest.add_item(canvas)
canvas_number += 1
except Exception as e:
print(f'Failed to create image canvas for {file_name}: {e}')
Comment on lines +960 to +965
Copy link

Copilot AI Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exception handling catches all exceptions broadly (except Exception as e:), which could mask unexpected errors like KeyError or AttributeError from bugs in the code. The existing image handling at lines 1084-1089 catches specific exceptions (requests.exceptions.HTTPError) and creates a fallback canvas with error information. Consider either: (1) catching specific expected exceptions, or (2) creating a fallback canvas similar to the existing pattern, or (3) logging the error more robustly rather than just printing.

Copilot uses AI. Check for mistakes.

# Add video canvas
elif file_format in VIDEO_FORMATS:
try:
canvas = create_video_canvas(identifier, file, metadata, domain, derivatives, vttfiles)
manifest.add_item(canvas)
canvas_number += 1
except Exception as e:
print(f'Failed to create video canvas for {file_name}: {e}')
Comment on lines +969 to +974
Copy link

Copilot AI Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exception handling catches all exceptions broadly (except Exception as e:), which could mask unexpected errors. Consider catching specific expected exceptions or logging errors more robustly. The existing image handling at lines 1084-1089 catches specific exceptions and creates fallback canvases.

Copilot uses AI. Check for mistakes.

elif mediatype == 'texts':
# Get bookreader metadata (mostly for filenames and height / width of image)
# subprefix can be different from the identifier use the scandata filename to find the correct prefix
# if not present fall back to identifier
Expand Down
119 changes: 119 additions & 0 deletions tests/test_mixed_media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import os
Copy link

Copilot AI Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The os module is imported but never used in this test file. Consider removing this unused import.

Suggested change
import os

Copilot uses AI. Check for mistakes.
import unittest
from flask.testing import FlaskClient
from iiify.app import app
from iiify.resolver import check_mixed_media

class TestMixedMedia(unittest.TestCase):

def setUp(self) -> None:
app.config['CACHE_TYPE'] = "NullCache"
self.test_app = FlaskClient(app)

def test_check_mixed_media_with_images_and_videos(self):
"""Test check_mixed_media function with both images and videos"""
metadata = {
'files': [
{'source': 'original', 'format': 'JPEG', 'name': 'photo1.jpg'},
{'source': 'original', 'format': 'MPEG4', 'name': 'video1.mp4'},
{'source': 'derivative', 'format': 'JPEG', 'name': 'photo1_thumb.jpg'},
]
}
has_images, has_videos = check_mixed_media(metadata)
self.assertTrue(has_images, "Should detect images")
self.assertTrue(has_videos, "Should detect videos")

def test_check_mixed_media_images_only(self):
"""Test check_mixed_media function with only images"""
metadata = {
'files': [
{'source': 'original', 'format': 'JPEG', 'name': 'photo1.jpg'},
{'source': 'original', 'format': 'JPEG', 'name': 'photo2.jpg'},
]
}
has_images, has_videos = check_mixed_media(metadata)
self.assertTrue(has_images, "Should detect images")
self.assertFalse(has_videos, "Should not detect videos")

def test_check_mixed_media_videos_only(self):
"""Test check_mixed_media function with only videos"""
metadata = {
'files': [
{'source': 'original', 'format': 'MPEG4', 'name': 'video1.mp4'},
{'source': 'original', 'format': 'h.264', 'name': 'video2.mp4'},
]
}
has_images, has_videos = check_mixed_media(metadata)
self.assertFalse(has_images, "Should not detect images")
self.assertTrue(has_videos, "Should detect videos")

def test_check_mixed_media_excludes_thumbnails(self):
"""Test that thumbnail images are excluded from mixed media detection"""
metadata = {
'files': [
{'source': 'original', 'format': 'JPEG Thumb', 'name': 'photo_thumb.jpg'},
{'source': 'original', 'format': 'MPEG4', 'name': 'video1.mp4'},
{'source': 'original', 'format': 'Thumbnail', 'name': 'thumb.jpg'},
]
}
has_images, has_videos = check_mixed_media(metadata)
self.assertFalse(has_images, "Should not detect thumbnail images as regular images")
self.assertTrue(has_videos, "Should detect videos")

def test_check_mixed_media_excludes_derivatives(self):
"""Test that derivative files are excluded from mixed media detection"""
metadata = {
'files': [
{'source': 'derivative', 'format': 'JPEG', 'name': 'photo1.jpg'},
{'source': 'derivative', 'format': 'MPEG4', 'name': 'video1.mp4'},
]
}
has_images, has_videos = check_mixed_media(metadata)
self.assertFalse(has_images, "Should not detect derivative images")
self.assertFalse(has_videos, "Should not detect derivative videos")

def test_v3_mixed_media_manifest_structure(self):
"""Test that a mixed-media manifest has the correct structure"""
# Test with the reference item mentioned in the issue
# Skip if network is unavailable
try:
resp = self.test_app.get("/iiif/3/2025-highland-house-walkthrough-ma/manifest.json")
except Exception as e:
self.skipTest(f"Network unavailable: {e}")

if resp.status_code != 200:
self.skipTest("Network or service unavailable")

manifest = resp.json

# Check basic manifest structure
self.assertEqual(manifest['type'], "Manifest", "Expected type to be Manifest")
self.assertTrue('items' in manifest, "Expected manifest to have items")
self.assertGreater(len(manifest['items']), 1, "Expected multiple canvases for mixed media item")

# Check that we have both image and video canvases
canvas_types = set()
for canvas in manifest['items']:
self.assertTrue('items' in canvas, "Expected canvas to have annotation pages")
if len(canvas['items']) > 0:
anno_page = canvas['items'][0]
if 'items' in anno_page and len(anno_page['items']) > 0:
annotation = anno_page['items'][0]
if 'body' in annotation:
body = annotation['body']
# Handle both direct body and Choice bodies
if isinstance(body, dict):
canvas_types.add(body.get('type', 'Unknown'))
elif hasattr(body, 'items'):
# Choice body
for item in body.items:
if hasattr(item, 'type'):
canvas_types.add(item.type)
Comment on lines +104 to +111
Copy link

Copilot AI Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test is using hasattr(body, 'items') on line 107, but body is a dict (as checked on line 105). The hasattr check will always return True because dicts have an 'items' method. This should check if 'items' is a key in the dict using 'items' in body or check if body is a Choice object. Similarly, line 110 checks hasattr(item, 'type') but item could be a dict from body.items (the dict method), not from the Choice items list.

Suggested change
# Handle both direct body and Choice bodies
if isinstance(body, dict):
canvas_types.add(body.get('type', 'Unknown'))
elif hasattr(body, 'items'):
# Choice body
for item in body.items:
if hasattr(item, 'type'):
canvas_types.add(item.type)
# Handle both direct body and Choice bodies (IIIF v3 JSON is dict-based)
if isinstance(body, dict):
body_type = body.get('type')
if body_type == 'Choice' and 'items' in body:
# Choice body: iterate through the list of items
for item in body['items']:
if isinstance(item, dict) and 'type' in item:
canvas_types.add(item['type'])
else:
# Direct body: use its type
canvas_types.add(body.get('type', 'Unknown'))

Copilot uses AI. Check for mistakes.

# We expect to see both Image and Video types in a mixed-media manifest
# This assertion may need adjustment based on actual item content
self.assertTrue(len(canvas_types) > 0, "Expected to find canvas content types")


if __name__ == '__main__':
unittest.main()
Loading