From c2753cf00a163c3f7451550090b5b5c9f8596f22 Mon Sep 17 00:00:00 2001 From: Mark Baggett Date: Thu, 16 Oct 2025 15:00:45 -0500 Subject: [PATCH 1/4] Refactor for doc. --- iiify/search.py | 25 ++++++++++--------------- tests/test_search.py | 2 +- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/iiify/search.py b/iiify/search.py index a44e219..55c3a34 100644 --- a/iiify/search.py +++ b/iiify/search.py @@ -1,29 +1,25 @@ import requests from .resolver import ARCHIVE, URI_PRIFIX from bs4 import BeautifulSoup -import json -def buildSearchURL(identifier, query): +def build_search_url(identifier, query): response = requests.get(f"{ARCHIVE}/metadata/{identifier}") - response.raise_for_status() - + response.raise_for_status() metadata = response.json() - - return f"https://{metadata['server']}/fulltext/inside.php?item_id={identifier}&doc={identifier}&path={metadata['dir']}&q={query}" + # doc = metadata['files'][0]['name'].split('.')[0] + doc = [file['name'].split('_')[0] for file in metadata["files"] if '_djvu.xml' in file['name']][0] + return f"https://{metadata['server']}/fulltext/inside.php?item_id={identifier}&doc={doc}&path={metadata['dir']}&q={query}" def iiif_search(identifier, query): - url = buildSearchURL(identifier, query) - - # print (f"Search URL:\n{url}") + url = build_search_url(identifier, query) response = requests.get(url) response.raise_for_status() ia_response = response.json() - # print (json.dumps(ia_response, indent=4)) searchResponse = { "@context":"http://iiif.io/api/presentation/2/context.json", "@id": f"{URI_PRIFIX}/search/{identifier}?q={query}", - "@type":"sc:AnnotationList", + "@type": "sc:AnnotationList", "resources": [ ] @@ -51,8 +47,7 @@ def iiif_search(identifier, query): for box in paragraph['boxes']: x = int(box['l']) - y = int (box['t']) - right = 0 + y = int(box['t']) # If r is missing then use the paragraph if 'r' in box: right = int(box['r']) @@ -61,7 +56,7 @@ def iiif_search(identifier, query): width = right - x height 
= int(box['b']) - y - page = int(paragraph['page']) - 1 + page = int(paragraph['page']) - 1 if "leaf0_missing" in ia_response and ia_response['leaf0_missing'] == False: page = int(paragraph['page']) @@ -82,4 +77,4 @@ def iiif_search(identifier, query): matchNo += 1 count += 1 - return searchResponse \ No newline at end of file + return searchResponse diff --git a/tests/test_search.py b/tests/test_search.py index 5d40e64..610e7a1 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -74,7 +74,7 @@ def mock_response(url, *args, **kwargs): metadataPatch.side_effect = mock_response - self.assertEqual(search.buildSearchURL("journalofexpedit00ford", "query"), "https://ia601302.us.archive.org/fulltext/inside.php?item_id=journalofexpedit00ford&doc=journalofexpedit00ford&path=/31/items/journalofexpedit00ford&q=query", "Unexpected search query") + self.assertEqual(search.build_search_url("journalofexpedit00ford", "query"), "https://ia601302.us.archive.org/fulltext/inside.php?item_id=journalofexpedit00ford&doc=journalofexpedit00ford&path=/31/items/journalofexpedit00ford&q=query", "Unexpected search query") @patch("requests.get") def multi_box(self, searchPatch): From ca20386622deecd6ca542d955b776d56a3092cc5 Mon Sep 17 00:00:00 2001 From: Mark Baggett Date: Thu, 16 Oct 2025 15:56:30 -0500 Subject: [PATCH 2/4] Split more safely. 
--- iiify/search.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/iiify/search.py b/iiify/search.py index 55c3a34..cfc5e3c 100644 --- a/iiify/search.py +++ b/iiify/search.py @@ -6,8 +6,7 @@ def build_search_url(identifier, query): response = requests.get(f"{ARCHIVE}/metadata/{identifier}") response.raise_for_status() metadata = response.json() - # doc = metadata['files'][0]['name'].split('.')[0] - doc = [file['name'].split('_')[0] for file in metadata["files"] if '_djvu.xml' in file['name']][0] + doc = [file['name'].split('_djvu.xml')[0] for file in metadata["files"] if '_djvu.xml' in file['name']][0] return f"https://{metadata['server']}/fulltext/inside.php?item_id={identifier}&doc={doc}&path={metadata['dir']}&q={query}" def iiif_search(identifier, query): From 07a393a65f01a2242317e8697fd7b75b3a513cd6 Mon Sep 17 00:00:00 2001 From: Mark Baggett Date: Thu, 20 Nov 2025 15:52:43 -0600 Subject: [PATCH 3/4] Add test for building_search_url. --- tests/test_search.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test_search.py b/tests/test_search.py index 610e7a1..233f6dc 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -1,5 +1,3 @@ -import os - import unittest from unittest.mock import patch from flask.testing import FlaskClient @@ -162,4 +160,17 @@ def test_matching_characters_not_empty(self): results = resp.json for result in results["resources"]: self.assertTrue(isinstance(result, dict)) - self.assertEqual(result["resource"]["chars"], "Brunswick") \ No newline at end of file + self.assertEqual(result["resource"]["chars"], "Brunswick") + + +class TestBuildSearchUrl(unittest.TestCase): + def test_real_gray_diary_item(self): + """Test that doc parameter matches the original filename minus extension""" + result = search.build_search_url('gray-diary', 'Houston') + + self.assertIn('item_id=gray-diary', result) + self.assertIn('doc=GrayDiary', result) + self.assertIn('q=Houston', result) + + 
response = requests.get(result) + self.assertEqual(response.status_code, 200) From 01b7e6ba7c836dc41c85e98565debc2e1fe0546a Mon Sep 17 00:00:00 2001 From: Mark Baggett Date: Thu, 20 Nov 2025 17:04:27 -0600 Subject: [PATCH 4/4] Switch classes. --- tests/test_search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_search.py b/tests/test_search.py index 233f6dc..0beb2ad 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -163,7 +163,6 @@ def test_matching_characters_not_empty(self): self.assertEqual(result["resource"]["chars"], "Brunswick") -class TestBuildSearchUrl(unittest.TestCase): def test_real_gray_diary_item(self): """Test that doc parameter matches the original filename minus extension""" result = search.build_search_url('gray-diary', 'Houston')