Skip to content

Commit b1fb75c

Browse files
committed
fix two errors for title test
1 parent 6c8961f commit b1fb75c

File tree

2 files changed

+74
-67
lines changed

2 files changed

+74
-67
lines changed
Lines changed: 64 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,69 @@
11
import unittest
22
from unittest.mock import MagicMock, patch
33

4-
import title
4+
from . import title
5+
56

67
class TestGenerateTitle(unittest.TestCase):
7-
def test_prefers_metadata_title_if_valid(self):
8-
doc = MagicMock()
9-
doc.metadata = {"title": "A Study Regarding The Efficacy of Drugs"}
10-
self.assertEqual("A Study Regarding The Efficacy of Drugs", title.generate_title(doc))
11-
12-
def test_falls_back_to_first_page_text_if_metadata_title_is_empty(self):
13-
doc = MagicMock()
14-
doc.metadata = {"title": ""}
15-
doc[0].get_text = MagicMock()
16-
17-
foo_block = [None] * 7
18-
foo_block[4] = "foo"
19-
foo_block[6] = 0
20-
21-
title_block = [None] * 7
22-
title_block[4] = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
23-
title_block[6] = 0
24-
25-
bar_block = [None] * 7
26-
bar_block[4] = "bar"
27-
bar_block[6] = 0
28-
doc[0].get_text.return_value = [foo_block, title_block, bar_block]
29-
30-
expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
31-
self.assertEqual(expected_title, title.generate_title(doc))
32-
33-
def test_falls_back_to_first_page_text_if_metadata_title_does_not_match_regex(self):
34-
doc = MagicMock()
35-
doc.metadata = {"title": "abcd1234"}
36-
doc[0].get_text = MagicMock()
37-
38-
foo_block = [None] * 7
39-
foo_block[4] = "foo"
40-
foo_block[6] = 0
41-
42-
title_block = [None] * 7
43-
title_block[4] = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
44-
title_block[6] = 0
45-
46-
bar_block = [None] * 7
47-
bar_block[4] = "bar"
48-
bar_block[6] = 0
49-
doc[0].get_text.return_value = [foo_block, title_block, bar_block]
50-
51-
expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
52-
self.assertEqual(expected_title, title.generate_title(doc))
53-
54-
@patch("server.api.services.openai_services.openAIServices.openAI")
55-
def test_falls_back_to_chatgpt_if_no_title_found(self, mock_openAI):
56-
doc = MagicMock()
57-
doc.metadata = {"title": None}
58-
doc.get_text.return_value = []
59-
60-
mock_response = MagicMock()
61-
mock_response.choices = [MagicMock()]
62-
mock_response.choices[0].message.content = "A Study Regarding The Efficacy of Drugs"
63-
mock_openAI.return_value = mock_response
64-
65-
title.generate_title(doc)
66-
67-
self.assertTrue(mock_openAI.called)
8+
def test_prefers_metadata_title_if_valid(self):
9+
doc = MagicMock()
10+
doc.metadata = {"title": "A Study Regarding The Efficacy of Drugs"}
11+
self.assertEqual(
12+
"A Study Regarding The Efficacy of Drugs", title.generate_title(doc))
13+
14+
def test_falls_back_to_first_page_text_if_metadata_title_is_empty(self):
15+
doc = MagicMock()
16+
doc.metadata = {"title": ""}
17+
doc[0].get_text = MagicMock()
18+
19+
foo_block = [None] * 7
20+
foo_block[4] = "foo"
21+
foo_block[6] = 0
22+
23+
title_block = [None] * 7
24+
title_block[4] = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
25+
title_block[6] = 0
26+
27+
bar_block = [None] * 7
28+
bar_block[4] = "bar"
29+
bar_block[6] = 0
30+
doc[0].get_text.return_value = [foo_block, title_block, bar_block]
31+
32+
expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
33+
self.assertEqual(expected_title, title.generate_title(doc))
34+
35+
def test_falls_back_to_first_page_text_if_metadata_title_does_not_match_regex(self):
36+
doc = MagicMock()
37+
doc.metadata = {"title": "abcd1234"}
38+
doc[0].get_text = MagicMock()
39+
40+
foo_block = [None] * 7
41+
foo_block[4] = "foo"
42+
foo_block[6] = 0
43+
44+
title_block = [None] * 7
45+
title_block[4] = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
46+
title_block[6] = 0
47+
48+
bar_block = [None] * 7
49+
bar_block[4] = "bar"
50+
bar_block[6] = 0
51+
doc[0].get_text.return_value = [foo_block, title_block, bar_block]
52+
53+
expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
54+
self.assertEqual(expected_title, title.generate_title(doc))
55+
56+
@patch("api.services.openai_services.openAIServices.openAI")
57+
def test_falls_back_to_chatgpt_if_no_title_found(self, mock_openAI):
58+
doc = MagicMock()
59+
doc.metadata = {"title": None}
60+
doc.get_text.return_value = []
61+
62+
mock_response = MagicMock()
63+
mock_response.choices = [MagicMock()]
64+
mock_response.choices[0].message.content = "A Study Regarding The Efficacy of Drugs"
65+
mock_openAI.return_value = mock_response
66+
67+
title.generate_title(doc)
68+
69+
self.assertTrue(mock_openAI.called)

server/api/views/uploadFile/title.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22

33
import fitz
44

5-
from server.api.services.openai_services import openAIServices
5+
from api.services.openai_services import openAIServices
6+
67

78
# regular expression to match common research white paper titles. Created by ChatGPT
89
# requires at least 3 words, no dates, no version numbers.
9-
title_regex = re.compile(r'^(?=(?:\b\w+\b[\s:,\-\(\)]*){3,})(?!.*\b(?:19|20)\d{2}\b)(?!.*\bv\d+\b)[A-Za-z0-9][\w\s:,\-\(\)]*[A-Za-z\)]$', re.IGNORECASE)
10+
title_regex = re.compile(
11+
r'^(?=(?:\b\w+\b[\s:,\-\(\)]*){3,})(?!.*\b(?:19|20)\d{2}\b)(?!.*\bv\d+\b)[A-Za-z0-9][\w\s:,\-\(\)]*[A-Za-z\)]$', re.IGNORECASE)
12+
1013

1114
def generate_title(pdf: fitz.Document) -> str | None:
1215
document_metadata_title = pdf.metadata["title"]
@@ -23,7 +26,7 @@ def generate_title(pdf: fitz.Document) -> str | None:
2326
text_blocks = [
2427
block[4].strip().replace("\n", " ")
2528
for block in first_page_blocks
26-
if block[6] == 0 # only include text blocks.
29+
if block[6] == 0 # only include text blocks.
2730
]
2831

2932
# For some reason, extracted PDF text has extra spaces. Collapse them here.
@@ -35,7 +38,8 @@ def generate_title(pdf: fitz.Document) -> str | None:
3538
if title_regex.match(text):
3639
return text
3740

38-
print("no suitable title found in first page text. Using GPT-4 to summarize the PDF")
41+
print(
42+
"no suitable title found in first page text. Using GPT-4 to summarize the PDF")
3943
gpt_title = summarize_pdf(pdf)
4044
return gpt_title or None
4145

@@ -52,5 +56,6 @@ def summarize_pdf(pdf: fitz.Document) -> str:
5256

5357
# UploadFile model title is limited to 255 chars.
5458
prompt = "Please provide a title for this document. The title should be less than 256 characters and will be displayed on a webpage."
55-
response = openAIServices.openAI(first_page_content, prompt, model='gpt-4o', temp=0.0)
59+
response = openAIServices.openAI(
60+
first_page_content, prompt, model='gpt-4o', temp=0.0)
5661
return response.choices[0].message.content

0 commit comments

Comments
 (0)