|
| 1 | +# docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_workflow_status_triggers.py |
| 2 | +from unittest.mock import Mock, patch |
| 3 | + |
| 4 | +import pytest |
| 5 | +from django.db import transaction |
| 6 | +from django.test import TestCase, TransactionTestCase |
| 7 | + |
| 8 | +from sde_collections.models.collection_choice_fields import ( |
| 9 | + ReindexingStatusChoices, |
| 10 | + WorkflowStatusChoices, |
| 11 | +) |
| 12 | +from sde_collections.models.delta_url import DeltaUrl, DumpUrl |
| 13 | +from sde_collections.tasks import fetch_and_replace_full_text |
| 14 | +from sde_collections.tests.factories import CollectionFactory, DumpUrlFactory |
| 15 | + |
| 16 | + |
class TestWorkflowStatusTransitions(TestCase):
    """Saving a collection with a new ``workflow_status`` should fire the matching hook.

    Each test patches one hook, saves the collection with a single status
    value, and checks how the patched hook was called.
    """

    def setUp(self):
        self.collection = CollectionFactory()

    def _move_to(self, status):
        """Assign ``status`` as the collection's workflow status and save it."""
        self.collection.workflow_status = status
        self.collection.save()

    def test_ready_for_engineering_triggers_config_creation(self):
        """READY_FOR_ENGINEERING creates scraper and indexer configs with ``overwrite=False``."""
        with patch("sde_collections.models.collection.Collection.create_scraper_config") as scraper_hook:
            with patch("sde_collections.models.collection.Collection.create_indexer_config") as indexer_hook:
                self._move_to(WorkflowStatusChoices.READY_FOR_ENGINEERING)

        scraper_hook.assert_called_once_with(overwrite=False)
        indexer_hook.assert_called_once_with(overwrite=False)

    def test_indexing_finished_triggers_full_text_fetch(self):
        """INDEXING_FINISHED_ON_DEV queues the full-text fetch for this collection on ``lrm_dev``."""
        with patch("sde_collections.tasks.fetch_and_replace_full_text.delay") as queue_fetch:
            self._move_to(WorkflowStatusChoices.INDEXING_FINISHED_ON_DEV)

        queue_fetch.assert_called_once_with(self.collection.id, "lrm_dev")

    def test_ready_for_curation_triggers_plugin_config(self):
        """READY_FOR_CURATION creates the plugin config with ``overwrite=True``."""
        with patch("sde_collections.models.collection.Collection.create_plugin_config") as plugin_hook:
            self._move_to(WorkflowStatusChoices.READY_FOR_CURATION)

        plugin_hook.assert_called_once_with(overwrite=True)

    def test_curated_triggers_promotion(self):
        """CURATED calls ``promote_to_curated`` exactly once."""
        with patch("sde_collections.models.collection.Collection.promote_to_curated") as promote_hook:
            self._move_to(WorkflowStatusChoices.CURATED)

        promote_hook.assert_called_once()

    def test_quality_check_perfect_triggers_public_query(self):
        """QUALITY_CHECK_PERFECT calls ``add_to_public_query`` exactly once."""
        with patch("sde_collections.models.collection.Collection.add_to_public_query") as publish_hook:
            self._move_to(WorkflowStatusChoices.QUALITY_CHECK_PERFECT)

        publish_hook.assert_called_once()
| 62 | + |
| 63 | + |
class TestReindexingStatusTransitions(TestCase):
    """Saving a collection with a new ``reindexing_status`` should fire the matching hook.

    The collection starts at QUALITY_CHECK_PERFECT / REINDEXING_NOT_NEEDED so
    each test changes only the reindexing status.
    """

    def setUp(self):
        self.collection = CollectionFactory(
            workflow_status=WorkflowStatusChoices.QUALITY_CHECK_PERFECT,
            reindexing_status=ReindexingStatusChoices.REINDEXING_NOT_NEEDED,
        )

    def _reindex_to(self, status):
        """Assign ``status`` as the collection's reindexing status and save it."""
        self.collection.reindexing_status = status
        self.collection.save()

    def test_reindexing_finished_triggers_full_text_fetch(self):
        """REINDEXING_FINISHED_ON_DEV queues the full-text fetch for this collection on ``lrm_dev``."""
        with patch("sde_collections.tasks.fetch_and_replace_full_text.delay") as queue_fetch:
            self._reindex_to(ReindexingStatusChoices.REINDEXING_FINISHED_ON_DEV)

        queue_fetch.assert_called_once_with(self.collection.id, "lrm_dev")

    def test_reindexing_curated_triggers_promotion(self):
        """REINDEXING_CURATED calls ``promote_to_curated`` exactly once."""
        with patch("sde_collections.models.collection.Collection.promote_to_curated") as promote_hook:
            self._reindex_to(ReindexingStatusChoices.REINDEXING_CURATED)

        promote_hook.assert_called_once()
| 86 | + |
| 87 | + |
class TestFullTextImport(TestCase):
    """Run ``fetch_and_replace_full_text`` directly with the API and GitHub handler mocked."""

    def setUp(self):
        self.collection = CollectionFactory()
        # A pre-existing DumpUrl lets the test confirm the import clears old rows.
        self.existing_dump = DumpUrlFactory(collection=self.collection)
        self.api_response = [
            {"url": "http://example.com/1", "title": "Title 1", "full_text": "Content 1"},
            {"url": "http://example.com/2", "title": "Title 2", "full_text": "Content 2"},
        ]

    @patch("sde_collections.tasks.Api")
    @patch("sde_collections.models.collection.GitHubHandler")
    def test_full_text_import_workflow(self, github_cls, api_cls):
        """Importing two documents replaces the old dump, creates two deltas, and advances the status."""
        # Template served by the fake GitHub handler; it includes all the fields
        # that convert_template_to_plugin_indexer checks for.
        template_xml = """<?xml version="1.0" encoding="UTF-8"?>
<Sinequa>
<KeepHashFragmentInUrl>false</KeepHashFragmentInUrl>
<CorrectDomainCookies>false</CorrectDomainCookies>
<IgnoreSessionCookies>false</IgnoreSessionCookies>
<DownloadImages>false</DownloadImages>
<DownloadMedia>false</DownloadMedia>
<DownloadCss>false</DownloadCss>
<DownloadFtp>true</DownloadFtp>
<DownloadFile>true</DownloadFile>
<IndexJs>false</IndexJs>
<FollowJs>true</FollowJs>
<CrawlFlash>true</CrawlFlash>
<NormalizeUrls>true</NormalizeUrls>
<NormalizeSecureSchemesWhenTestingVisited>True</NormalizeSecureSchemesWhenTestingVisited>
<UrlAccess>
<AllowXPathCookies>false</AllowXPathCookies>
<UseBrowserForWebRequests>true</UseBrowserForWebRequests>
<UseHttpClientForWebRequests>false</UseHttpClientForWebRequests>
</UrlAccess>
<RetryCount></RetryCount>
<RetryPause></RetryPause>
<AddBaseHref></AddBaseHref>
<AddMetaContentType></AddMetaContentType>
</Sinequa>"""

        # Fake GitHub handler: the file exists and decodes to the template above.
        file_stub = Mock()
        file_stub.decoded_content = template_xml.encode("utf-8")
        handler_stub = Mock()
        handler_stub.check_file_exists.return_value = True
        handler_stub._get_file_contents.return_value = file_stub
        github_cls.return_value = handler_stub

        # Fake API: get_full_texts returns a list holding the two-document response.
        api_stub = Mock()
        api_stub.get_full_texts.return_value = [self.api_response]
        api_cls.return_value = api_stub

        # Put the collection in the state the import is run from.
        self.collection.workflow_status = WorkflowStatusChoices.INDEXING_FINISHED_ON_DEV
        self.collection.save()

        # Call the task function synchronously.
        fetch_and_replace_full_text(self.collection.id, "lrm_dev")

        # The DumpUrl created in setUp must be gone.
        assert not DumpUrl.objects.filter(id=self.existing_dump.id).exists()

        # One DeltaUrl per document in the API response.
        deltas = DeltaUrl.objects.filter(collection=self.collection)
        assert deltas.count() == 2
        assert {delta.url for delta in deltas} == {"http://example.com/1", "http://example.com/2"}

        # The stored workflow status has moved on to READY_FOR_CURATION.
        self.collection.refresh_from_db()
        assert self.collection.workflow_status == WorkflowStatusChoices.READY_FOR_CURATION
| 158 | + |
| 159 | + |
class TestErrorHandling(TransactionTestCase):
    """Check that a failing side effect leaves the stored workflow status unchanged.

    Both tests inject a failure through a mock, expect the call under test to
    raise, and then reload the collection to compare its status with the value
    it had before the call.
    """

    def setUp(self):
        self.collection = CollectionFactory(workflow_status=WorkflowStatusChoices.RESEARCH_IN_PROGRESS)

    @patch("sde_collections.models.collection.Collection.create_scraper_config")
    @patch("sde_collections.models.collection.Collection.create_indexer_config")
    def test_config_creation_failure_handling(self, mock_indexer, mock_scraper):
        """A failure in scraper-config creation must not persist READY_FOR_ENGINEERING."""
        mock_scraper.side_effect = Exception("Config creation failed")

        initial_status = self.collection.workflow_status

        # Match on the injected message: a bare ``pytest.raises(Exception)`` would
        # also pass on any unrelated error (TypeError, bad mock wiring, ...).
        with pytest.raises(Exception, match="Config creation failed"):
            # The save runs inside its own atomic block so the failure rolls it back.
            with transaction.atomic():
                self.collection.workflow_status = WorkflowStatusChoices.READY_FOR_ENGINEERING
                self.collection.save()

        # Reload from the database: the in-memory instance was already mutated above.
        self.collection.refresh_from_db()
        assert self.collection.workflow_status == initial_status

    @patch("sde_collections.tasks.Api")
    def test_full_text_fetch_failure_handling(self, MockApi):
        """A failure while fetching full texts must not change the workflow status."""
        mock_api = Mock()
        mock_api.get_full_texts.side_effect = Exception("API error")
        MockApi.return_value = mock_api

        initial_status = self.collection.workflow_status

        # ``match`` uses ``re.search``, so this still passes if the task wraps the
        # injected message in a longer one, but not on an unrelated failure.
        with pytest.raises(Exception, match="API error"):
            fetch_and_replace_full_text(self.collection.id, "lrm_dev")

        # Reload from the database to check the persisted value.
        self.collection.refresh_from_db()
        assert self.collection.workflow_status == initial_status
0 commit comments