Skip to content

Commit 016442f

Browse files
author
Your Name
committed
Modifications post PR review
1 parent 2199843 commit 016442f

File tree

4 files changed

+16
-29
lines changed

4 files changed

+16
-29
lines changed

sde_collections/models/collection.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -634,22 +634,6 @@ def apply_all_patterns(self):
634634
for pattern in self.deltadivisionpatterns.all():
635635
pattern.apply()
636636

637-
def count_curated_urls(self):
638-
"""Return the count of Curated URLs for the collection."""
639-
return CuratedUrl.objects.filter(collection=self).count()
640-
641-
def count_dump_urls(self):
642-
"""Return the count of all Dump URLs for the collection."""
643-
return DumpUrl.objects.filter(collection=self).count()
644-
645-
def count_delta_urls(self):
646-
"""Return the count of Delta URLs identified."""
647-
return DeltaUrl.objects.filter(collection=self).count()
648-
649-
def count_marked_for_deletion_urls(self):
650-
"""Return the count of Delta URLs marked for deletion."""
651-
return DeltaUrl.objects.filter(collection=self, to_delete=True).count()
652-
653637
def save(self, *args, **kwargs):
654638
# Call the function to generate the value for the generated_field based on the original_field
655639
if not self.config_folder:

sde_collections/sinequa_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ def get_full_texts(
257257
if total_count is None:
258258
total_count = response.get("TotalRowCount", 0)
259259

260-
yield self._process_rows_to_records(rows)
260+
yield (self._process_rows_to_records(rows),total_count)
261261

262262
current_offset += len(rows)
263263

sde_collections/tasks.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
)
1717
from sde_collections.utils import slack_utils
1818

19-
from .models.delta_url import DumpUrl
19+
from .models.delta_url import DumpUrl,DeltaUrl,CuratedUrl
2020
from .sinequa_api import Api
2121
from .utils.github_helper import GitHubHandler
2222

@@ -174,11 +174,13 @@ def fetch_and_replace_full_text(collection_id, server_name):
174174
# Step 1: Delete existing DumpUrl entries
175175
deleted_count, _ = DumpUrl.objects.filter(collection=collection).delete()
176176
print(f"Deleted {deleted_count} old records.")
177-
177+
total_server_count=0
178178
try:
179179
# Step 2: Process data in batches
180180
total_processed = 0
181-
for batch in api.get_full_texts(collection.config_folder):
181+
for batch,total_count in api.get_full_texts(collection.config_folder):
182+
if total_server_count == 0:
183+
total_server_count = total_count
182184
with transaction.atomic():
183185
DumpUrl.objects.bulk_create(
184186
[
@@ -194,9 +196,15 @@ def fetch_and_replace_full_text(collection_id, server_name):
194196
total_processed += len(batch)
195197
print(f"Processed batch of {len(batch)} records. Total: {total_processed}")
196198

199+
dump_count = DumpUrl.objects.filter(collection=collection).count()
200+
197201
# Step 3: Migrate dump URLs to delta URLs
198202
collection.migrate_dump_to_delta()
199203

204+
curated_count = CuratedUrl.objects.filter(collection=collection).count()
205+
delta_count = DeltaUrl.objects.filter(collection=collection).count()
206+
marked_for_deletion_count = DeltaUrl.objects.filter(collection=collection, to_delete=True).count()
207+
200208
# Step 4: Update statuses if needed
201209
collection.refresh_from_db()
202210

@@ -216,13 +224,8 @@ def fetch_and_replace_full_text(collection_id, server_name):
216224
collection.reindexing_status = ReindexingStatusChoices.REINDEXING_READY_FOR_CURATION
217225
collection.save()
218226

219-
curated_count = collection.count_curated_urls()
220-
dump_count = collection.count_dump_urls()
221-
delta_count = collection.count_delta_urls()
222-
deletion_count = collection.count_marked_for_deletion_urls()
223-
224227
slack_utils.send_detailed_import_notification(
225-
collection.name, total_processed, curated_count, dump_count, delta_count, deletion_count
228+
collection.name, total_server_count, curated_count, dump_count, delta_count, marked_for_deletion_count
226229
)
227230
return f"Successfully processed {total_processed} records and updated the database."
228231

sde_collections/utils/slack_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,12 @@ def format_slack_message(name, details, collection_id):
6060

6161

6262
def send_detailed_import_notification(
63-
collection_name, total_processed, curated_count, dump_count, delta_count, deletion_count
63+
collection_name, total_server_count, curated_count, dump_count, delta_count, marked_for_deletion_count
6464
):
6565
message = (
6666
f"'{collection_name}' brought into COSMOS. "
67-
f"Prior Curated: {curated_count}, URL counts - [Server: {total_processed}, "
68-
f"Dump: {dump_count}, New Deltas: {delta_count}, Deleted: {deletion_count}]"
67+
f"Prior Curated: {curated_count}, URL Counts - [Server: {total_server_count}, "
68+
f"URLs Imported: {dump_count}, New Deltas: {delta_count}, Marked For Deletion: {marked_for_deletion_count}]"
6969
)
7070

7171
webhook_url = settings.SLACK_WEBHOOK_URL

0 commit comments

Comments
 (0)