16
16
)
17
17
from sde_collections .utils import slack_utils
18
18
19
- from .models .delta_url import DumpUrl
19
+ from .models .delta_url import DumpUrl , DeltaUrl , CuratedUrl
20
20
from .sinequa_api import Api
21
21
from .utils .github_helper import GitHubHandler
22
22
@@ -174,11 +174,13 @@ def fetch_and_replace_full_text(collection_id, server_name):
174
174
# Step 1: Delete existing DumpUrl entries
175
175
deleted_count , _ = DumpUrl .objects .filter (collection = collection ).delete ()
176
176
print (f"Deleted { deleted_count } old records." )
177
-
177
+ total_server_count = 0
178
178
try :
179
179
# Step 2: Process data in batches
180
180
total_processed = 0
181
- for batch in api .get_full_texts (collection .config_folder ):
181
+ for batch ,total_count in api .get_full_texts (collection .config_folder ):
182
+ if total_server_count == 0 :
183
+ total_server_count = total_count
182
184
with transaction .atomic ():
183
185
DumpUrl .objects .bulk_create (
184
186
[
@@ -194,9 +196,15 @@ def fetch_and_replace_full_text(collection_id, server_name):
194
196
total_processed += len (batch )
195
197
print (f"Processed batch of { len (batch )} records. Total: { total_processed } " )
196
198
199
+ dump_count = DumpUrl .objects .filter (collection = collection ).count ()
200
+
197
201
# Step 3: Migrate dump URLs to delta URLs
198
202
collection .migrate_dump_to_delta ()
199
203
204
+ curated_count = CuratedUrl .objects .filter (collection = collection ).count ()
205
+ delta_count = DeltaUrl .objects .filter (collection = collection ).count ()
206
+ marked_for_deletion_count = DeltaUrl .objects .filter (collection = collection , to_delete = True ).count ()
207
+
200
208
# Step 4: Update statuses if needed
201
209
collection .refresh_from_db ()
202
210
@@ -216,13 +224,8 @@ def fetch_and_replace_full_text(collection_id, server_name):
216
224
collection .reindexing_status = ReindexingStatusChoices .REINDEXING_READY_FOR_CURATION
217
225
collection .save ()
218
226
219
- curated_count = collection .count_curated_urls ()
220
- dump_count = collection .count_dump_urls ()
221
- delta_count = collection .count_delta_urls ()
222
- deletion_count = collection .count_marked_for_deletion_urls ()
223
-
224
227
slack_utils .send_detailed_import_notification (
225
- collection .name , total_processed , curated_count , dump_count , delta_count , deletion_count
228
+ collection .name , total_server_count , curated_count , dump_count , delta_count , marked_for_deletion_count
226
229
)
227
230
return f"Successfully processed { total_processed } records and updated the database."
228
231
0 commit comments