Skip to content

Commit b7a1106

Browse files
tw4l and ikreymer authored
Track dedupe index file size in org storage stats (#3207)
Fixes #3206

## Backend changes

- Adds size of dedupe index files to org storage stats
- Removes bytes from org storage stats when dedupe index is deleted if file was saved
- Adds new stats to org metrics endpoint
- Adds `await` that was missing in method for updating index state
- Updates (and fixes) test for org metrics endpoint

## Frontend changes

- Includes dedupe index files in Misc storage in dashboard storage meter
- Updates type for org metrics

---------

Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
1 parent fba3ec4 commit b7a1106

File tree

8 files changed

+70
-26
lines changed

8 files changed

+70
-26
lines changed

backend/btrixcloud/colls.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,7 @@ async def create_dedupe_index(self, coll: Collection, org: Organization):
731731
# enable index by setting indexState to a non-null value
732732
# and setting stats to zeroed out default
733733
await self.update_dedupe_index_info(
734-
coll.id, state="initing" if coll.crawlCount else "idle"
734+
coll.id, org.id, state="initing" if coll.crawlCount else "idle"
735735
)
736736

737737
await self.update_dedupe_index_stats(coll.id, DedupeIndexStats())
@@ -793,7 +793,7 @@ async def run_index_import_job(
793793
if job_type in ("import", "purge"):
794794
# if job created, update state here so its reflected in the UI more quickly
795795
await self.update_dedupe_index_info(
796-
coll_id, state="purging" if job_type == "purge" else "importing"
796+
coll_id, oid, state="purging" if job_type == "purge" else "importing"
797797
)
798798

799799
async def delete_dedupe_index(
@@ -817,6 +817,10 @@ async def delete_dedupe_index(
817817
)
818818
raise HTTPException(status_code=400, detail="file_deletion_error")
819819

820+
await self.orgs.inc_org_bytes_stored_field(
821+
org.id, "bytesStoredDedupeIndexes", -coll.indexFile.size
822+
)
823+
820824
await self.collections.find_one_and_update(
821825
{"_id": coll.id},
822826
{
@@ -848,7 +852,7 @@ async def update_dedupe_index_stats(
848852
self, coll_id: UUID, stats: DedupeIndexStats, disk_space_used: int = 0
849853
):
850854
"""update dedupe index stats for specified collection"""
851-
self.collections.find_one_and_update(
855+
await self.collections.find_one_and_update(
852856
{"_id": coll_id, "indexState": {"$ne": None}},
853857
{
854858
"$set": {
@@ -861,11 +865,12 @@ async def update_dedupe_index_stats(
861865
async def update_dedupe_index_info(
862866
self,
863867
coll_id: UUID,
868+
oid: UUID,
864869
state: TYPE_DEDUPE_INDEX_STATES,
865870
index_file: Optional[DedupeIndexFile] = None,
866871
dt: Optional[datetime] = None,
867872
if_exists=False,
868-
):
873+
) -> bool:
869874
"""update the state, and optionally, dedupe index file info"""
870875
query: dict[str, Any] = {"indexState": state}
871876
if index_file and dt:
@@ -877,7 +882,23 @@ async def update_dedupe_index_info(
877882
if if_exists:
878883
match["indexState"] = {"$ne": None}
879884

880-
res = self.collections.find_one_and_update(match, {"$set": query})
885+
res = await self.collections.find_one_and_update(
886+
match,
887+
{"$set": query},
888+
return_document=pymongo.ReturnDocument.BEFORE,
889+
)
890+
891+
if index_file:
892+
size_diff = index_file.size
893+
894+
prev_coll = Collection.from_dict(res) if res else None
895+
if prev_coll and prev_coll.indexFile:
896+
size_diff = index_file.size - prev_coll.indexFile.size
897+
898+
await self.orgs.inc_org_bytes_stored_field(
899+
oid, "bytesStoredDedupeIndexes", size_diff
900+
)
901+
881902
return res is not None
882903

883904
async def get_dedupe_index_saved(self, coll_id: UUID) -> Optional[datetime]:

backend/btrixcloud/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2434,6 +2434,7 @@ class OrgOut(BaseMongoModel):
24342434
bytesStoredProfiles: int
24352435
bytesStoredSeedFiles: int = 0
24362436
bytesStoredThumbnails: int = 0
2437+
bytesStoredDedupeIndexes: int = 0
24372438
origin: Optional[AnyHttpUrl] = None
24382439

24392440
storageQuotaReached: Optional[bool] = False
@@ -2501,6 +2502,7 @@ class Organization(BaseMongoModel):
25012502
bytesStoredProfiles: int = 0
25022503
bytesStoredSeedFiles: int = 0
25032504
bytesStoredThumbnails: int = 0
2505+
bytesStoredDedupeIndexes: int = 0
25042506

25052507
# total usage + exec time
25062508
usage: Dict[str, int] = {}
@@ -2652,6 +2654,7 @@ class OrgMetrics(BaseModel):
26522654
storageUsedProfiles: int
26532655
storageUsedSeedFiles: int
26542656
storageUsedThumbnails: int
2657+
storageUsedDedupeIndexes: int
26552658
storageQuotaBytes: int
26562659
archivedItemCount: int
26572660
crawlCount: int

backend/btrixcloud/operator/collindexes.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ async def sync_index(self, data: MCSyncData):
147147
status = CollIndexStatus(**data.parent.get("status", {}))
148148

149149
coll_id = spec.id
150+
oid = spec.oid
150151
redis_name = f"redis-coll-{coll_id}"
151152
new_children = []
152153

@@ -159,7 +160,7 @@ async def sync_index(self, data: MCSyncData):
159160
if data.finalizing:
160161
is_done = False
161162
if status.state == "saved" and status.finishedAt:
162-
await self.set_state("idle", status, coll_id)
163+
await self.set_state("idle", status, coll_id, oid)
163164
is_done = True
164165
elif status.state == "idle" and status.index.notFound:
165166
is_done = True
@@ -168,7 +169,7 @@ async def sync_index(self, data: MCSyncData):
168169
is_done = True
169170
else:
170171
try:
171-
coll = await self.coll_ops.get_collection_raw(spec.id, spec.oid)
172+
coll = await self.coll_ops.get_collection_raw(coll_id, oid)
172173
# if index state is not set, index has been deleted
173174
# also delete immediately
174175
if not coll.get("indexState"):
@@ -184,7 +185,7 @@ async def sync_index(self, data: MCSyncData):
184185
is_done = True
185186

186187
if is_done:
187-
print(f"CollIndex removed: {spec.id}")
188+
print(f"CollIndex removed: {coll_id}")
188189
return {
189190
"status": status.dict(),
190191
"children": [],
@@ -195,19 +196,19 @@ async def sync_index(self, data: MCSyncData):
195196
# determine if index was previously saved before initing redis
196197
if not redis_pod:
197198
if not status.indexLastSavedAt:
198-
res = await self.coll_ops.get_dedupe_index_saved(spec.id)
199+
res = await self.coll_ops.get_dedupe_index_saved(coll_id)
199200
if res:
200201
status.indexLastSavedAt = date_to_str(res)
201202

202203
if self.is_expired(status) or data.finalizing:
203204
# do actual deletion here
204205
if not data.finalizing:
205-
self.run_task(self.do_delete(spec.id))
206+
self.run_task(self.do_delete(coll_id))
206207

207208
# Saving process
208209
# 1. run bgsave while redis is active
209210
if status.index.running:
210-
await self.do_save_redis(spec.id, status)
211+
await self.do_save_redis(coll_id, oid, status)
211212

212213
elif status.index.finished and not status.index.savedAt:
213214
await self.k8s.send_signal_to_pod(redis_name, "SIGUSR1", "save")
@@ -217,7 +218,7 @@ async def sync_index(self, data: MCSyncData):
217218
await self.mark_index_saved(redis_name, spec, status)
218219

219220
else:
220-
await self.update_state(data, spec.id, status)
221+
await self.update_state(data, coll_id, oid, status)
221222

222223
# pylint: disable=broad-exception-caught
223224
except Exception as e:
@@ -289,7 +290,9 @@ def sync_redis_pod_status(self, pod, status: CollIndexStatus):
289290
except:
290291
pass
291292

292-
async def update_state(self, data, coll_id: UUID, status: CollIndexStatus):
293+
async def update_state(
294+
self, data, coll_id: UUID, oid: UUID, status: CollIndexStatus
295+
):
293296
"""update state"""
294297
desired_state = status.state
295298
if not status.index.loaded:
@@ -317,7 +320,7 @@ async def update_state(self, data, coll_id: UUID, status: CollIndexStatus):
317320
desired_state = "initing"
318321

319322
if desired_state != status.state:
320-
await self.set_state(desired_state, status, coll_id)
323+
await self.set_state(desired_state, status, coll_id, oid)
321324

322325
def is_expired(self, status: CollIndexStatus):
323326
"""return true if collindex is considered expired and should be deleted"""
@@ -343,21 +346,27 @@ def is_last_active_exceeds(
343346
return False
344347

345348
async def set_state(
346-
self, state: TYPE_DEDUPE_INDEX_STATES, status: CollIndexStatus, coll_id: UUID
349+
self,
350+
state: TYPE_DEDUPE_INDEX_STATES,
351+
status: CollIndexStatus,
352+
coll_id: UUID,
353+
oid: UUID,
347354
):
348355
"""set state after updating db"""
349356
print(f"Setting coll index state {status.state} -> {state} {coll_id}")
350357
status.state = state
351358
status.lastStateChangeAt = date_to_str(dt_now())
352359

353-
await self.coll_ops.update_dedupe_index_info(coll_id, state, if_exists=True)
360+
await self.coll_ops.update_dedupe_index_info(
361+
coll_id, oid, state, if_exists=True
362+
)
354363

355364
async def do_delete(self, coll_id: UUID):
356365
"""delete the CollIndex object"""
357366
print(f"Deleting collindex {coll_id}")
358367
await self.k8s.delete_custom_object(f"collindex-{coll_id}", "collindexes")
359368

360-
async def do_save_redis(self, coll_id: UUID, status: CollIndexStatus):
369+
async def do_save_redis(self, coll_id: UUID, oid: UUID, status: CollIndexStatus):
361370
"""shutdown save redis"""
362371
try:
363372
redis = await self.k8s.get_redis_connected(f"coll-{coll_id}")
@@ -367,14 +376,14 @@ async def do_save_redis(self, coll_id: UUID, status: CollIndexStatus):
367376
if status.state not in ("saving", "saved"):
368377
await redis.bgsave(False)
369378

370-
await self.set_state("saving", status, coll_id)
379+
await self.set_state("saving", status, coll_id, oid)
371380

372381
if await self.is_bgsave_done(redis):
373382
await redis.shutdown()
374383

375384
# pylint: disable=broad-exception-caught
376385
except Exception:
377-
await self.set_state("ready", status, coll_id)
386+
await self.set_state("ready", status, coll_id, oid)
378387
traceback.print_exc()
379388

380389
async def is_bgsave_done(self, redis: Redis) -> bool:
@@ -576,5 +585,5 @@ async def update_saved_dedupe_index_state_in_db(
576585
)
577586

578587
await self.coll_ops.update_dedupe_index_info(
579-
coll_id, "idle", index_file, finished_at
588+
coll_id, oid, "idle", index_file, finished_at
580589
)

backend/btrixcloud/orgs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,7 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]:
11291129
"storageUsedProfiles": org.bytesStoredProfiles,
11301130
"storageUsedSeedFiles": org.bytesStoredSeedFiles or 0,
11311131
"storageUsedThumbnails": org.bytesStoredThumbnails or 0,
1132+
"storageUsedDedupeIndexes": org.bytesStoredDedupeIndexes or 0,
11321133
"storageQuotaBytes": storage_quota,
11331134
"archivedItemCount": archived_item_count,
11341135
"crawlCount": crawl_count,

backend/test/test_org.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -520,16 +520,18 @@ def test_org_metrics(crawler_auth_headers, default_org_id):
520520
assert data["storageUsedBytes"] > 0
521521
assert data["storageUsedCrawls"] > 0
522522
assert data["storageUsedUploads"] >= 0
523-
assert data["storageUsedThumbnails"] >= 0
524-
assert data["storageUsedThumbnails"] >= 0
525523
assert data["storageUsedProfiles"] >= 0
524+
assert data["storageUsedSeedFiles"] >= 0
525+
assert data["storageUsedThumbnails"] >= 0
526+
assert data["storageUsedDedupeIndexes"] >= 0
526527
assert (
527528
data["storageUsedBytes"]
528529
== data["storageUsedCrawls"]
529530
+ data["storageUsedUploads"]
530531
+ data["storageUsedProfiles"]
531532
+ data["storageUsedSeedFiles"]
532533
+ data["storageUsedThumbnails"]
534+
+ data["storageUsedDedupeIndexes"]
533535
)
534536
assert data["storageQuotaBytes"] >= 0
535537
assert data["archivedItemCount"] > 0

frontend/src/features/meters/storage/storage-meter.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ export class StorageMeter extends BtrixElement {
3131
const hasQuota = Boolean(metrics.storageQuotaBytes);
3232
const isStorageFull =
3333
hasQuota && metrics.storageUsedBytes >= metrics.storageQuotaBytes;
34-
const misc = metrics.storageUsedSeedFiles + metrics.storageUsedThumbnails;
34+
const misc =
35+
metrics.storageUsedSeedFiles +
36+
metrics.storageUsedThumbnails +
37+
metrics.storageUsedDedupeIndexes;
3538

3639
const values = {
3740
crawls: metrics.storageUsedCrawls,

frontend/src/pages/org/dashboard.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,9 @@ export class Dashboard extends BtrixElement {
268268
url: "/browser-profiles",
269269
},
270270
})}
271-
${metrics.storageUsedSeedFiles || metrics.storageUsedThumbnails
271+
${metrics.storageUsedSeedFiles ||
272+
metrics.storageUsedThumbnails ||
273+
metrics.storageUsedDedupeIndexes
272274
? this.renderMiscStorage(metrics)
273275
: nothing}
274276
@@ -538,7 +540,7 @@ export class Dashboard extends BtrixElement {
538540
${msg("Miscellaneous")}
539541
<btrix-popover
540542
content=${msg(
541-
"Total size of all supplementary files in use by your organization, such as workflow URL list files and custom collection thumbnails.",
543+
"Total size of all supplementary files in use by your organization, such as workflow URL list files, custom collection thumbnails, and deduplication indexes.",
542544
)}
543545
>
544546
<sl-icon
@@ -549,7 +551,9 @@ export class Dashboard extends BtrixElement {
549551
</dt>
550552
<dd class="font-monostyle text-xs text-neutral-500">
551553
${this.localize.bytes(
552-
metrics.storageUsedSeedFiles + metrics.storageUsedThumbnails,
554+
metrics.storageUsedSeedFiles +
555+
metrics.storageUsedThumbnails +
556+
metrics.storageUsedDedupeIndexes,
553557
)}
554558
</dd>
555559
</div>

frontend/src/types/org.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ export type Metrics = {
126126
storageUsedProfiles: number;
127127
storageUsedSeedFiles: number;
128128
storageUsedThumbnails: number;
129+
storageUsedDedupeIndexes: number;
129130
storageQuotaBytes: number;
130131
archivedItemCount: number;
131132
crawlCount: number;

0 commit comments

Comments
 (0)