Skip to content

Commit a74911d

Browse files
authored
Remove IndexMap document list (distributed-system-analysis#3606)
* Remove IndexMap document list PBENCH-1315 The production server, with "only" 108,728 indexed datasets (many more still haven't been migrated from the passthrough server), currently claims 84.1 GB of PostgreSQL storage just for the `IndexMap` table. Most of this consists of a list of every OpenSearch document ID, kept so that bulk update and delete operations could be used to manage the index. This is straining the capacity of our RDU2 PostgreSQL server. As an alternative, this PR removes the document list and replaces the bulk update and delete operations with `_delete_by_query` and `_update_by_query`, which search the appropriate indices (still stored in the `IndexMap`) for documents by parent dataset resource ID. Along the way, I noticed that (oops) we were missing the `"authorization"` subdocument in some of our Elasticsearch documents, which would impact the behavior of the authenticated search APIs. I also acted on a deprecation warning for a camelCase template keyword by replacing it with its snake_case alternative.
1 parent 369736b commit a74911d

File tree

22 files changed

+603
-1681
lines changed

22 files changed

+603
-1681
lines changed

lib/pbench/cli/server/report.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from collections import defaultdict
22
import datetime
3-
import json
43
import re
54
from threading import Thread
65
import time
@@ -323,13 +322,9 @@ def report_sql():
323322
indices = set()
324323
root_size = 0
325324
index_size = 0
326-
document_count = 0
327-
document_size = 0
328-
json_size = 0
329-
document_int_size = 0
330325

331-
query = select(IndexMap.root, IndexMap.index, IndexMap.documents)
332-
for root, index, documents in Database.db_session.execute(
326+
query = select(IndexMap.root, IndexMap.index)
327+
for root, index in Database.db_session.execute(
333328
query, execution_options={"stream_results": True}
334329
).yield_per(500):
335330
record_count += 1
@@ -338,12 +333,6 @@ def report_sql():
338333
indices.add(index)
339334
root_size += len(root)
340335
index_size += len(index)
341-
json_size += len(json.dumps(documents))
342-
for d in documents:
343-
document_count += 1
344-
document_size += len(d)
345-
i = int(d, base=16)
346-
document_int_size += (i.bit_length() + 7) / 8
347336
unique_root_size = sum(len(r) for r in roots)
348337
unique_index_size = sum(len(i) for i in indices)
349338

@@ -359,11 +348,6 @@ def report_sql():
359348
f" deduped: {humanize.naturalsize(unique_index_size)} for index "
360349
f"names, {humanize.naturalsize(unique_root_size)} for root names"
361350
)
362-
detailer.message(
363-
f" {humanize.naturalsize(document_size)} for {document_count:,d} document IDs, "
364-
f"{humanize.naturalsize(json_size)} as JSON, "
365-
f"{humanize.naturalsize(document_int_size)} as (Python) ints"
366-
)
367351

368352

369353
def report_states():

lib/pbench/server/api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
2424
from pbench.server.api.resources.datasets_visualize import DatasetsVisualize
2525
from pbench.server.api.resources.endpoint_configure import EndpointConfig
26-
from pbench.server.api.resources.query_apis.dataset import Datasets
26+
from pbench.server.api.resources.query_apis.datasets.datasets import Datasets
2727
from pbench.server.api.resources.query_apis.datasets.datasets_detail import (
2828
DatasetsDetail,
2929
)

lib/pbench/server/api/resources/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,7 @@ class ApiAttributes:
10301030
"""
10311031

10321032
action: str
1033-
operation_name: OperationName
1033+
operation_name: Optional[OperationName]
10341034
require_stable: bool
10351035
require_map: bool
10361036

0 commit comments

Comments
 (0)