Skip to content

Commit 1d14673

Browse files
authored
Fix OpenSearch bucket aggregations (#272)
* Fix OpenSearch bucket aggregations In order to build filter lists, we collect the values assigned to each key using an OpenSearch bucket aggregation. Unfortunately, by default, a bucket aggregation returns only the first 10 buckets; in at least one specific case where someone noticed (the releaseStream keyword) we've exceeded 10 values and expected terms aren't being reported. This uses the `terms` aggregation's `size` parameter to return up to 1000 buckets instead. This is likely overkill for many cases, but should be "safe" for most cases. I considered adding code into `search.py` to analyze returned bucket aggregations for a non-zero `sum_other_doc_count` (which means we could have returned additional terms), and report any case where this happens for log stream analysis. This doesn't solve any additional problems, however; it adds more code and is a little tricky given the logic to merge aggregations against two OpenSearch instances. If some reviewer wants to insist, or if I start feeling too guilty before I get an approval, I'll go ahead and do it. Closes #271
1 parent 471fea8 commit 1d14673

File tree

5 files changed

+10
-7
lines changed

5 files changed

+10
-7
lines changed

backend/app/api/v1/commons/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
"formal",
3030
"ciSystem",
3131
)
32+
AGG_BUCKET_SIZE = 1000
3233
MAX_PAGE = 10000
3334
OCP_SHORT_VER_LEN = 6
3435

backend/app/api/v1/commons/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def normalize_pagination(offset: Optional[int], size: Optional[int]) -> tuple[in
124124
def buildAggregateQuery(constant_dict):
125125
aggregate = {}
126126
for x, y in constant_dict.items():
127-
obj = {x: {"terms": {"field": y}}}
127+
obj = {x: {"terms": {"field": y, "size": constants.AGG_BUCKET_SIZE}}}
128128
aggregate.update(obj)
129129
return aggregate
130130

backend/app/api/v1/endpoints/ocp/graph.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from fastapi import APIRouter
44
import pandas as pd
55

6+
from app.api.v1.commons.constants import AGG_BUCKET_SIZE
67
from app.api.v1.commons.utils import getMetadata
78
from app.services.search import ElasticService
89

@@ -381,11 +382,11 @@ async def getBurnerCPUResults(uuids: list, namespace: str, index: str):
381382
"size": 0,
382383
"aggs": {
383384
"time": {
384-
"terms": {"field": "uuid.keyword"},
385+
"terms": {"field": "uuid.keyword", "size": AGG_BUCKET_SIZE},
385386
"aggs": {"time": {"avg": {"field": "timestamp"}}},
386387
},
387388
"uuid": {
388-
"terms": {"field": "uuid.keyword"},
389+
"terms": {"field": "uuid.keyword", "size": AGG_BUCKET_SIZE},
389390
"aggs": {"cpu": {"avg": {"field": "value"}}},
390391
},
391392
},

backend/app/api/v1/endpoints/quay/quayGraphs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from fastapi import APIRouter
22

3+
from app.api.v1.commons.constants import AGG_BUCKET_SIZE
34
from app.api.v1.commons.utils import getMetadata
45
from app.services.search import ElasticService
56

@@ -157,7 +158,7 @@ async def getImageMetrics(uuids: list, index: str):
157158
"size": 0,
158159
"aggs": {
159160
"uuid": {
160-
"terms": {"field": "uuid.keyword"},
161+
"terms": {"field": "uuid.keyword", "size": AGG_BUCKET_SIZE},
161162
"aggs": {
162163
"latency": {"avg": {"field": "elapsed_time"}},
163164
"success_count": {"sum": {"field": "success_count"}},

backend/tests/unit/test_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -423,9 +423,9 @@ def test_build_aggregate_query_structure(self):
423423

424424
expected_result = {
425425
"aggregate_structure": {
426-
"platform": {"terms": {"field": "platform.keyword"}},
427-
"benchmark": {"terms": {"field": "benchmark.keyword"}},
428-
"jobStatus": {"terms": {"field": "jobStatus.keyword"}},
426+
"platform": {"terms": {"field": "platform.keyword", "size": 1000}},
427+
"benchmark": {"terms": {"field": "benchmark.keyword", "size": 1000}},
428+
"jobStatus": {"terms": {"field": "jobStatus.keyword", "size": 1000}},
429429
},
430430
"correct_structure": True,
431431
}

0 commit comments

Comments
 (0)