Skip to content

Commit 33a4d8f

Browse files
kevinemoore and Kevin Moore authored
Remove doc limit for stats query (#1597)
## Remove doc limit for stats query Two minor changes to the search lambda: 1. Remove the document-per-shard limit only for the stats query, which can take advantage of cached summary statistics inside Elasticsearch. 2. Allow MAX_DOCUMENTS_PER_SHARD to be set in the environment. Small test fix: Grabbing environment variables at module initialization doesn't work (at least in pytest), so the lookups are moved into the body of the lambda handler. Co-authored-by: Kevin Moore <kevin@quiltdata.io>
1 parent dcdcdd4 commit 33a4d8f

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

lambdas/search/index.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@
1111
from t4_lambda_shared.decorator import api
1212
from t4_lambda_shared.utils import get_default_origins, make_json_response
1313

14-
INDEX_OVERRIDES = os.getenv('INDEX_OVERRIDES', '')
1514
MAX_QUERY_DURATION = '15s'
16-
MAX_DOCUMENTS_PER_SHARD = 10000
1715
NUM_PREVIEW_IMAGES = 100
1816
NUM_PREVIEW_FILES = 100
1917
IMG_EXTS = [
@@ -46,8 +44,10 @@ def lambda_handler(request):
4644
"""
4745
Proxy the request to the elastic search.
4846
"""
47+
4948
action = request.args.get('action')
5049
indexes = request.args.get('index')
50+
terminate_after = os.getenv('MAX_DOCUMENTS_PER_SHARD')
5151

5252
if action == 'search':
5353
query = request.args.get('query', '')
@@ -75,6 +75,8 @@ def lambda_handler(request):
7575
}
7676
size = 0
7777
_source = []
78+
# Consider all documents when computing counts, etc.
79+
terminate_after = None
7880
elif action == 'images':
7981
body = {
8082
'query': {'terms': {'ext': IMG_EXTS}},
@@ -118,7 +120,8 @@ def lambda_handler(request):
118120

119121
es_host = os.environ['ES_HOST']
120122
region = os.environ['AWS_REGION']
121-
123+
index_overrides = os.getenv('INDEX_OVERRIDES', '')
124+
122125
auth = BotoAWSRequestsAuth(
123126
aws_host=es_host,
124127
aws_region=region,
@@ -133,13 +136,13 @@ def lambda_handler(request):
133136
connection_class=RequestsHttpConnection
134137
)
135138

136-
to_search = f"{indexes},{INDEX_OVERRIDES}" if INDEX_OVERRIDES else indexes
139+
to_search = f"{indexes},{index_overrides}" if index_overrides else indexes
137140
result = es_client.search(
138141
to_search,
139142
body,
140143
_source=_source,
141144
size=size,
142-
terminate_after=MAX_DOCUMENTS_PER_SHARD,
145+
terminate_after=terminate_after,
143146
timeout=MAX_QUERY_DURATION
144147
)
145148

lambdas/search/tests/test_search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ def setUp(self):
1919
'AWS_ACCESS_KEY_ID': 'test_key',
2020
'AWS_SECRET_ACCESS_KEY': 'test_secret',
2121
'AWS_REGION': 'ng-north-1',
22-
'ES_HOST': 'www.example.com'
22+
'ES_HOST': 'www.example.com',
23+
'MAX_DOCUMENTS_PER_SHARD': '10000',
2324
})
2425
self.env_patcher.start()
2526

@@ -86,7 +87,6 @@ def _callback(request):
8687
def test_stats(self):
8788
url = 'https://www.example.com:443/bucket/_search?' + urlencode(dict(
8889
timeout='15s',
89-
terminate_after=10000,
9090
size=0,
9191
_source = '',
9292
))

0 commit comments

Comments
 (0)