Skip to content

Commit c6cbaf7

Browse files
authored
Reduce memory consumption when building index by streaming response (#335)
1 parent c12ebe1 commit c6cbaf7

File tree

5 files changed

+36
-11
lines changed

5 files changed

+36
-11
lines changed

data_hub_api/docmaps/v2/api_router.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import logging
22

33
from fastapi import APIRouter, HTTPException
4+
from fastapi.responses import StreamingResponse
45

56
from data_hub_api.docmaps.v2.provider import DocmapsProvider
67

@@ -13,9 +14,12 @@ def create_docmaps_router(
1314
) -> APIRouter:
1415
router = APIRouter()
1516

16-
@router.get("/v2/index")
17-
def get_enhanced_preprints_docmaps_index():
18-
return docmaps_provider.get_docmaps_index()
17+
@router.get("/v2/index", response_class=StreamingResponse)
18+
def stream_docmaps_index() -> StreamingResponse:
19+
return StreamingResponse(
20+
docmaps_provider.iter_docmaps_index_json_stream(),
21+
media_type="application/json"
22+
)
1923

2024
@router.get("/v2/by-publisher/elife/get-by-manuscript-id")
2125
def get_enhanced_preprints_docmaps_by_manuscript_id_by_publisher_elife(manuscript_id: str):

data_hub_api/docmaps/v2/provider.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
from pathlib import Path
34
from time import monotonic
@@ -70,6 +71,13 @@ def iter_docmaps_by_manuscript_id(
7071
def get_docmaps_by_manuscript_id(self, manuscript_id: str) -> Sequence[Docmap]:
7172
return list(self.iter_docmaps_by_manuscript_id(manuscript_id))
7273

73-
def get_docmaps_index(self) -> dict:
74-
article_docmaps_list = list(self.iter_docmaps_by_manuscript_id())
75-
return {'docmaps': article_docmaps_list}
74+
def iter_docmaps_index_json_stream(self):
75+
yield '{"docmaps":['
76+
first = True
77+
for docmap in self.iter_docmaps_by_manuscript_id():
78+
if not first:
79+
yield ','
80+
else:
81+
first = False
82+
yield json.dumps(docmap)
83+
yield ']}'

tests/unit_tests/docmaps/v2/api_router_test.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from unittest.mock import MagicMock
23
import pytest
34

@@ -28,7 +29,9 @@ def test_should_return_json_with_docmaps_list_from_enhanced_preprint_provider(
2829
docmaps_provider_mock: MagicMock
2930
):
3031
docmaps_index = [{'docmaps': [{'id': 'docmap_1'}, {'id': 'docmap_2'}]}]
31-
docmaps_provider_mock.get_docmaps_index.return_value = docmaps_index
32+
docmaps_provider_mock.iter_docmaps_index_json_stream.return_value = ([
33+
json.dumps(docmaps_index)
34+
])
3235
client = create_test_client(docmaps_provider_mock)
3336
response = client.get('/v2/index')
3437
assert response.json() == docmaps_index

tests/unit_tests/docmaps/v2/provider_test.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from unittest.mock import patch, MagicMock
23
from typing import Iterable, cast
34

@@ -19,6 +20,12 @@ def _iter_dict_from_bq_query_mock() -> Iterable[MagicMock]:
1920
yield mock
2021

2122

23+
def _get_docmaps_index_dict(provider: DocmapsProvider) -> dict:
24+
return json.loads(
25+
''.join(provider.iter_docmaps_index_json_stream())
26+
)
27+
28+
2229
class TestEnhancedPreprintsDocmapsProvider:
2330
def test_should_create_index_with_non_empty_docmaps(
2431
self,
@@ -27,7 +34,7 @@ def test_should_create_index_with_non_empty_docmaps(
2734
iter_dict_from_bq_query_mock.return_value = iter([
2835
DOCMAPS_QUERY_RESULT_ITEM_1
2936
])
30-
docmaps_index = DocmapsProvider().get_docmaps_index()
37+
docmaps_index = _get_docmaps_index_dict(DocmapsProvider())
3138
assert docmaps_index['docmaps'] == [
3239
get_docmap_item_for_query_result_item(cast(ApiInput, DOCMAPS_QUERY_RESULT_ITEM_1))
3340
]
@@ -42,8 +49,8 @@ def test_should_cache_docmaps_query_results(
4249
docmaps_provider = DocmapsProvider(
4350
query_results_cache=InMemorySingleObjectCache(max_age_in_seconds=10)
4451
)
45-
docmaps_provider.get_docmaps_index()
46-
docmaps_index = docmaps_provider.get_docmaps_index()
52+
docmaps_index = _get_docmaps_index_dict(docmaps_provider)
53+
docmaps_index = _get_docmaps_index_dict(docmaps_provider)
4754
assert iter_dict_from_bq_query_mock.call_count == 1
4855
assert docmaps_index['docmaps'] == [
4956
get_docmap_item_for_query_result_item(cast(ApiInput, DOCMAPS_QUERY_RESULT_ITEM_1))

tests/unit_tests/main_test.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from unittest.mock import patch, MagicMock
23
from typing import Iterable, Sequence
34

@@ -47,7 +48,9 @@ def test_should_return_json_with_docmaps_list_from_enhanced_preprint_provider(
4748
enhanced_preprints_docmaps_provider_mock: MagicMock
4849
):
4950
docmaps_index = [{'docmaps': [{'id': 'docmap_1'}, {'id': 'docmap_2'}]}]
50-
enhanced_preprints_docmaps_provider_mock.get_docmaps_index.return_value = docmaps_index
51+
enhanced_preprints_docmaps_provider_mock.iter_docmaps_index_json_stream.return_value = iter(
52+
[json.dumps(docmaps_index)]
53+
)
5154
client = TestClient(create_app())
5255
response = client.get('/enhanced-preprints/docmaps/v2/index')
5356
assert response.json() == docmaps_index

0 commit comments

Comments
 (0)