Skip to content

Commit 481ab0c

Browse files
committed
Fine-tune STACAPIJobDatabase tests
Eliminate some fixture anti-patterns (too much abstraction and decoupling). Based on work on #794 and #798.
1 parent 6fe95c2 commit 481ab0c

File tree

1 file changed

+96
-116
lines changed

1 file changed

+96
-116
lines changed

tests/extra/job_management/test_stac_job_db.py

Lines changed: 96 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,13 @@
1111
import pystac
1212
import pystac_client
1313
import pytest
14-
from requests.auth import AuthBase
1514
from shapely.geometry import Point
1615

1716
from openeo.extra.job_management import MultiBackendJobManager
1817
from openeo.extra.job_management.stac_job_db import STACAPIJobDatabase
1918
from openeo.rest._testing import DummyBackend
2019

2120

22-
@pytest.fixture
23-
def mock_auth():
24-
return MagicMock(spec=AuthBase)
25-
26-
27-
@pytest.fixture
28-
def mock_stac_api_job_database(mock_auth) -> STACAPIJobDatabase:
29-
return STACAPIJobDatabase(collection_id="test_id", stac_root_url="http://fake-stac-api.test", auth=mock_auth)
30-
31-
3221
@pytest.fixture
3322
def mock_pystac_client(dummy_stac_item):
3423
mock_client = MagicMock(spec=pystac_client.Client)
@@ -65,89 +54,24 @@ def job_db_not_exists(mock_pystac_client) -> STACAPIJobDatabase:
6554
)
6655

6756

68-
@pytest.fixture
69-
def dummy_dataframe() -> pd.DataFrame:
70-
return pd.DataFrame({"no": [1], "geometry": [2], "here": [3]})
71-
72-
73-
@pytest.fixture
74-
def normalized_dummy_dataframe() -> pd.DataFrame:
75-
return pd.DataFrame(
76-
{
77-
"item_id": [0],
78-
"no": [1],
79-
"geometry": [2],
80-
"here": [3],
81-
"id": None,
82-
"backend_name": None,
83-
"status": ["not_started"],
84-
"start_time": None,
85-
"running_start_time": None,
86-
"cpu": None,
87-
"memory": None,
88-
"duration": None,
89-
"costs": None,
90-
},
91-
)
92-
93-
94-
@pytest.fixture
95-
def another_dummy_dataframe() -> pd.DataFrame:
96-
return pd.DataFrame({"item_id": [1], "no": [4], "geometry": [5], "here": [6]})
97-
98-
99-
@pytest.fixture
100-
def normalized_merged_dummy_dataframe() -> pd.DataFrame:
101-
return pd.DataFrame(
102-
{
103-
"item_id": [0, 1],
104-
"no": [1, 4],
105-
"geometry": [2, 5],
106-
"here": [3, 6],
107-
"id": None,
108-
"backend_name": None,
109-
"status": ["not_started", "not_started"],
110-
"start_time": None,
111-
"running_start_time": None,
112-
"cpu": None,
113-
"memory": None,
114-
"duration": None,
115-
"costs": None,
116-
}
117-
)
118-
119-
120-
@pytest.fixture
121-
def dummy_geodataframe() -> gpd.GeoDataFrame:
122-
return gpd.GeoDataFrame(
123-
{
124-
"there": [1],
125-
"is": [2],
126-
"geometry": [Point(1, 1)],
127-
},
128-
geometry="geometry",
129-
)
130-
131-
132-
@pytest.fixture
133-
def normalized_dummy_geodataframe() -> pd.DataFrame:
134-
return pd.DataFrame(
135-
{
136-
"item_id": [0],
137-
"there": [1],
138-
"is": [2],
139-
"geometry": [{"type": "Point", "coordinates": (1.0, 1.0)}],
140-
"id": None,
141-
"backend_name": None,
142-
"status": ["not_started"],
143-
"start_time": None,
144-
"running_start_time": None,
145-
"cpu": None,
146-
"memory": None,
147-
"duration": None,
148-
"costs": None,
149-
}
150-
)
57+
def _common_normalized_df_data(rows: int = 1) -> dict:
58+
"""
59+
Helper to build a dict (to be passed to `pd.DataFrame`)
60+
with common columns that are the result of normalization.
61+
In the context of these tests however, they are
62+
mainly irrelevant boilerplate data that is tedious to repeat.
63+
"""
64+
return {
65+
"id": None,
66+
"backend_name": None,
67+
"status": ["not_started"] * rows,
68+
"start_time": None,
69+
"running_start_time": None,
70+
"cpu": None,
71+
"memory": None,
72+
"duration": None,
73+
"costs": None,
74+
}
15175

15276

15377
def _pystac_item(
@@ -189,7 +113,6 @@ def dummy_series_no_item_id() -> pd.Series:
189113
return pd.Series({"datetime": "2020-05-22T00:00:00Z", "some_property": "value"}, name="test")
190114

191115

192-
193116
@pytest.fixture
194117
def bulk_dataframe():
195118
return pd.DataFrame(
@@ -209,19 +132,33 @@ def test_exists(self, job_db_exists, job_db_not_exists):
209132
assert job_db_not_exists.exists() == False
210133

211134
@patch("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.persist", return_value=None)
212-
def test_initialize_from_df_non_existing(
213-
self, mock_persist, job_db_not_exists, dummy_dataframe, normalized_dummy_dataframe
214-
):
215-
216-
job_db_not_exists.initialize_from_df(dummy_dataframe)
135+
def test_initialize_from_df_non_existing(self, mock_persist, job_db_not_exists):
136+
df = pd.DataFrame(
137+
{
138+
"no": [1],
139+
"geometry": [2],
140+
"here": [3],
141+
}
142+
)
143+
job_db_not_exists.initialize_from_df(df)
217144

218145
mock_persist.assert_called_once()
219-
pdt.assert_frame_equal(mock_persist.call_args[0][0], normalized_dummy_dataframe)
146+
expected = pd.DataFrame(
147+
{
148+
"item_id": [0],
149+
"no": [1],
150+
"geometry": [2],
151+
"here": [3],
152+
**_common_normalized_df_data(),
153+
},
154+
)
155+
pdt.assert_frame_equal(mock_persist.call_args[0][0], expected)
220156
assert job_db_not_exists.has_geometry == False
221157

222-
def test_initialize_from_df_existing_error(self, job_db_exists, dummy_dataframe):
158+
def test_initialize_from_df_existing_error(self, job_db_exists):
159+
df = pd.DataFrame({"hello": ["world"]})
223160
with pytest.raises(FileExistsError):
224-
job_db_exists.initialize_from_df(dummy_dataframe)
161+
job_db_exists.initialize_from_df(df)
225162

226163
@patch("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.persist", return_value=None)
227164
@patch("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.get_by_status")
@@ -230,25 +167,63 @@ def test_initialize_from_df_existing_append(
230167
mock_get_by_status,
231168
mock_persist,
232169
job_db_exists,
233-
normalized_dummy_dataframe,
234-
another_dummy_dataframe,
235-
normalized_merged_dummy_dataframe,
236170
):
237-
mock_get_by_status.return_value = normalized_dummy_dataframe
238-
job_db_exists.initialize_from_df(another_dummy_dataframe, on_exists="append")
171+
mock_get_by_status.return_value = pd.DataFrame(
172+
{
173+
"item_id": [0],
174+
"no": [1],
175+
"geometry": [2],
176+
"here": [3],
177+
**_common_normalized_df_data(),
178+
},
179+
)
180+
181+
df = pd.DataFrame(
182+
{
183+
"item_id": [1],
184+
"no": [4],
185+
"geometry": [5],
186+
"here": [6],
187+
}
188+
)
189+
job_db_exists.initialize_from_df(df, on_exists="append")
239190

240191
mock_persist.assert_called_once()
241-
pdt.assert_frame_equal(mock_persist.call_args[0][0], normalized_merged_dummy_dataframe)
192+
expected_df = pd.DataFrame(
193+
{
194+
"item_id": [0, 1],
195+
"no": [1, 4],
196+
"geometry": [2, 5],
197+
"here": [3, 6],
198+
**_common_normalized_df_data(rows=2),
199+
}
200+
)
201+
pdt.assert_frame_equal(mock_persist.call_args[0][0], expected_df)
242202
assert job_db_exists.has_geometry == False
243203

244204
@patch("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.persist", return_value=None)
245-
def test_initialize_from_df_with_geometry(
246-
self, mock_persists, job_db_not_exists, dummy_geodataframe, normalized_dummy_geodataframe
247-
):
248-
job_db_not_exists.initialize_from_df(dummy_geodataframe)
205+
def test_initialize_from_df_with_geometry(self, mock_persists, job_db_not_exists):
206+
df = gpd.GeoDataFrame(
207+
{
208+
"there": [1],
209+
"is": [2],
210+
"geometry": [Point(1, 1)],
211+
},
212+
geometry="geometry",
213+
)
214+
job_db_not_exists.initialize_from_df(df)
249215

250216
mock_persists.assert_called_once()
251-
pdt.assert_frame_equal(mock_persists.call_args[0][0], normalized_dummy_geodataframe)
217+
expected = pd.DataFrame(
218+
{
219+
"item_id": [0],
220+
"there": [1],
221+
"is": [2],
222+
"geometry": [{"type": "Point", "coordinates": (1.0, 1.0)}],
223+
**_common_normalized_df_data(),
224+
}
225+
)
226+
pdt.assert_frame_equal(mock_persists.call_args[0][0], expected)
252227
assert job_db_not_exists.has_geometry == True
253228
assert job_db_not_exists.geometry_column == "geometry"
254229

@@ -331,8 +306,13 @@ def test_item_from_geometry(self, job_db_exists, series, expected):
331306
assert item.to_dict() == expected.to_dict()
332307

333308
@patch("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.get_by_status")
334-
def test_count_by_status(self, mock_get_by_status, normalized_dummy_dataframe, job_db_exists):
335-
mock_get_by_status.return_value = normalized_dummy_dataframe
309+
def test_count_by_status(self, mock_get_by_status, job_db_exists):
310+
mock_get_by_status.return_value = pd.DataFrame(
311+
{
312+
"item_id": [0],
313+
"status": ["not_started"],
314+
},
315+
)
336316
assert job_db_exists.count_by_status() == {"not_started": 1}
337317

338318
def test_get_by_status_no_filter(self, job_db_exists):

0 commit comments

Comments
 (0)