1111import pystac
1212import pystac_client
1313import pytest
14- from requests .auth import AuthBase
1514from shapely .geometry import Point
1615
1716from openeo .extra .job_management import MultiBackendJobManager
1817from openeo .extra .job_management .stac_job_db import STACAPIJobDatabase
1918from openeo .rest ._testing import DummyBackend
2019
2120
22- @pytest .fixture
23- def mock_auth ():
24- return MagicMock (spec = AuthBase )
25-
26-
27- @pytest .fixture
28- def mock_stac_api_job_database (mock_auth ) -> STACAPIJobDatabase :
29- return STACAPIJobDatabase (collection_id = "test_id" , stac_root_url = "http://fake-stac-api.test" , auth = mock_auth )
30-
31-
3221@pytest .fixture
3322def mock_pystac_client (dummy_stac_item ):
3423 mock_client = MagicMock (spec = pystac_client .Client )
@@ -65,89 +54,24 @@ def job_db_not_exists(mock_pystac_client) -> STACAPIJobDatabase:
6554 )
6655
6756
68- @pytest .fixture
69- def dummy_dataframe () -> pd .DataFrame :
70- return pd .DataFrame ({"no" : [1 ], "geometry" : [2 ], "here" : [3 ]})
71-
72-
73- @pytest .fixture
74- def normalized_dummy_dataframe () -> pd .DataFrame :
75- return pd .DataFrame (
76- {
77- "item_id" : [0 ],
78- "no" : [1 ],
79- "geometry" : [2 ],
80- "here" : [3 ],
81- "id" : None ,
82- "backend_name" : None ,
83- "status" : ["not_started" ],
84- "start_time" : None ,
85- "running_start_time" : None ,
86- "cpu" : None ,
87- "memory" : None ,
88- "duration" : None ,
89- "costs" : None ,
90- },
91- )
92-
93-
94- @pytest .fixture
95- def another_dummy_dataframe () -> pd .DataFrame :
96- return pd .DataFrame ({"item_id" : [1 ], "no" : [4 ], "geometry" : [5 ], "here" : [6 ]})
97-
98-
99- @pytest .fixture
100- def normalized_merged_dummy_dataframe () -> pd .DataFrame :
101- return pd .DataFrame (
102- {
103- "item_id" : [0 , 1 ],
104- "no" : [1 , 4 ],
105- "geometry" : [2 , 5 ],
106- "here" : [3 , 6 ],
107- "id" : None ,
108- "backend_name" : None ,
109- "status" : ["not_started" , "not_started" ],
110- "start_time" : None ,
111- "running_start_time" : None ,
112- "cpu" : None ,
113- "memory" : None ,
114- "duration" : None ,
115- "costs" : None ,
116- }
117- )
118-
119-
120- @pytest .fixture
121- def dummy_geodataframe () -> gpd .GeoDataFrame :
122- return gpd .GeoDataFrame (
123- {
124- "there" : [1 ],
125- "is" : [2 ],
126- "geometry" : [Point (1 , 1 )],
127- },
128- geometry = "geometry" ,
129- )
130-
131-
132- @pytest .fixture
133- def normalized_dummy_geodataframe () -> pd .DataFrame :
134- return pd .DataFrame (
135- {
136- "item_id" : [0 ],
137- "there" : [1 ],
138- "is" : [2 ],
139- "geometry" : [{"type" : "Point" , "coordinates" : (1.0 , 1.0 )}],
140- "id" : None ,
141- "backend_name" : None ,
142- "status" : ["not_started" ],
143- "start_time" : None ,
144- "running_start_time" : None ,
145- "cpu" : None ,
146- "memory" : None ,
147- "duration" : None ,
148- "costs" : None ,
149- }
150- )
57+ def _common_normalized_df_data (rows : int = 1 ) -> dict :
58+ """
59+ Helper to build a dict (to be passed to `pd.DataFrame`)
60+ with common columns that are the result of normalization.
61+ In the context of these tests however, they are
62+ mainly irrelevant boilerplate data that is tedious to repeat.
63+ """
64+ return {
65+ "id" : None ,
66+ "backend_name" : None ,
67+ "status" : ["not_started" ] * rows ,
68+ "start_time" : None ,
69+ "running_start_time" : None ,
70+ "cpu" : None ,
71+ "memory" : None ,
72+ "duration" : None ,
73+ "costs" : None ,
74+ }
15175
15276
15377def _pystac_item (
@@ -189,7 +113,6 @@ def dummy_series_no_item_id() -> pd.Series:
189113 return pd .Series ({"datetime" : "2020-05-22T00:00:00Z" , "some_property" : "value" }, name = "test" )
190114
191115
192-
193116@pytest .fixture
194117def bulk_dataframe ():
195118 return pd .DataFrame (
@@ -209,19 +132,33 @@ def test_exists(self, job_db_exists, job_db_not_exists):
209132 assert job_db_not_exists .exists () == False
210133
211134 @patch ("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.persist" , return_value = None )
212- def test_initialize_from_df_non_existing (
213- self , mock_persist , job_db_not_exists , dummy_dataframe , normalized_dummy_dataframe
214- ):
215-
216- job_db_not_exists .initialize_from_df (dummy_dataframe )
135+ def test_initialize_from_df_non_existing (self , mock_persist , job_db_not_exists ):
136+ df = pd .DataFrame (
137+ {
138+ "no" : [1 ],
139+ "geometry" : [2 ],
140+ "here" : [3 ],
141+ }
142+ )
143+ job_db_not_exists .initialize_from_df (df )
217144
218145 mock_persist .assert_called_once ()
219- pdt .assert_frame_equal (mock_persist .call_args [0 ][0 ], normalized_dummy_dataframe )
146+ expected = pd .DataFrame (
147+ {
148+ "item_id" : [0 ],
149+ "no" : [1 ],
150+ "geometry" : [2 ],
151+ "here" : [3 ],
152+ ** _common_normalized_df_data (),
153+ },
154+ )
155+ pdt .assert_frame_equal (mock_persist .call_args [0 ][0 ], expected )
220156 assert job_db_not_exists .has_geometry == False
221157
222- def test_initialize_from_df_existing_error (self , job_db_exists , dummy_dataframe ):
158+ def test_initialize_from_df_existing_error (self , job_db_exists ):
159+ df = pd .DataFrame ({"hello" : ["world" ]})
223160 with pytest .raises (FileExistsError ):
224- job_db_exists .initialize_from_df (dummy_dataframe )
161+ job_db_exists .initialize_from_df (df )
225162
226163 @patch ("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.persist" , return_value = None )
227164 @patch ("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.get_by_status" )
@@ -230,25 +167,63 @@ def test_initialize_from_df_existing_append(
230167 mock_get_by_status ,
231168 mock_persist ,
232169 job_db_exists ,
233- normalized_dummy_dataframe ,
234- another_dummy_dataframe ,
235- normalized_merged_dummy_dataframe ,
236170 ):
237- mock_get_by_status .return_value = normalized_dummy_dataframe
238- job_db_exists .initialize_from_df (another_dummy_dataframe , on_exists = "append" )
171+ mock_get_by_status .return_value = pd .DataFrame (
172+ {
173+ "item_id" : [0 ],
174+ "no" : [1 ],
175+ "geometry" : [2 ],
176+ "here" : [3 ],
177+ ** _common_normalized_df_data (),
178+ },
179+ )
180+
181+ df = pd .DataFrame (
182+ {
183+ "item_id" : [1 ],
184+ "no" : [4 ],
185+ "geometry" : [5 ],
186+ "here" : [6 ],
187+ }
188+ )
189+ job_db_exists .initialize_from_df (df , on_exists = "append" )
239190
240191 mock_persist .assert_called_once ()
241- pdt .assert_frame_equal (mock_persist .call_args [0 ][0 ], normalized_merged_dummy_dataframe )
192+ expected_df = pd .DataFrame (
193+ {
194+ "item_id" : [0 , 1 ],
195+ "no" : [1 , 4 ],
196+ "geometry" : [2 , 5 ],
197+ "here" : [3 , 6 ],
198+ ** _common_normalized_df_data (rows = 2 ),
199+ }
200+ )
201+ pdt .assert_frame_equal (mock_persist .call_args [0 ][0 ], expected_df )
242202 assert job_db_exists .has_geometry == False
243203
244204 @patch ("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.persist" , return_value = None )
245- def test_initialize_from_df_with_geometry (
246- self , mock_persists , job_db_not_exists , dummy_geodataframe , normalized_dummy_geodataframe
247- ):
248- job_db_not_exists .initialize_from_df (dummy_geodataframe )
205+ def test_initialize_from_df_with_geometry (self , mock_persists , job_db_not_exists ):
206+ df = gpd .GeoDataFrame (
207+ {
208+ "there" : [1 ],
209+ "is" : [2 ],
210+ "geometry" : [Point (1 , 1 )],
211+ },
212+ geometry = "geometry" ,
213+ )
214+ job_db_not_exists .initialize_from_df (df )
249215
250216 mock_persists .assert_called_once ()
251- pdt .assert_frame_equal (mock_persists .call_args [0 ][0 ], normalized_dummy_geodataframe )
217+ expected = pd .DataFrame (
218+ {
219+ "item_id" : [0 ],
220+ "there" : [1 ],
221+ "is" : [2 ],
222+ "geometry" : [{"type" : "Point" , "coordinates" : (1.0 , 1.0 )}],
223+ ** _common_normalized_df_data (),
224+ }
225+ )
226+ pdt .assert_frame_equal (mock_persists .call_args [0 ][0 ], expected )
252227 assert job_db_not_exists .has_geometry == True
253228 assert job_db_not_exists .geometry_column == "geometry"
254229
@@ -331,8 +306,13 @@ def test_item_from_geometry(self, job_db_exists, series, expected):
331306 assert item .to_dict () == expected .to_dict ()
332307
333308 @patch ("openeo.extra.job_management.stac_job_db.STACAPIJobDatabase.get_by_status" )
334- def test_count_by_status (self , mock_get_by_status , normalized_dummy_dataframe , job_db_exists ):
335- mock_get_by_status .return_value = normalized_dummy_dataframe
309+ def test_count_by_status (self , mock_get_by_status , job_db_exists ):
310+ mock_get_by_status .return_value = pd .DataFrame (
311+ {
312+ "item_id" : [0 ],
313+ "status" : ["not_started" ],
314+ },
315+ )
336316 assert job_db_exists .count_by_status () == {"not_started" : 1 }
337317
338318 def test_get_by_status_no_filter (self , job_db_exists ):
0 commit comments