Skip to content

Commit 4d19355

Browse files
committed
add catalog to client
1 parent b9e9637 commit 4d19355

File tree

1 file changed

+207
-43
lines changed
  • python-package/cartiflette/cartiflette

1 file changed

+207
-43
lines changed

python-package/cartiflette/cartiflette/client.py

Lines changed: 207 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from datetime import date
2+
from functools import reduce, lru_cache
23
import logging
34
import os
45
import typing
6+
from warnings import warn
57

68
from requests_cache import CachedSession
79
import geopandas as gpd
@@ -14,6 +16,9 @@
1416
PATH_WITHIN_BUCKET,
1517
CATALOG,
1618
)
19+
20+
# TODO : mettre bucket et path_within_bucket en véritables constantes
21+
1722
from cartiflette.config import _config
1823
from cartiflette.utils import (
1924
create_path_bucket,
@@ -33,6 +38,8 @@ class CartifletteSession(CachedSession):
3338
def __init__(
3439
self,
3540
expire_after: int = _config["DEFAULT_EXPIRE_AFTER"],
41+
bucket: str = BUCKET,
42+
path_within_bucket: str = PATH_WITHIN_BUCKET,
3643
**kwargs,
3744
):
3845
super().__init__(
@@ -41,6 +48,9 @@ def __init__(
4148
**kwargs,
4249
)
4350

51+
self.bucket = bucket
52+
self.path_within_bucket = path_within_bucket
53+
4454
for protocol in ["http", "https"]:
4555
try:
4656
proxy = {protocol: os.environ[f"{protocol}_proxy"]}
@@ -50,11 +60,8 @@ def __init__(
5060

5161
def download_cartiflette_single(
5262
self,
53-
*args,
54-
bucket: str = BUCKET,
55-
path_within_bucket: str = PATH_WITHIN_BUCKET,
56-
provider: str = "IGN",
57-
dataset_family: str = "ADMINEXPRESS",
63+
provider: str = "Cartiflette",
64+
dataset_family: str = "production",
5865
source: str = "EXPRESS-COG-TERRITOIRE",
5966
vectorfile_format: str = "geojson",
6067
borders: str = "COMMUNE",
@@ -65,28 +72,92 @@ def download_cartiflette_single(
6572
crs: typing.Union[list, str, int, float] = 2154,
6673
simplification: typing.Union[str, int, float] = None,
6774
filename: str = "raw",
68-
**kwargs,
69-
):
75+
) -> gpd.GeoDataFrame:
76+
"""
77+
Download a single geodataset from Cartiflette
78+
79+
Parameters
80+
----------
81+
provider : str, optional
82+
Deprecated. The default is "Cartiflette".
83+
dataset_family : str, optional
84+
Deprecated. The default is "production".
85+
source : str, optional
86+
DESCRIPTION. The default is "EXPRESS-COG-TERRITOIRE".
87+
vectorfile_format : str, optional
88+
DESCRIPTION. The default is "geojson".
89+
borders : str, optional
90+
DESCRIPTION. The default is "COMMUNE".
91+
filter_by : str, optional
92+
DESCRIPTION. The default is "region".
93+
territory : str, optional
94+
DESCRIPTION. The default is "metropole".
95+
year : typing.Union[str, int, float], optional
96+
DESCRIPTION. The default is None.
97+
value : typing.Union[str, int, float], optional
98+
DESCRIPTION. The default is "28".
99+
crs : typing.Union[list, str, int, float], optional
100+
DESCRIPTION. The default is 2154.
101+
simplification : typing.Union[str, int, float], optional
102+
DESCRIPTION. The default is None.
103+
filename : str, optional
104+
DESCRIPTION. The default is "raw".
105+
: TYPE
106+
DESCRIPTION.
107+
108+
Returns
109+
-------
110+
gpd.GeoDataFrame
111+
The requested geodataset (an empty GeoDataFrame if the download or read fails).
112+
113+
"""
114+
115+
if provider:
116+
warn(
117+
"provider is deprecated and will be removed in a future "
118+
"version. You can safely drop this argument.",
119+
DeprecationWarning,
120+
stacklevel=2,
121+
)
122+
123+
if dataset_family:
124+
warn(
125+
"dataset_family is deprecated and will be removed in a future "
126+
"version. You can safely drop this argument.",
127+
DeprecationWarning,
128+
stacklevel=2,
129+
)
130+
131+
if borders == "COMMUNE_ARRONDISSEMENT":
132+
warn(
133+
"'COMMUNE_ARRONDISSESMENT' is deprecated for borders and will "
134+
"be removed in a future version. Please use 'ARM' instead.",
135+
DeprecationWarning,
136+
stacklevel=2,
137+
)
138+
139+
# TODO : vérifier borders vs. administrative_level
140+
70141
if not year:
71142
year = str(date.today().year)
72143

73-
corresp_filter_by_columns, format_read, driver = standardize_inputs(
144+
_corresp_filter_by_columns, format_read, _driver = standardize_inputs(
74145
vectorfile_format
75146
)
76147

77148
url = create_path_bucket(
78149
{
79-
"bucket": bucket,
80-
"path_within_bucket": path_within_bucket,
150+
"bucket": self.bucket,
151+
"path_within_bucket": self.path_within_bucket,
81152
"vectorfile_format": format_read,
82153
"territory": territory,
83154
"borders": borders,
84155
"filter_by": filter_by,
85156
"year": year,
86157
"value": value,
87158
"crs": crs,
88-
"provider": provider,
89-
"dataset_family": dataset_family,
159+
"provider": "Cartiflette",
160+
"dataset_family": "production",
90161
"source": source,
91162
"simplification": simplification,
92163
"filename": filename,
@@ -100,15 +171,75 @@ def download_cartiflette_single(
100171
gdf = gpd.read_file(r.content)
101172
except Exception as e:
102173
logger.error(
103-
f"There was an error while reading the file from the URL: {url}"
174+
"There was an error while reading the file from the URL: %s",
175+
url,
104176
)
105-
logger.error(f"Error message: {str(e)}")
177+
logger.error("Error message: %s", str(e))
178+
return gpd.GeoDataFrame()
106179
else:
107180
return gdf
108181

109182
def get_catalog(self, **kwargs) -> pd.DataFrame:
110183
"""
111-
Retrieve and load cartiflette's current datasets' inventory (as a
184+
Retrieve and load cartiflette's current datasets' catalog (as a
185+
dataframe), filtered on any of the following columns:
186+
[
187+
'source',
188+
'year',
189+
'administrative_level',
190+
'crs',
191+
'filter_by',
192+
'value',
193+
'vectorfile_format',
194+
'territory',
195+
'simplification'
196+
]
197+
198+
Each row corresponds to an available DataFrame.
199+
200+
Parameters
201+
----------
202+
kwargs: dict
203+
pairs of column/filter values
204+
205+
Returns
206+
-------
207+
df : pd.DataFrame
208+
Filtered catalog as DataFrame
209+
210+
Example
211+
-------
212+
>>> kwargs = {"territory": "france", "source": "CONTOUR-IRIS"}
213+
>>> with CartifletteSession() as carti_session:
214+
return carti_session.get_catalog(**kwargs)
215+
216+
source year ... territory simplification
217+
0 CONTOUR-IRIS 2023 ... france 40
218+
1 CONTOUR-IRIS 2023 ... france 40
219+
2 CONTOUR-IRIS 2023 ... france 40
220+
3 CONTOUR-IRIS 2023 ... france 40
221+
4 CONTOUR-IRIS 2023 ... france 40
222+
... ... ... ... ...
223+
5745 CONTOUR-IRIS 2023 ... france 40
224+
5746 CONTOUR-IRIS 2023 ... france 40
225+
5747 CONTOUR-IRIS 2023 ... france 40
226+
5748 CONTOUR-IRIS 2023 ... france 40
227+
5749 CONTOUR-IRIS 2023 ... france 40
228+
229+
[5750 rows x 9 columns]
230+
231+
"""
232+
df = self._get_full_catalog()
233+
if kwargs:
234+
mask = reduce(
235+
lambda x, y: x & y, [df[k] == v for k, v in kwargs.items()]
236+
)
237+
df = df[mask].copy()
238+
return df
239+
240+
def _get_full_catalog(self) -> pd.DataFrame:
241+
"""
242+
Retrieve and load cartiflette's current datasets' catalog (as a
112243
dataframe).
113244
114245
Inventory columns are [
@@ -125,16 +256,6 @@ def get_catalog(self, **kwargs) -> pd.DataFrame:
125256
126257
Each row corresponds to an available DataFrame.
127258
128-
Parameters
129-
----------
130-
fs : S3FileSystem, optional
131-
S3 File System. The default is FS.
132-
bucket : str, optional
133-
Used bucket (both for inventory querying and json storage). The default
134-
is BUCKET.
135-
path_within_bucket : str, optional
136-
Path used within bucket. The default is PATH_WITHIN_BUCKET.
137-
138259
Returns
139260
-------
140261
df : pd.DataFrame
@@ -143,17 +264,15 @@ def get_catalog(self, **kwargs) -> pd.DataFrame:
143264
"""
144265

145266
url = CATALOG
146-
147-
url = f"https://minio.lab.sspcloud.fr/{url}"
148-
149267
try:
150268
r = self.get(url)
151269
d = r.json()
152270
except Exception as e:
153271
logger.error(
154-
f"There was an error while reading the file from the URL: {url}"
272+
"There was an error while reading the file from the URL: %s",
273+
url,
155274
)
156-
logger.error(f"Error message: {str(e)}")
275+
logger.error("Error message: %s", str(e))
157276
return
158277

159278
d = flatten_dict(d)
@@ -174,28 +293,26 @@ def get_catalog(self, **kwargs) -> pd.DataFrame:
174293
]
175294

176295
df = df.reset_index(drop=False)
296+
177297
return df
178298

179299
def get_dataset(
180300
self,
181301
values: typing.List[typing.Union[str, int, float]],
182-
*args,
183302
borders: str = "COMMUNE",
184303
filter_by: str = "region",
185304
territory: str = "metropole",
186305
vectorfile_format: str = "geojson",
187306
year: typing.Union[str, int, float] = None,
188307
crs: typing.Union[list, str, int, float] = 2154,
189308
simplification: typing.Union[str, int, float] = None,
190-
bucket: str = BUCKET,
191-
path_within_bucket: str = PATH_WITHIN_BUCKET,
192-
provider: str = "IGN",
193-
dataset_family: str = "ADMINEXPRESS",
309+
provider: str = "Cartiflette",
310+
dataset_family: str = "production",
194311
source: str = "EXPRESS-COG-TERRITOIRE",
195312
filename: str = "raw",
196313
return_as_json: bool = False,
197-
**kwargs,
198314
) -> typing.Union[gpd.GeoDataFrame, str]:
315+
# TODO : fix docstring
199316
"""
200317
Downloads and aggregates official geographic datasets using the Cartiflette API
201318
for a set of specified values.
@@ -225,8 +342,9 @@ def get_dataset(
225342
Other parameters required for accessing the Cartiflette API.
226343
227344
- return_as_json (bool, optional):
228-
If True, the function returns a JSON string representation of the aggregated GeoDataFrame.
229-
If False, it returns a GeoDataFrame. Default is False.
345+
If True, the function returns a JSON string representation of the
346+
aggregated GeoDataFrame. If False, it returns a GeoDataFrame. Default
347+
is False.
230348
231349
Returns:
232350
- Union[gpd.GeoDataFrame, str]:
@@ -250,8 +368,6 @@ def get_dataset(
250368
for value in values:
251369
gdf_single = self.download_cartiflette_single(
252370
value=value,
253-
bucket=bucket,
254-
path_within_bucket=path_within_bucket,
255371
provider=provider,
256372
dataset_family=dataset_family,
257373
source=source,
@@ -335,7 +451,9 @@ def carti_download(
335451
if return_as_json is True.
336452
"""
337453

338-
with CartifletteSession() as carti_session:
454+
with CartifletteSession(
455+
bucket=bucket, path_within_bucket=path_within_bucket
456+
) as carti_session:
339457
return carti_session.get_dataset(
340458
values=values,
341459
*args,
@@ -346,12 +464,58 @@ def carti_download(
346464
year=year,
347465
crs=crs,
348466
simplification=simplification,
349-
bucket=bucket,
350-
path_within_bucket=path_within_bucket,
351467
provider=provider,
352468
dataset_family=dataset_family,
353469
source=source,
354470
filename=filename,
355471
return_as_json=return_as_json,
356472
**kwargs,
357473
)
474+
475+
476+
@lru_cache(maxsize=128)
477+
def get_catalog(
478+
bucket: str = BUCKET,
479+
path_within_bucket: str = PATH_WITHIN_BUCKET,
480+
**kwargs,
481+
) -> pd.DataFrame:
482+
"""
483+
Retrieve Cartiflette's catalog. If kwargs are specified, will filter that
484+
catalog according to the pairs of column/values given.
485+
486+
This function is cached.
487+
488+
Parameters
489+
----------
490+
kwargs :
491+
Pairs of keys/values from the catalog, optional.
492+
493+
Returns
494+
-------
495+
pd.DataFrame
496+
Catalog of available datasets.
497+
498+
Example
499+
-------
500+
>>> get_catalog(territory="france", source="CONTOUR-IRIS")
501+
502+
source year ... territory simplification
503+
0 CONTOUR-IRIS 2023 ... france 40
504+
1 CONTOUR-IRIS 2023 ... france 40
505+
2 CONTOUR-IRIS 2023 ... france 40
506+
3 CONTOUR-IRIS 2023 ... france 40
507+
4 CONTOUR-IRIS 2023 ... france 40
508+
... ... ... ... ...
509+
5745 CONTOUR-IRIS 2023 ... france 40
510+
5746 CONTOUR-IRIS 2023 ... france 40
511+
5747 CONTOUR-IRIS 2023 ... france 40
512+
5748 CONTOUR-IRIS 2023 ... france 40
513+
5749 CONTOUR-IRIS 2023 ... france 40
514+
515+
[5750 rows x 9 columns]
516+
517+
"""
518+
with CartifletteSession(
519+
bucket=bucket, path_within_bucket=path_within_bucket
520+
) as carti_session:
521+
return carti_session.get_catalog(**kwargs)

0 commit comments

Comments
 (0)