11from datetime import date
2+ from functools import reduce , lru_cache
23import logging
34import os
45import typing
6+ from warnings import warn
57
68from requests_cache import CachedSession
79import geopandas as gpd
1416 PATH_WITHIN_BUCKET ,
1517 CATALOG ,
1618)
19+
20+ # TODO : mettre bucket et path_within_bucket en véritables constantes
21+
1722from cartiflette .config import _config
1823from cartiflette .utils import (
1924 create_path_bucket ,
@@ -33,6 +38,8 @@ class CartifletteSession(CachedSession):
3338 def __init__ (
3439 self ,
3540 expire_after : int = _config ["DEFAULT_EXPIRE_AFTER" ],
41+ bucket : str = BUCKET ,
42+ path_within_bucket : str = PATH_WITHIN_BUCKET ,
3643 ** kwargs ,
3744 ):
3845 super ().__init__ (
@@ -41,6 +48,9 @@ def __init__(
4148 ** kwargs ,
4249 )
4350
51+ self .bucket = bucket
52+ self .path_within_bucket = path_within_bucket
53+
4454 for protocol in ["http" , "https" ]:
4555 try :
4656 proxy = {protocol : os .environ [f"{ protocol } _proxy" ]}
@@ -50,11 +60,8 @@ def __init__(
5060
5161 def download_cartiflette_single (
5262 self ,
53- * args ,
54- bucket : str = BUCKET ,
55- path_within_bucket : str = PATH_WITHIN_BUCKET ,
56- provider : str = "IGN" ,
57- dataset_family : str = "ADMINEXPRESS" ,
63+ provider : str = "Cartiflette" ,
64+ dataset_family : str = "production" ,
5865 source : str = "EXPRESS-COG-TERRITOIRE" ,
5966 vectorfile_format : str = "geojson" ,
6067 borders : str = "COMMUNE" ,
@@ -65,28 +72,92 @@ def download_cartiflette_single(
6572 crs : typing .Union [list , str , int , float ] = 2154 ,
6673 simplification : typing .Union [str , int , float ] = None ,
6774 filename : str = "raw" ,
68- ** kwargs ,
69- ):
75+ ) -> gpd .GeoDataFrame :
76+ """
77+ Download a single geodataset from Cartiflette
78+
79+ Parameters
80+ ----------
81+ provider : str, optional
82+ Deprecated. The default is "Cartiflette".
83+ dataset_family : str, optional
84+ Deprecated. The default is "production".
85+ source : str, optional
86+ DESCRIPTION. The default is "EXPRESS-COG-TERRITOIRE".
87+ vectorfile_format : str, optional
88+ DESCRIPTION. The default is "geojson".
89+ borders : str, optional
90+ DESCRIPTION. The default is "COMMUNE".
91+ filter_by : str, optional
92+ DESCRIPTION. The default is "region".
93+ territory : str, optional
94+ DESCRIPTION. The default is "metropole".
95+ year : typing.Union[str, int, float], optional
96+ DESCRIPTION. The default is None.
97+ value : typing.Union[str, int, float], optional
98+ DESCRIPTION. The default is "28".
99+ crs : typing.Union[list, str, int, float], optional
100+ DESCRIPTION. The default is 2154.
101+ simplification : typing.Union[str, int, float], optional
102+ DESCRIPTION. The default is None.
103+ filename : str, optional
104+ DESCRIPTION. The default is "raw".
105+ : TYPE
106+ DESCRIPTION.
107+
108+ Returns
109+ -------
110+ TYPE
111+ DESCRIPTION.
112+
113+ """
114+
115+ if provider :
116+ warn (
117+ "provider is deprecated and will be removed in a future "
118+ "version. You can safely drop this argument." ,
119+ DeprecationWarning ,
120+ stacklevel = 2 ,
121+ )
122+
123+ if dataset_family :
124+ warn (
125+ "dataset_family is deprecated and will be removed in a future "
126+ "version. You can safely drop this argument." ,
127+ DeprecationWarning ,
128+ stacklevel = 2 ,
129+ )
130+
131+ if borders == "COMMUNE_ARRONDISSEMENT" :
132+ warn (
133+ "'COMMUNE_ARRONDISSEMENT' is deprecated for borders and will "
134+ "be removed in a future version. Please use 'ARM' instead." ,
135+ DeprecationWarning ,
136+ stacklevel = 2 ,
137+ )
138+
139+ # TODO : vérifier borders vs. administrative_level
140+
70141 if not year :
71142 year = str (date .today ().year )
72143
73- corresp_filter_by_columns , format_read , driver = standardize_inputs (
144+ _corresp_filter_by_columns , format_read , _driver = standardize_inputs (
74145 vectorfile_format
75146 )
76147
77148 url = create_path_bucket (
78149 {
79- "bucket" : bucket ,
80- "path_within_bucket" : path_within_bucket ,
150+ "bucket" : self . bucket ,
151+ "path_within_bucket" : self . path_within_bucket ,
81152 "vectorfile_format" : format_read ,
82153 "territory" : territory ,
83154 "borders" : borders ,
84155 "filter_by" : filter_by ,
85156 "year" : year ,
86157 "value" : value ,
87158 "crs" : crs ,
88- "provider" : provider ,
89- "dataset_family" : dataset_family ,
159+ "provider" : "Cartiflette" ,
160+ "dataset_family" : "production" ,
90161 "source" : source ,
91162 "simplification" : simplification ,
92163 "filename" : filename ,
@@ -100,15 +171,75 @@ def download_cartiflette_single(
100171 gdf = gpd .read_file (r .content )
101172 except Exception as e :
102173 logger .error (
103- f"There was an error while reading the file from the URL: { url } "
174+ "There was an error while reading the file from the URL: %s" ,
175+ url ,
104176 )
105- logger .error (f"Error message: { str (e )} " )
177+ logger .error ("Error message: %s" , str (e ))
178+ return gpd .GeoDataFrame ()
106179 else :
107180 return gdf
108181
109182 def get_catalog (self , ** kwargs ) -> pd .DataFrame :
110183 """
111- Retrieve and load cartiflette's current datasets' inventory (as a
184+ Retrieve and load cartiflette's current datasets' catalog (as a
185+ dataframe), filtered on any of the following columns:
186+ [
187+ 'source',
188+ 'year',
189+ 'administrative_level',
190+ 'crs',
191+ 'filter_by',
192+ 'value',
193+ 'vectorfile_format',
194+ 'territory',
195+ 'simplification'
196+ ]
197+
198+ Each row corresponds to an available DataFrame.
199+
200+ Parameters
201+ ----------
202+ kwargs: dict
203+ pairs of column/filter values
204+
205+ Returns
206+ -------
207+ df : pd.DataFrame
208+ Filtered catalog as DataFrame
209+
210+ Example
211+ -------
212+ >>> kwargs = {"territory": "france", "source": "CONTOUR-IRIS"}
213+ >>> with CartifletteSession() as carti_session:
214+ return carti_session.get_catalog(**kwargs)
215+
216+ source year ... territory simplification
217+ 0 CONTOUR-IRIS 2023 ... france 40
218+ 1 CONTOUR-IRIS 2023 ... france 40
219+ 2 CONTOUR-IRIS 2023 ... france 40
220+ 3 CONTOUR-IRIS 2023 ... france 40
221+ 4 CONTOUR-IRIS 2023 ... france 40
222+ ... ... ... ... ...
223+ 5745 CONTOUR-IRIS 2023 ... france 40
224+ 5746 CONTOUR-IRIS 2023 ... france 40
225+ 5747 CONTOUR-IRIS 2023 ... france 40
226+ 5748 CONTOUR-IRIS 2023 ... france 40
227+ 5749 CONTOUR-IRIS 2023 ... france 40
228+
229+ [5750 rows x 9 columns]
230+
231+ """
232+ df = self ._get_full_catalog ()
233+ if kwargs :
234+ mask = reduce (
235+ lambda x , y : x & y , [df [k ] == v for k , v in kwargs .items ()]
236+ )
237+ df = df [mask ].copy ()
238+ return df
239+
240+ def _get_full_catalog (self ) -> pd .DataFrame :
241+ """
242+ Retrieve and load cartiflette's current datasets' catalog (as a
112243 dataframe).
113244
114245 Inventory columns are [
@@ -125,16 +256,6 @@ def get_catalog(self, **kwargs) -> pd.DataFrame:
125256
126257 Each row corresponds to an available DataFrame.
127258
128- Parameters
129- ----------
130- fs : S3FileSystem, optional
131- S3 File System. The default is FS.
132- bucket : str, optional
133- Used bucket (both for inventory querying and json storage). The default
134- is BUCKET.
135- path_within_bucket : str, optional
136- Path used within bucket. The default is PATH_WITHIN_BUCKET.
137-
138259 Returns
139260 -------
140261 df : pd.DataFrame
@@ -143,17 +264,15 @@ def get_catalog(self, **kwargs) -> pd.DataFrame:
143264 """
144265
145266 url = CATALOG
146-
147- url = f"https://minio.lab.sspcloud.fr/{ url } "
148-
149267 try :
150268 r = self .get (url )
151269 d = r .json ()
152270 except Exception as e :
153271 logger .error (
154- f"There was an error while reading the file from the URL: { url } "
272+ "There was an error while reading the file from the URL: %s" ,
273+ url ,
155274 )
156- logger .error (f "Error message: { str (e )} " )
275+ logger .error ("Error message: %s" , str (e ))
157276 return
158277
159278 d = flatten_dict (d )
@@ -174,28 +293,26 @@ def get_catalog(self, **kwargs) -> pd.DataFrame:
174293 ]
175294
176295 df = df .reset_index (drop = False )
296+
177297 return df
178298
179299 def get_dataset (
180300 self ,
181301 values : typing .List [typing .Union [str , int , float ]],
182- * args ,
183302 borders : str = "COMMUNE" ,
184303 filter_by : str = "region" ,
185304 territory : str = "metropole" ,
186305 vectorfile_format : str = "geojson" ,
187306 year : typing .Union [str , int , float ] = None ,
188307 crs : typing .Union [list , str , int , float ] = 2154 ,
189308 simplification : typing .Union [str , int , float ] = None ,
190- bucket : str = BUCKET ,
191- path_within_bucket : str = PATH_WITHIN_BUCKET ,
192- provider : str = "IGN" ,
193- dataset_family : str = "ADMINEXPRESS" ,
309+ provider : str = "Cartiflette" ,
310+ dataset_family : str = "production" ,
194311 source : str = "EXPRESS-COG-TERRITOIRE" ,
195312 filename : str = "raw" ,
196313 return_as_json : bool = False ,
197- ** kwargs ,
198314 ) -> typing .Union [gpd .GeoDataFrame , str ]:
315+ # TODO : fix docstring
199316 """
200317 Downloads and aggregates official geographic datasets using the Cartiflette API
201318 for a set of specified values.
@@ -225,8 +342,9 @@ def get_dataset(
225342 Other parameters required for accessing the Cartiflette API.
226343
227344 - return_as_json (bool, optional):
228- If True, the function returns a JSON string representation of the aggregated GeoDataFrame.
229- If False, it returns a GeoDataFrame. Default is False.
345+ If True, the function returns a JSON string representation of the
346+ aggregated GeoDataFrame. If False, it returns a GeoDataFrame. Default
347+ is False.
230348
231349 Returns:
232350 - Union[gpd.GeoDataFrame, str]:
@@ -250,8 +368,6 @@ def get_dataset(
250368 for value in values :
251369 gdf_single = self .download_cartiflette_single (
252370 value = value ,
253- bucket = bucket ,
254- path_within_bucket = path_within_bucket ,
255371 provider = provider ,
256372 dataset_family = dataset_family ,
257373 source = source ,
@@ -335,7 +451,9 @@ def carti_download(
335451 if return_as_json is True.
336452 """
337453
338- with CartifletteSession () as carti_session :
454+ with CartifletteSession (
455+ bucket = bucket , path_within_bucket = path_within_bucket
456+ ) as carti_session :
339457 return carti_session .get_dataset (
340458 values = values ,
341459 * args ,
@@ -346,12 +464,58 @@ def carti_download(
346464 year = year ,
347465 crs = crs ,
348466 simplification = simplification ,
349- bucket = bucket ,
350- path_within_bucket = path_within_bucket ,
351467 provider = provider ,
352468 dataset_family = dataset_family ,
353469 source = source ,
354470 filename = filename ,
355471 return_as_json = return_as_json ,
356472 ** kwargs ,
357473 )
474+
475+
@lru_cache(maxsize=128)
def _fetch_catalog(
    bucket: str, path_within_bucket: str, filters: tuple
) -> pd.DataFrame:
    """
    Cached retrieval of the (possibly filtered) catalog.

    `filters` is an order-normalized, hashable tuple of (column, value)
    pairs, so equivalent keyword orders share a single cache entry.
    """
    with CartifletteSession(
        bucket=bucket, path_within_bucket=path_within_bucket
    ) as carti_session:
        return carti_session.get_catalog(**dict(filters))


def get_catalog(
    bucket: str = BUCKET,
    path_within_bucket: str = PATH_WITHIN_BUCKET,
    **kwargs,
) -> pd.DataFrame:
    """
    Retrieve Cartiflette's catalog. If kwargs are specified, will filter that
    catalog according to the pairs of column/values given.

    This function is cached.

    Parameters
    ----------
    bucket : str, optional
        Storage bucket queried for the catalog. The default is BUCKET.
    path_within_bucket : str, optional
        Path used within bucket. The default is PATH_WITHIN_BUCKET.
    kwargs :
        Pairs of keys/values from the catalog, optional.

    Returns
    -------
    pd.DataFrame
        Catalog of available datasets.

    Example
    -------
    >>> get_catalog(territory="france", source="CONTOUR-IRIS")

                 source  year  ... territory simplification
    0      CONTOUR-IRIS  2023  ...    france             40
    1      CONTOUR-IRIS  2023  ...    france             40
    ...             ...   ...  ...       ...            ...
    5749   CONTOUR-IRIS  2023  ...    france             40

    [5750 rows x 9 columns]

    """
    # Normalize keyword order so get_catalog(a=1, b=2) and
    # get_catalog(b=2, a=1) hit the same cache entry.
    filters = tuple(sorted(kwargs.items()))
    # Return a defensive copy: lru_cache hands back one shared mutable
    # DataFrame, and a caller mutating its result in place must not
    # corrupt the cached object for every subsequent call.
    return _fetch_catalog(bucket, path_within_bucket, filters).copy()


# Preserve lru_cache's management API on the public function for callers
# that relied on the previous `@lru_cache`-decorated get_catalog.
get_catalog.cache_clear = _fetch_catalog.cache_clear
get_catalog.cache_info = _fetch_catalog.cache_info
0 commit comments