2424 >>> to_json(observations, 'my_observations.json')
2525 >>> to_parquet(observations, 'my_observations.parquet')
2626
27+ Or export to any supported format by file extension:
28+
29+ >>> export(observations, 'my_observations.csv')
30+ >>> export(observations, 'my_observations.geojson')
31+ >>> export(observations, 'my_observations.gpx')
32+
2733 Load back into Observation objects:
2834
2935 >>> observations = read('my_observations.csv')
3339 >>> observations = read('my_observations.json')
3440 >>> observations = read('my_observations.parquet')
3541
42+
3643**Export functions:**
3744
3845.. autosummary::
3946 :nosignatures:
4047
48+ export
4149 to_csv
4250 to_excel
4351 to_feather
6169import json
6270from collections .abc import Iterable , Sequence
6371from copy import deepcopy
72+ from datetime import datetime
6473from logging import getLogger
6574from pathlib import Path
66- from typing import TYPE_CHECKING , Optional , TypeAlias
75+ from typing import TYPE_CHECKING , TypeAlias
6776
6877from flatten_dict import flatten , unflatten
6978from pyinaturalist import BaseModel , JsonResponse , ModelObjects , Observation , ResponseResult , Taxon
@@ -147,7 +156,7 @@ def to_dicts(value: InputTypes) -> Iterable[dict]:
147156 return [value ] # type: ignore [list-item]
148157
149158
150- def to_csv (observations : AnyObservations , filename : Optional [ str ] = None ):
159+ def to_csv (observations : AnyObservations , filename : PathOrStr ):
151160 """Convert observations to CSV"""
152161 from pandas import DataFrame
153162
@@ -178,30 +187,31 @@ def to_dataset(observations: AnyObservations) -> Dataset:
178187 return dataset
179188
180189
def to_excel(observations: AnyObservations, filename: PathOrStr):
    """Convert observations to an Excel spreadsheet (xlsx)

    Timezone info is removed from datetime values before writing, since openpyxl
    cannot serialize timezone-aware datetimes.
    """
    dataset = _strip_tzinfo(to_dataset(observations))
    write(dataset.get_xlsx(), filename, 'wb')
185195
186196
def to_feather(observations: AnyObservations, filename: PathOrStr):
    """Convert observations into a Feather file"""
    to_dataframe(observations).to_feather(filename)
191201
192202
def to_hdf(observations: AnyObservations, filename: PathOrStr):
    """Convert observations into an HDF5 file

    Args:
        observations: Observation objects or observation JSON
        filename: Path to write to
    """
    df = to_dataframe(observations)
    # `key` must be passed by keyword: pandas deprecated positional args to
    # to_hdf() in 1.5 and made them keyword-only in 2.x
    df.to_hdf(filename, key='observations')
197207
198208
def to_json(observations: AnyObservations, filename: PathOrStr):
    """Convert observations into a JSON file"""
    content = json.dumps(to_dicts(observations), indent=2, default=str)
    write(content, filename)
202212
203213
def to_parquet(observations: AnyObservations, filename: PathOrStr):
    """Convert observations into a Parquet file"""
    to_dataframe(observations).to_parquet(filename)
@@ -225,43 +235,89 @@ def read(filename: PathOrStr) -> list[Observation]:
225235 import pandas as pd
226236
227237 from .csv import is_csv_export , load_csv_exports
238+ from .db import get_db_observations
228239 from .dwc import dwc_to_observations
229240 from .geojson import geojson_to_observations
230241 from .gpx import gpx_to_observations
231242
232243 file_path = Path (filename ).expanduser ()
233244 ext = file_path .suffix .lower ().replace ('.' , '' )
234- if ext == 'json' :
235- return Observation .from_json_file (file_path )
236- elif ext == 'dwc' :
237- return dwc_to_observations (file_path )
238- elif ext == 'geojson' :
239- return geojson_to_observations (file_path )
240- elif ext == 'gpx' :
241- return gpx_to_observations (file_path )
242- elif ext in ('sqlite' , 'db' ):
243- from .db import get_db_observations
244-
245- return list (get_db_observations (file_path ))
246- # For CSV, check if it came from the export tool or from API results
247- elif ext == 'csv' and is_csv_export (file_path ):
248- df = load_csv_exports (file_path )
249- elif ext == 'csv' :
250- df = pd .read_csv (file_path )
251- elif ext == 'feather' :
252- df = pd .read_feather (file_path )
253- elif ext == 'hdf' :
254- df = pd .read_hdf (file_path , 'observations' )
255- elif ext == 'parquet' :
256- df = pd .read_parquet (file_path )
257- elif ext == 'xlsx' :
258- df = pd .read_excel (file_path )
259- else :
260- raise ValueError (f'File format not yet supported: { file_path .suffix } ' )
245+ match ext :
246+ case 'json' :
247+ return Observation .from_json_file (file_path )
248+ case 'dwc' :
249+ return dwc_to_observations (file_path )
250+ case 'geojson' :
251+ return geojson_to_observations (file_path )
252+ case 'gpx' :
253+ return gpx_to_observations (file_path )
254+ case 'sqlite' | 'db' :
255+ return list (get_db_observations (file_path ))
256+ # For CSV, check if it came from the export tool or from API results
257+ case 'csv' :
258+ df = load_csv_exports (file_path ) if is_csv_export (file_path ) else pd .read_csv (file_path )
259+ case 'feather' :
260+ df = pd .read_feather (file_path )
261+ case 'hdf' :
262+ df = pd .read_hdf (file_path , 'observations' )
263+ case 'parquet' :
264+ df = pd .read_parquet (file_path )
265+ case 'xlsx' :
266+ df = pd .read_excel (file_path )
267+ case _:
268+ raise ValueError (f'File format not yet supported: { file_path .suffix } ' )
261269
262270 return Observation .from_json_list (_df_to_dicts (df ))
263271
264272
def export(observations: AnyObservations, filename: PathOrStr):
    """Export observations to any of the following file formats, based on file extension:

    * CSV (``.csv``)
    * Darwin Core (``.dwc``)
    * Feather (``.feather``)
    * GeoJSON (``.geojson``)
    * GPX (``.gpx``)
    * HDF5 (``.hdf``)
    * JSON (``.json``)
    * Parquet (``.parquet``)
    * Excel (``.xlsx``)
    * SQLite (``.sqlite`` or ``.db``)
    """
    from .db import create_tables, save_observations
    from .dwc import to_dwc
    from .geojson import to_geojson
    from .gpx import to_gpx

    def _to_sqlite(obs, path):
        # SQLite needs its tables created before observations can be saved
        create_tables(path)
        save_observations(obs, path)

    # Dispatch table: file extension -> writer function
    writers = {
        'json': to_json,
        'csv': to_csv,
        'dwc': to_dwc,
        'feather': to_feather,
        'geojson': to_geojson,
        'gpx': to_gpx,
        'hdf': to_hdf,
        'parquet': to_parquet,
        'xlsx': to_excel,
        'sqlite': _to_sqlite,
        'db': _to_sqlite,
    }

    file_path = Path(filename).expanduser()
    ext = file_path.suffix.lower().replace('.', '')
    try:
        writer = writers[ext]
    except KeyError:
        raise ValueError(f'File format not supported: {ext}') from None
    writer(observations, file_path)
319+
320+
265321def write (content : str | bytes , filename : PathOrStr , mode = 'w' ):
266322 """Write converted observation data to a file, creating parent dirs first"""
267323 logger .info (f'Writing to { filename } ' )
@@ -388,6 +444,21 @@ def _fix_dimensions(flat_observations):
388444 return sorted (headers ), flat_observations
389445
390446
447+ def _strip_tzinfo (dataset : Dataset ) -> Dataset :
448+ """Strip timezone info from all datetime values in a tablib Dataset, for compatibility with
449+ openpyxl
450+ """
451+ for col in dataset .headers or []:
452+ col_idx = dataset .headers .index (col )
453+ for row_idx , row in enumerate (dataset ):
454+ val = row [col_idx ]
455+ if isinstance (val , datetime ) and val .tzinfo is not None :
456+ dataset [row_idx ] = tuple (
457+ v .replace (tzinfo = None ) if i == col_idx else v for i , v in enumerate (row )
458+ )
459+ return dataset
460+
461+
391462def _is_dataframe (obj ) -> bool :
392463 try :
393464 from pandas import DataFrame
0 commit comments