geopandas
diff --git a/README.md b/README.md
Lines changed: 20 additions & 15 deletions
diff --git a/ci/dev.yaml b/ci/dev.yaml
Lines changed: 2 additions & 0 deletions
diff --git a/ci/latest.yaml b/ci/latest.yaml
Lines changed: 2 additions & 0 deletions
diff --git a/doc/source/contributing.md b/doc/source/contributing.md
Lines changed: 8 additions & 2 deletions
diff --git a/geodatasets/api.py b/geodatasets/api.py
Lines changed: 44 additions & 4 deletions
@@ -36,25 +36,28 @@ In [1]: import geodatasets
 In [2]: geodatasets.data
 Out[2]:
 {'geoda': {'airbnb': {'url': 'https://geodacenter.github.io/data-and-lab//data/airbnb.zip',
-   'license': 'CC-0',
-   'attribution': 'GeoDa Data and Lab',
+   'license': 'NA',
+   'attribution': 'Center for Spatial Data Science, University of Chicago',
    'name': 'geoda.airbnb',
    'description': 'Airbnb rentals, socioeconomics, and crime in Chicago',
+   'geometry_type': 'Polygon',
    'nrows': 77,
-   'ncols': 20,
+   'ncols': 21,
    'details': 'https://geodacenter.github.io/data-and-lab//airbnb/',
    'hash': 'a2ab1e3f938226d287dd76cde18c00e2d3a260640dd826da7131827d9e76c824',
    'filename': 'airbnb.zip'},
   'atlanta': {'url': 'https://geodacenter.github.io/data-and-lab//data/atlanta_hom.zip',
-   'license': 'CC-0',
-   'attribution': 'GeoDa Data and Lab',
+   'license': 'NA',
+   'attribution': 'Center for Spatial Data Science, University of Chicago',
    'name': 'geoda.atlanta',
    'description': 'Atlanta, GA region homicide counts and rates',
+   'geometry_type': 'Polygon',
    'nrows': 90,
-   'ncols': 23,
+   'ncols': 24,
    'details': 'https://geodacenter.github.io/data-and-lab//atlanta_old/',
-   'hash': 'missing',
-   'filename': 'atlanta_hom.zip'},
+   'hash': 'a33a76e12168fe84361e60c88a9df4856730487305846c559715c89b1a2b5e09',
+   'filename': 'atlanta_hom.zip',
+   'members': ['atlanta_hom/atl_hom.geojson']},
    ...
 ```
 
@@ -69,8 +72,8 @@ And one to get the local path. If the file is not available in the cache, it wil
 downloaded first.
 
 ```py
-Out[4]: '/Users/martin/Library/Caches/geodatasets/airbnb.zip'
 In [4]: geodatasets.get_path('geoda airbnb')
+Out[4]: '/Users/martin/Library/Caches/geodatasets/airbnb.zip'
 ```
 
 You can also get all the details:
@@ -79,12 +82,13 @@ You can also get all the details:
 In [5]: geodatasets.data.geoda.airbnb
 Out[5]:
 {'url': 'https://geodacenter.github.io/data-and-lab//data/airbnb.zip',
- 'license': 'CC-0',
- 'attribution': 'GeoDa Data and Lab',
+ 'license': 'NA',
+ 'attribution': 'Center for Spatial Data Science, University of Chicago',
  'name': 'geoda.airbnb',
  'description': 'Airbnb rentals, socioeconomics, and crime in Chicago',
+ 'geometry_type': 'Polygon',
  'nrows': 77,
- 'ncols': 20,
+ 'ncols': 21,
  'details': 'https://geodacenter.github.io/data-and-lab//airbnb/',
  'hash': 'a2ab1e3f938226d287dd76cde18c00e2d3a260640dd826da7131827d9e76c824',
  'filename': 'airbnb.zip'}
@@ -96,12 +100,13 @@ Or using the name query:
 In [6]: geodatasets.data.query_name('geoda airbnb')
 Out[6]:
 {'url': 'https://geodacenter.github.io/data-and-lab//data/airbnb.zip',
- 'license': 'CC-0',
- 'attribution': 'GeoDa Data and Lab',
+ 'license': 'NA',
+ 'attribution': 'Center for Spatial Data Science, University of Chicago',
  'name': 'geoda.airbnb',
  'description': 'Airbnb rentals, socioeconomics, and crime in Chicago',
+ 'geometry_type': 'Polygon',
  'nrows': 77,
- 'ncols': 20,
+ 'ncols': 21,
  'details': 'https://geodacenter.github.io/data-and-lab//airbnb/',
  'hash': 'a2ab1e3f938226d287dd76cde18c00e2d3a260640dd826da7131827d9e76c824',
  'filename': 'airbnb.zip'}
 
@@ -6,6 +6,8 @@ dependencies:
   # tests
   - pytest
   - pytest-cov
+  - geopandas-base
+  - pyogrio
   - pip
   - pip:
       - git+https://github.com/fatiando/pooch.git@main
@@ -7,3 +7,5 @@ dependencies:
   # tests
   - pytest
   - pytest-cov
+  - geopandas-base
+  - pyogrio
@@ -22,6 +22,7 @@ schema to add a single dataset:
         "attribution": "University of Github",
         "name": "dataset_name",
         "description": "Contents of my file",
+        "geometry_type": "Polygon",
         "nrows": 77,
         "ncols": 20,
         "details": "https://your-site.com/link-to-explanantion/",
@@ -43,6 +44,7 @@ you can group then within a `Bunch` using the following schema:
             "attribution": "University of Github",
             "name": "dataset_name",
             "description": "Contents of my file",
+            "geometry_type": "Polygon",
             "nrows": 77,
             "ncols": 20,
             "details": "https://your-site.com/link-to-explanantion/",
@@ -55,11 +57,13 @@ you can group then within a `Bunch` using the following schema:
             "attribution": "University of Github",
             "name": "dataset_name",
             "description": "Contents of my file",
+            "geometry_type": "Point",
             "nrows": 77,
             "ncols": 20,
             "details": "https://your-site.com/link-to-explanantion/",
             "hash": "a2ab1e3f938226d287dd76cde18c00e2d3a260640dd826da7131827d9e76c824",
-            "filename": "my_file.zip"
+            "filename": "my_file.zip",
+            "members": ["use_only_this.geojson"]
       }
    },
 }
@@ -68,7 +72,9 @@ you can group then within a `Bunch` using the following schema:
 It is mandatory to always specify at least `name`, `url`, `hash` and `filename`. `hash`
 is a sha256 hash of the file to check that a user gets the expected file and a
 `filename` specifies how the downloaded file will be called. Ensure that it has a correct
-suffix. Don't forget to add any other custom attributes you'd like.
+suffix. Don't forget to add any other custom attributes you'd like. The `members` attribute
+has a specific meaning: it specifies the file (or files, in the case of an ESRI Shapefile)
+that shall be extracted from the archive and used.
 
 ## Code and documentation
 
 
@@ -55,6 +55,8 @@ def get_path(name):
     contain the same letters in the same order as the item's name irrespective
     of the letter case, spaces, dashes and other characters.
 
+    For datasets containing multiple files, the archive is automatically extracted.
+
     Parameters
     ----------
     name : str
@@ -81,7 +83,20 @@ def get_path(name):
     >>> path2
     '/Users/martin/Library/Caches/geodatasets/airbnb.zip'
     """
-    return CACHE.fetch(data.query_name(name).filename)
+    dataset = data.query_name(name)
+    if "members" in dataset.keys():
+        unzipped_files = CACHE.fetch(
+            dataset.filename, processor=pooch.Unzip(members=dataset.members)
+        )
+        if len(unzipped_files) == 1:
+            return unzipped_files[0]
+        elif len(unzipped_files) > 1:  # shapefile
+            return [f for f in unzipped_files if f.endswith(".shp")][0]
+        else:
+            raise
+
+    else:
+        return CACHE.fetch(dataset.filename)
 
 
 def fetch(name):
@@ -94,6 +109,8 @@ def fetch(name):
     contain the same letters in the same order as the item's name irrespective
     of the letter case, spaces, dashes and other characters.
 
+    For datasets containing multiple files, the archive is automatically extracted.
+
     Parameters
     ----------
     name : str, list
@@ -106,18 +123,41 @@ def fetch(name):
     Examples
     --------
     >>> geodatasets.fetch('nybb')
-    Downloading file 'nybb_22c.zip' from 'https://data.cityofnewyork.us/api/geospatial/\
-tqmj-j8zm?method=export&format=Original' to '/Users/martin/Library/Caches/geodatasets'.
+    Downloading file 'nybb_22c.zip' from 'https://data.cityofnewyork.us/api/geospatial\
+/tqmj-j8zm?method=export&format=Original' to '/Users/martin/Library/Caches/geodatasets'.
+    Extracting 'nybb_22c/nybb.shp' from '/Users/martin/Library/Caches/geodatasets/nybb_\
+22c.zip' to '/Users/martin/Library/Caches/geodatasets/nybb_22c.zip.unzip'
+    Extracting 'nybb_22c/nybb.shx' from '/Users/martin/Library/Caches/geodatasets/nybb_\
+22c.zip' to '/Users/martin/Library/Caches/geodatasets/nybb_22c.zip.unzip'
+    Extracting 'nybb_22c/nybb.dbf' from '/Users/martin/Library/Caches/geodatasets/nybb_\
+22c.zip' to '/Users/martin/Library/Caches/geodatasets/nybb_22c.zip.unzip'
+    Extracting 'nybb_22c/nybb.prj' from '/Users/martin/Library/Caches/geodatasets/nybb_\
+22c.zip' to '/Users/martin/Library/Caches/geodatasets/nybb_22c.zip.unzip'
 
     >>> geodatasets.fetch(['geoda airbnb', 'geoda guerry'])
     Downloading file 'airbnb.zip' from 'https://geodacenter.github.io/data-and-lab//dat\
 a/airbnb.zip' to '/Users/martin/Library/Caches/geodatasets'.
     Downloading file 'guerry.zip' from 'https://geodacenter.github.io/data-and-lab//dat\
 a/guerry.zip' to '/Users/martin/Library/Caches/geodatasets'.
+    Extracting 'guerry/guerry.shp' from '/Users/martin/Library/Caches/geodatasets/guerr\
+y.zip' to '/Users/martin/Library/Caches/geodatasets/guerry.zip.unzip'
+    Extracting 'guerry/guerry.dbf' from '/Users/martin/Library/Caches/geodatasets/guerr\
+y.zip' to '/Users/martin/Library/Caches/geodatasets/guerry.zip.unzip'
+    Extracting 'guerry/guerry.shx' from '/Users/martin/Library/Caches/geodatasets/guerr\
+y.zip' to '/Users/martin/Library/Caches/geodatasets/guerry.zip.unzip'
+    Extracting 'guerry/guerry.prj' from '/Users/martin/Library/Caches/geodatasets/guerr\
+y.zip' to '/Users/martin/Library/Caches/geodatasets/guerry.zip.unzip'
 
     """
     if isinstance(name, str):
         name = [name]
 
     for n in name:
-        _ = CACHE.fetch(data.query_name(n).filename)
+        dataset = data.query_name(n)
+        if "members" in dataset.keys():
+            _ = CACHE.fetch(
+                data.query_name(n).filename,
+                processor=pooch.Unzip(members=dataset.members),
+            )
+        else:
+            _ = CACHE.fetch(data.query_name(n).filename)