Skip to content

Commit cb8c3cc

Browse files
authored
Merge pull request #34 from CU-ESIIL/codex/update-fired-md-with-fixed-python-code
docs: update FIRED python example
2 parents 225ba39 + 7cc7eb7 commit cb8c3cc

File tree

2 files changed

+216
-81
lines changed

2 files changed

+216
-81
lines changed

docs/hazards/FIRED/FIRED.md

Lines changed: 107 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -79,48 +79,113 @@ ggplot(fired) +
7979

8080
![](FIRED_files/figure-gfm/unnamed-chunk-5-1.png)
8181

82-
In Python, we need 5 libraries to download and visualize the data.
82+
In Python, the following code downloads the FIRED data set from CU Scholar,
83+
avoids HTTP 403 responses by sending browser-like headers and a Referer through a session, and loads the data into a GeoDataFrame.
84+
It also shows how to access event- and daily-level layers.
8385

8486
``` python
85-
import requests
86-
import zipfile
87-
import geopandas as gpd
88-
import matplotlib.pyplot as plt
89-
import seaborn as sns
87+
# Programmatically fetch FIRED from CU Scholar (handles 403) and load as GeoDataFrame.
88+
# No firedpy fallback.
89+
90+
import os, io, zipfile, tempfile, warnings
91+
from pathlib import Path
92+
import requests
93+
import geopandas as gpd
94+
95+
def load_fired_conus_ak(
    dataset_page: str = "https://scholar.colorado.edu/concern/datasets/d504rm74m",
    download_id: str = "h702q749s",  # file id (works for Nov 2001–Mar 2021 CONUS+AK events/daily)
    which: str = "events",           # "events" or "daily"
    prefer: str = "gpkg",            # "gpkg" or "shp"
    timeout: int = 180,
) -> gpd.GeoDataFrame:
    """
    Download the FIRED CONUS+AK archive from CU Scholar and load one layer.

    The ZIP is fetched through a ``requests.Session`` that sends browser-like
    headers and the dataset page as Referer, streamed into a temporary
    directory, and the requested layer is read with GeoPandas. The result is
    reprojected to EPSG:4326; if the file carries no CRS, EPSG:4326 is
    assigned as-is (the coordinates are assumed, not verified, to be lon/lat).

    Parameters
    ----------
    dataset_page : str
        CU Scholar dataset landing page (requested first, then used as Referer).
    download_id : str
        The /downloads/<id> token for the ZIP on that page.
    which : {"events","daily"}
        Choose event-level polygons or daily polygons.
    prefer : {"gpkg","shp"}
        Prefer GeoPackage or Shapefile when both exist.
    timeout : int
        Seconds for HTTP requests.

    Returns
    -------
    geopandas.GeoDataFrame
        The selected FIRED layer in EPSG:4326.

    Raises
    ------
    ValueError
        If ``which`` or ``prefer`` is not one of the accepted values.
    requests.HTTPError
        If the landing page or the download responds with an error status.
    zipfile.BadZipFile
        If the downloaded payload is not a ZIP archive.
    RuntimeError
        If no matching layer is found inside the ZIP.
    """
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if which not in ("events", "daily"):
        raise ValueError(f"which must be 'events' or 'daily', got {which!r}")
    if prefer not in ("gpkg", "shp"):
        raise ValueError(f"prefer must be 'gpkg' or 'shp', got {prefer!r}")

    inner = {
        ("events", "gpkg"): "fired_conus-ak_events_nov2001-march2021.gpkg",
        ("events", "shp"): "fired_conus-ak_events_nov2001-march2021.shp",
        ("daily", "gpkg"): "fired_conus-ak_daily_nov2001-march2021.gpkg",
        ("daily", "shp"): "fired_conus-ak_daily_nov2001-march2021.shp",
    }
    alternate = "shp" if prefer == "gpkg" else "gpkg"
    want_primary = inner[(which, prefer)]
    want_alternate = inner[(which, alternate)]

    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/125.0 Safari/537.36",
        "Accept": "*/*",
        "Referer": dataset_page,
        "Connection": "keep-alive",
    }
    zip_url = f"https://scholar.colorado.edu/downloads/{download_id}"

    with tempfile.TemporaryDirectory() as td:
        zpath = Path(td) / "fired.zip"

        # Context managers release the pooled connections even when a request
        # or the streaming download fails part-way.
        with requests.Session() as sess:
            # Visit the landing page first so any cookies it sets are reused
            # for the download request.
            with sess.get(dataset_page, headers=headers, timeout=timeout) as r0:
                r0.raise_for_status()

            with sess.get(zip_url, headers=headers, stream=True,
                          timeout=timeout, allow_redirects=True) as resp:
                resp.raise_for_status()

                ct = resp.headers.get("Content-Type", "")
                cd = resp.headers.get("Content-Disposition", "")
                if ("zip" not in ct.lower()) and (".zip" not in cd.lower()):
                    warnings.warn("Response did not clearly indicate a ZIP; proceeding anyway.")

                # Stream to disk in 1 MiB chunks rather than holding the
                # whole archive in memory.
                with open(zpath, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=1 << 20):
                        if chunk:
                            f.write(chunk)

        with zipfile.ZipFile(zpath, "r") as z:
            names = z.namelist()
            chosen = None

            # 1) Known file names, matched on the basename so members nested
            #    in a folder inside the ZIP are found too.
            for wanted in (want_primary, want_alternate):
                chosen = next(
                    (n for n in names if n.rsplit("/", 1)[-1] == wanted), None
                )
                if chosen is not None:
                    break

            # 2) Fallback: any member mentioning the layer kind, trying the
            #    preferred format before the alternate one.
            if chosen is None:
                for suffix in (f".{prefer}", f".{alternate}"):
                    chosen = next(
                        (n for n in names if which in n and n.endswith(suffix)), None
                    )
                    if chosen is not None:
                        break

            if chosen is None:
                raise RuntimeError("Could not find a FIRED layer inside the ZIP "
                                   f"(looked for '{want_primary}' / '{want_alternate}').")

            if chosen.endswith(".shp"):
                # A Shapefile is a set of sibling files (.shx, .dbf, .prj, ...);
                # extract every member sharing the stem, not just the .shp.
                stem = chosen[:-len("shp")]  # keeps the trailing dot
                members = [n for n in names if n.startswith(stem)]
            else:
                members = [chosen]
            z.extractall(path=td, members=members)

        # Must be read before the temporary directory is removed.
        gdf = gpd.read_file(Path(td) / chosen)

    if gdf.crs:
        gdf = gdf.to_crs("EPSG:4326")
    else:
        gdf = gdf.set_crs("EPSG:4326")
    return gdf
186+
187+
# -------------------- Example usage --------------------
# Load the event-level and the daily-level layers (one call per layer, in
# that order), then preview the first rows of the event polygons.
fired_events, fired_daily = (
    load_fired_conus_ak(which=layer, prefer="gpkg")
    for layer in ("events", "daily")
)
print(fired_events.head())
90191
```
91-
92-
Download the data set:
93-
94-
``` python
95-
url = "https://scholar.colorado.edu/downloads/zw12z650d"
96-
fired = requests.get(url)
97-
data_file = "fired.zip"
98-
with open(data_file, 'wb') as f:
99-
f.write(fired.content)
100-
101-
# Unzip the file
102-
```
103-
104-
``` python
105-
with zipfile.ZipFile(data_file, 'r') as zip_ref:
106-
zip_ref.extractall()
107-
```
108-
109-
Read it:
110-
111-
``` python
112-
fired = gpd.read_file("fired_conus_ak_to_January_2022_gpkg_shp/conus_ak_to2022001_events.shp")
113-
```
114-
115-
Plot fire duration as a function of ignition day:
116-
117-
``` python
118-
plt.figure()
119-
sns.scatterplot(data=fired, x='ig_day', y='event_dur')
120-
sns.set_style('whitegrid')
121-
plt.xlabel('Day')
122-
plt.ylabel('Event duration (days)')
123-
plt.show()
124-
```
125-
126-
![](FIRED_files/figure-gfm/unnamed-chunk-9-1.png)

docs/hazards/FIRED/FIRED.qmd

Lines changed: 109 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -56,43 +56,113 @@ ggplot(fired) +
5656
ylab('Event duration (days)')
5757
```
5858

59-
In Python, we need 5 libraries to download and visualize the data.
60-
61-
```{python, cache=TRUE, message=FALSE, warning=FALSE, results='hide'}
62-
import requests
63-
import zipfile
64-
import geopandas as gpd
65-
import matplotlib.pyplot as plt
66-
import seaborn as sns
67-
```
68-
69-
Download the data set:
70-
71-
```{python, cache=TRUE, message=FALSE, warning=FALSE, results='hide'}
72-
url = "https://scholar.colorado.edu/downloads/zw12z650d"
73-
fired = requests.get(url)
74-
data_file = "fired.zip"
75-
with open(data_file, 'wb') as f:
76-
f.write(fired.content)
77-
78-
# Unzip the file
79-
with zipfile.ZipFile(data_file, 'r') as zip_ref:
80-
zip_ref.extractall()
81-
```
82-
83-
Read it:
84-
85-
```{python, cache=TRUE, message=FALSE, warning=FALSE, results='hide'}
86-
fired = gpd.read_file("fired_conus_ak_to_January_2022_gpkg_shp/conus_ak_to2022001_events.shp")
87-
```
88-
89-
Plot fire duration as a function of ignition day:
90-
91-
```{python, cache=TRUE, message=FALSE, warning=FALSE, results='hide'}
92-
plt.figure()
93-
sns.scatterplot(data=fired, x='ig_day', y='event_dur')
94-
sns.set_style('whitegrid')
95-
plt.xlabel('Day')
96-
plt.ylabel('Event duration (days)')
97-
plt.show()
59+
In Python, the following code downloads the FIRED data set from CU Scholar using a
60+
session to avoid 403 errors and loads it into a GeoDataFrame. The snippet also
61+
shows how to access event- and daily-level layers.
62+
63+
```{python, cache=TRUE, message=FALSE, warning=FALSE}
64+
# Programmatically fetch FIRED from CU Scholar (handles 403) and load as GeoDataFrame.
65+
# No firedpy fallback.
66+
67+
import os, io, zipfile, tempfile, warnings
68+
from pathlib import Path
69+
import requests
70+
import geopandas as gpd
71+
72+
def load_fired_conus_ak(
    dataset_page: str = "https://scholar.colorado.edu/concern/datasets/d504rm74m",
    download_id: str = "h702q749s",  # file id (works for Nov 2001–Mar 2021 CONUS+AK events/daily)
    which: str = "events",           # "events" or "daily"
    prefer: str = "gpkg",            # "gpkg" or "shp"
    timeout: int = 180,
) -> gpd.GeoDataFrame:
    """
    Download the FIRED CONUS+AK archive from CU Scholar and load one layer.

    The ZIP is fetched through a ``requests.Session`` that sends browser-like
    headers and the dataset page as Referer, streamed into a temporary
    directory, and the requested layer is read with GeoPandas. The result is
    reprojected to EPSG:4326; if the file carries no CRS, EPSG:4326 is
    assigned as-is (the coordinates are assumed, not verified, to be lon/lat).

    Parameters
    ----------
    dataset_page : str
        CU Scholar dataset landing page (requested first, then used as Referer).
    download_id : str
        The /downloads/<id> token for the ZIP on that page.
    which : {"events","daily"}
        Choose event-level polygons or daily polygons.
    prefer : {"gpkg","shp"}
        Prefer GeoPackage or Shapefile when both exist.
    timeout : int
        Seconds for HTTP requests.

    Returns
    -------
    geopandas.GeoDataFrame
        The selected FIRED layer in EPSG:4326.

    Raises
    ------
    ValueError
        If ``which`` or ``prefer`` is not one of the accepted values.
    requests.HTTPError
        If the landing page or the download responds with an error status.
    zipfile.BadZipFile
        If the downloaded payload is not a ZIP archive.
    RuntimeError
        If no matching layer is found inside the ZIP.
    """
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if which not in ("events", "daily"):
        raise ValueError(f"which must be 'events' or 'daily', got {which!r}")
    if prefer not in ("gpkg", "shp"):
        raise ValueError(f"prefer must be 'gpkg' or 'shp', got {prefer!r}")

    inner = {
        ("events", "gpkg"): "fired_conus-ak_events_nov2001-march2021.gpkg",
        ("events", "shp"): "fired_conus-ak_events_nov2001-march2021.shp",
        ("daily", "gpkg"): "fired_conus-ak_daily_nov2001-march2021.gpkg",
        ("daily", "shp"): "fired_conus-ak_daily_nov2001-march2021.shp",
    }
    alternate = "shp" if prefer == "gpkg" else "gpkg"
    want_primary = inner[(which, prefer)]
    want_alternate = inner[(which, alternate)]

    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/125.0 Safari/537.36",
        "Accept": "*/*",
        "Referer": dataset_page,
        "Connection": "keep-alive",
    }
    zip_url = f"https://scholar.colorado.edu/downloads/{download_id}"

    with tempfile.TemporaryDirectory() as td:
        zpath = Path(td) / "fired.zip"

        # Context managers release the pooled connections even when a request
        # or the streaming download fails part-way.
        with requests.Session() as sess:
            # Visit the landing page first so any cookies it sets are reused
            # for the download request.
            with sess.get(dataset_page, headers=headers, timeout=timeout) as r0:
                r0.raise_for_status()

            with sess.get(zip_url, headers=headers, stream=True,
                          timeout=timeout, allow_redirects=True) as resp:
                resp.raise_for_status()

                ct = resp.headers.get("Content-Type", "")
                cd = resp.headers.get("Content-Disposition", "")
                if ("zip" not in ct.lower()) and (".zip" not in cd.lower()):
                    warnings.warn("Response did not clearly indicate a ZIP; proceeding anyway.")

                # Stream to disk in 1 MiB chunks rather than holding the
                # whole archive in memory.
                with open(zpath, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=1 << 20):
                        if chunk:
                            f.write(chunk)

        with zipfile.ZipFile(zpath, "r") as z:
            names = z.namelist()
            chosen = None

            # 1) Known file names, matched on the basename so members nested
            #    in a folder inside the ZIP are found too.
            for wanted in (want_primary, want_alternate):
                chosen = next(
                    (n for n in names if n.rsplit("/", 1)[-1] == wanted), None
                )
                if chosen is not None:
                    break

            # 2) Fallback: any member mentioning the layer kind, trying the
            #    preferred format before the alternate one.
            if chosen is None:
                for suffix in (f".{prefer}", f".{alternate}"):
                    chosen = next(
                        (n for n in names if which in n and n.endswith(suffix)), None
                    )
                    if chosen is not None:
                        break

            if chosen is None:
                raise RuntimeError("Could not find a FIRED layer inside the ZIP "
                                   f"(looked for '{want_primary}' / '{want_alternate}').")

            if chosen.endswith(".shp"):
                # A Shapefile is a set of sibling files (.shx, .dbf, .prj, ...);
                # extract every member sharing the stem, not just the .shp.
                stem = chosen[:-len("shp")]  # keeps the trailing dot
                members = [n for n in names if n.startswith(stem)]
            else:
                members = [chosen]
            z.extractall(path=td, members=members)

        # Must be read before the temporary directory is removed.
        gdf = gpd.read_file(Path(td) / chosen)

    if gdf.crs:
        gdf = gdf.to_crs("EPSG:4326")
    else:
        gdf = gdf.set_crs("EPSG:4326")
    return gdf
163+
164+
# -------------------- Example usage --------------------
# Load the event-level and the daily-level layers (one call per layer, in
# that order), then preview the first rows of the event polygons.
fired_events, fired_daily = (
    load_fired_conus_ak(which=layer, prefer="gpkg")
    for layer in ("events", "daily")
)
print(fired_events.head())
98168
```

0 commit comments

Comments
 (0)