Skip to content

Commit 3f7ed29

Browse files
author
williamstravis
committed
Added an attempt at parallel downloading
2 parents 16f14bb + 4939ee0 commit 3f7ed29

File tree

2 files changed

+28
-28
lines changed

2 files changed

+28
-28
lines changed
-38.5 KB
Binary file not shown.

firedpy/functions.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def buildEvents(dest, data_dir, tiles, spatial_param=5, temporal_param=11):
134134
ys = flttn(ys)
135135
xs = flttn(xs)
136136
dates = flttn(dates)
137-
edf = pd.DataFrame(OrderedDict({"id": events, "date": dates,
137+
edf = pd.DataFrame(OrderedDict({"id": events, "date": dates,
138138
"x": xs, "y": ys, "edge": edges,
139139
"tile": tile_id}))
140140
if not os.path.exists(os.path.join(data_dir, "tables/events")):
@@ -189,7 +189,7 @@ def toDays(date, base):
189189
pass
190190

191191
# If there aren"t events close enough in time the list will be empty
192-
edf2 = edf2[(abs(edf2["days"] - d1) < temporal_param) |
192+
edf2 = edf2[(abs(edf2["days"] - d1) < temporal_param) |
193193
(abs(edf2["days"] - d2) < temporal_param)]
194194
eids2 = list(edf2["id"].unique())
195195

@@ -273,7 +273,7 @@ def buildPolygons(src, daily_shp_path, event_shp_path, data_dir):
273273

274274
# Now add the first date of each event and merge daily event detections
275275
print("Dissolving polygons...")
276-
gdf["start_date"] = gdf.groupby("id")["date"].transform("min")
276+
gdf["start_date"] = gdf.groupby("id")["date"].transform("min")
277277
gdfd = gdf.dissolve(by="did", as_index=False)
278278
gdfd["year"] = gdfd["start_date"].apply(lambda x: x[:4])
279279
gdfd["month"] = gdfd["start_date"].apply(lambda x: x[5:7])
@@ -509,7 +509,7 @@ def toKms(p, res):
509509
def istarmap(self, func, iterable, chunksize=1):
510510
"""
511511
starmap progress bar patch from darkonaut:
512-
512+
513513
https://stackoverflow.com/users/9059420/darkonaut
514514
https://stackoverflow.com/questions/57354700/starmap-combined-with-tqdm/
515515
"""
@@ -548,7 +548,7 @@ def downloadBA(hdf, hdf_path):
548548

549549
# Check worker into site
550550
ftp = ftplib.FTP("fuoco.geog.umd.edu", user="fire", passwd="burnt")
551-
551+
552552
# Infer and move into the remote folder
553553
ftp_folder = "/MCD64A1/C6/" + tile
554554
ftp.cwd(ftp_folder)
@@ -619,7 +619,7 @@ def getBurns(self):
619619
tiles = self.tiles
620620
else:
621621
tiles = ftp.nlst()
622-
tiles = [t for t in tiles if "h" in t]
622+
tiles = [t for t in tiles if "h" in t]
623623

624624
# Download the available files and catch failed downloads
625625
for tile in tiles:
@@ -628,7 +628,7 @@ def getBurns(self):
628628
ftp.cwd(ftp_folder)
629629
hdfs = ftp.nlst()
630630
hdfs = [h for h in hdfs if ".hdf" in h]
631-
631+
632632
# Make sure local target folder exists
633633
folder = os.path.join(self.hdf_path, tile)
634634
if not os.path.exists(folder):
@@ -642,12 +642,12 @@ def getBurns(self):
642642

643643
# Create pool
644644
pool = Pool(5)
645-
645+
646646
# Zip arguments together
647647
args = zip(hdfs, np.repeat(self.hdf_path, len(hdfs)))
648648

649649
# Try to dl in parallel using istarmap patch for progress bar
650-
for _ in tqdm(pool.istarmap(downloadBA, args),
650+
for _ in tqdm(pool.istarmap(downloadBA, args),
651651
total=len(hdfs), position=0):
652652
pass
653653

@@ -750,7 +750,7 @@ def getLandcover(self):
750750
Processes Distributed Active Archive Center, which is an Earthdata
751751
thing. You"ll need register for a username and password, but that"s
752752
free. Fortunately, there is a tutorial on how to get this data:
753-
753+
754754
https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+
755755
Python
756756
@@ -798,7 +798,7 @@ def getLandcover(self):
798798
url = ("https://e4ftl01.cr.usgs.gov/MOTA/MCD12Q1.006/" + year +
799799
".01.01/")
800800
r = urllib2.urlopen(url)
801-
soup = BeautifulSoup(r, features="lxml",
801+
soup = BeautifulSoup(r, features="lxml",
802802
from_encoding=r.info().get_param("charset")
803803
)
804804
names = [link["href"] for link in soup.find_all("a", href=True)]
@@ -821,7 +821,7 @@ def getLandcover(self):
821821
for year in years:
822822
print("Stitching together landcover tiles for year " + year)
823823
lc_tiles = glob(os.path.join(self.landcover_path, year, "*hdf"))
824-
dss = [rasterio.open(f).subdatasets[0] for f in lc_tiles]
824+
dss = [rasterio.open(f).subdatasets[0] for f in lc_tiles]
825825
tiles = [rasterio.open(d) for d in dss]
826826
mosaic, transform = merge(tiles)
827827
crs = tiles[0].meta.copy()
@@ -875,13 +875,13 @@ def getShapes(self):
875875
conus.to_file(os.path.join(self.data_dir, "shapefiles/conus.shp"))
876876

877877
# Contiguous United States - MODIS Sinusoidal
878-
if not os.path.exists(os.path.join(self.data_dir,
878+
if not os.path.exists(os.path.join(self.data_dir,
879879
"shapefiles/conus_modis.shp")):
880880
print("Reprojecting state shapefile to MODIS Sinusoidal...")
881881
conus = gpd.read_file(os.path.join(self.data_dir,
882882
"shapefiles/conus.shp"))
883883
modis_conus = conus.to_crs(modis_crs)
884-
modis_conus.to_file(os.path.join(self.data_dir,
884+
modis_conus.to_file(os.path.join(self.data_dir,
885885
"shapefiles/conus_modis.shp"))
886886

887887
# Level III Omernick Ecoregions - USGS North American Albers
@@ -892,7 +892,7 @@ def getShapes(self):
892892
eco_l3 = gpd.read_file("ftp://ftp.epa.gov/wed/ecoregions/us/" +
893893
"us_eco_l3.zip")
894894
eco_l3.crs = {"init": "epsg:5070"}
895-
eco_l3.to_file(os.path.join(self.data_dir,
895+
eco_l3.to_file(os.path.join(self.data_dir,
896896
"shapefiles/ecoregion/us_eco_l3.shp"))
897897
eco_l3 = eco_l3.to_crs(modis_crs)
898898
eco_l3.to_file(
@@ -924,7 +924,7 @@ def cap(string):
924924
self.data_dir, "shapefiles/modis_world_grid.shp")
925925

926926
# Getting the extent regardless of existing files from other runs
927-
template1 = gpd.read_file(extent_template_file)
927+
template1 = gpd.read_file(extent_template_file)
928928
template1["h"] = template1["h"].apply(lambda x: "{:02d}".format(x))
929929
template1["v"] = template1["v"].apply(lambda x: "{:02d}".format(x))
930930
template1["tile"] = "h" + template1["h"] + "v" + template1["v"]
@@ -1034,11 +1034,11 @@ def buildNCs(self, files):
10341034
proj = hdf.GetProjection()
10351035
data = hdf.GetRasterBand(1)
10361036
crs = osr.SpatialReference()
1037-
1037+
10381038
# Get the proj4 string usign the WKT
10391039
crs.ImportFromWkt(proj)
10401040
proj4 = crs.ExportToProj4()
1041-
1041+
10421042
# Use one tif (one array) for spatial attributes
10431043
array = data.ReadAsArray()
10441044
ny, nx = array.shape
@@ -1048,15 +1048,15 @@ def buildNCs(self, files):
10481048
# Todays date for attributes
10491049
todays_date = dt.datetime.today()
10501050
today = np.datetime64(todays_date)
1051-
1051+
10521052
# Create Dataset
10531053
nco = Dataset(file_name, mode="w", format="NETCDF4", clobber=True)
1054-
1054+
10551055
# Dimensions
10561056
nco.createDimension("y", ny)
10571057
nco.createDimension("x", nx)
10581058
nco.createDimension("time", None)
1059-
1059+
10601060
# Variables
10611061
y = nco.createVariable("y", np.float64, ("y",))
10621062
x = nco.createVariable("x", np.float64, ("x",))
@@ -1066,8 +1066,8 @@ def buildNCs(self, files):
10661066
fill_value=-9999, zlib=True)
10671067
variable.standard_name = "day"
10681068
variable.long_name = "Burn Days"
1069-
1070-
# Appending the CRS information
1069+
1070+
# Appending the CRS information
10711071
# Check "https://cf-trac.llnl.gov/trac/ticket/77"
10721072
crs = nco.createVariable("crs", "c")
10731073
variable.setncattr("grid_mapping", "crs")
@@ -1089,15 +1089,15 @@ def buildNCs(self, files):
10891089
y.standard_name = "projection_y_coordinate"
10901090
y.long_name = "y coordinate of projection"
10911091
y.units = "m"
1092-
1092+
10931093
# Other attributes
10941094
nco.title = "Burn Days"
10951095
nco.subtitle = "Burn Days Detection by MODIS since 1970."
10961096
nco.description = "The day that a fire is detected."
10971097
nco.date = pd.to_datetime(str(today)).strftime("%Y-%m-%d")
10981098
nco.projection = "MODIS Sinusoidal"
10991099
nco.Conventions = "CF-1.6"
1100-
1100+
11011101
# Variable Attrs
11021102
times.units = "days since 1970-01-01"
11031103
times.standard_name = "time"
@@ -1110,12 +1110,12 @@ def buildNCs(self, files):
11101110
dates.append(date)
11111111
deltas = [d - dt.datetime(1970, 1, 1) for d in dates]
11121112
days = np.array([d.days for d in deltas])
1113-
1113+
11141114
# Write dimension data
11151115
x[:] = xs
11161116
y[:] = ys
11171117
times[:] = days
1118-
1118+
11191119
# One file a time, write the arrays
11201120
tidx = 0
11211121
for f in tqdm(files, position=0, file=sys.stdout):
@@ -1133,7 +1133,7 @@ def buildNCs(self, files):
11331133
blank = np.zeros((ny, nx))
11341134
variable[tidx, :, :] = blank
11351135
tidx += 1
1136-
1136+
11371137
# Done
11381138
nco.close()
11391139

0 commit comments

Comments
 (0)