Skip to content

Commit b0bca1c

Browse files
committed
major refactor for better use of collections
1 parent 97f34fa commit b0bca1c

File tree

9 files changed

+550
-291
lines changed

9 files changed

+550
-291
lines changed

README.md

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ bbox = aoi1_polygon.union(aoi2_polygon).bounds
7676

7777
2. Configure Rasteret
7878

79-
Set up basic parameters for data collection:
79+
Set up basic parameters for data collection, and check for existing collections
80+
in your workspace directory, if they were created earlier.
8081

8182
```python
8283
# Collection configuration
@@ -88,44 +89,51 @@ data_source = DataSources.LANDSAT
8889
workspace_dir = Path.home() / "rasteret_workspace"
8990
workspace_dir.mkdir(exist_ok=True)
9091
)
92+
93+
# List existing collections
94+
collections = Rasteret.list_collections()
95+
for c in collections:
96+
print(f"- {c['name']}: {c['data_source']}, {c['date_range']}, {c['size']} scenes")
97+
9198
```
9299
3. Initialize and Create Collection
93100

94-
Set up Rasteret processor and create a local collection:
101+
Create or load a local collection:
95102
The collection stores the internal COG metadata of the scenes along with the STAC metadata.
96103

97104
```python
98-
# Initialize processor
99-
processor = Rasteret(
100-
data_source=data_source,
101-
output_dir=workspace_dir,
102-
custom_name=custom_name,
103-
date_range=date_range
104-
)
105-
106-
# Create local collection if not exists
107-
if processor._collection is None:
105+
# Try loading existing collection
106+
try:
107+
# example name
108+
processor = Rasteret.load_collection("bangalore_202401-12_landsat")
109+
except ValueError:
110+
# Create new collection
111+
processor = Rasteret(
112+
custom_name="bangalore",
113+
data_source=DataSources.LANDSAT,
114+
date_range=("2024-01-01", "2024-01-31")
115+
)
108116
processor.create_collection(
109117
bbox=bbox,
110-
date_range=date_range,
111-
cloud_cover_lt=90,
112-
platform={"in": ["LANDSAT_8"]}
118+
cloud_cover_lt=20,
119+
platform={"in": ["LANDSAT_8"]}
113120
)
114121
```
115122

116-
4. Query and Process Data
123+
4. Query Collection and Process Data
117124

118-
Query the collection and process data:
119125

120126
```python
121-
# Query collection with filters
127+
# Query collection with filters to get the data you want
122128
ds = processor.get_xarray(
123129
geometries=[aoi1_polygon,aoi2_polygon],
124130
bands=["B4", "B5"],
125131
cloud_cover_lt=20,
126132
date_range=["2024-01-10", "2024-01-30"]
127133
)
128134

135+
# returns an xarray dataset with the data for the geometries and bands specified
136+
129137
# Calculate NDVI
130138
ndvi_ds = (ds.B5 - ds.B4) / (ds.B5 + ds.B4)
131139
ndvi_ds = xr.Dataset(
@@ -134,13 +142,15 @@ ndvi_ds = xr.Dataset(
134142
attrs=ds.attrs,
135143
)
136144

137-
# Save results from xarray to geotiff files
145+
# Save results from xarray to geotiff files; each geometry's data will be stored in
146+
# its own folder
138147
output_files = save_per_geometry(ndvi_ds, output_dir, file_prefix="ndvi", data_var="NDVI")
139148

140149
for geom_id, filepath in output_files.items():
141150
print(f"Geometry {geom_id}: {filepath}")
142151
```
143152

153+
144154
## Why this library?
145155

146156
Details on why this library was made, and how it reads multiple COGs efficiently and fast -

examples/basic_workflow_gdf.py

Lines changed: 47 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,62 +3,65 @@
33
from shapely.geometry import Polygon
44

55
from rasteret import Rasteret
6-
6+
from rasteret.constants import DataSources
77

88
def main():
9+
910
"""Demonstrate core workflows with Rasteret."""
10-
# 1. Define parameters
11-
12-
custom_name = "bangalore3"
13-
date_range = ("2024-01-01", "2024-01-31")
14-
data_source = "landsat-c2l2-sr"
15-
11+
# 1. Setup workspace and parameters
12+
1613
workspace_dir = Path.home() / "rasteret_workspace"
1714
workspace_dir.mkdir(exist_ok=True)
1815

19-
print("1. Defining Area of Interest")
20-
print("--------------------------")
21-
22-
# Define area and time of interest
23-
aoi_polygon = Polygon(
24-
[(77.55, 13.01), (77.58, 13.01), (77.58, 13.08), (77.55, 13.08), (77.55, 13.01)]
25-
)
26-
27-
aoi_polygon2 = Polygon(
28-
[(77.56, 13.02), (77.59, 13.02), (77.59, 13.09), (77.56, 13.09), (77.56, 13.02)]
29-
)
30-
31-
# get total bounds of all polygons above
32-
bbox = aoi_polygon.union(aoi_polygon2).bounds
33-
34-
print("\n2. Creating and Loading Collection")
35-
print("--------------------------")
36-
37-
# 2. Initialize processor - name generated automatically
38-
processor = Rasteret(
39-
custom_name=custom_name,
40-
data_source=data_source,
41-
output_dir=workspace_dir,
42-
date_range=date_range,
43-
)
44-
45-
# Create index if needed
46-
if processor._collection is None:
47-
processor.create_index(
48-
bbox=bbox, date_range=date_range, query={"cloud_cover_lt": 20}
49-
)
16+
custom_name = "bangalore"
17+
date_range = ("2024-01-01", "2024-01-31")
18+
data_source = DataSources.LANDSAT
5019

51-
# List existing collections
52-
collections = Rasteret.list_collections(dir=workspace_dir)
53-
print("Available collections:")
20+
# 2. List existing collections
21+
print("1. Available Collections")
22+
print("----------------------")
23+
collections = Rasteret.list_collections(workspace_dir=workspace_dir)
5424
for c in collections:
55-
print(f"- {c['name']}: {c['size']} scenes")
25+
print(f"- {c['name']}: {c['data_source']}, {c['date_range']}, {c['size']} scenes")
26+
27+
# 3. Define areas of interest
28+
print("\n2. Defining Areas of Interest")
29+
print("---------------------------")
30+
aoi1_polygon = Polygon([
31+
(77.55, 13.01), (77.58, 13.01), (77.58, 13.08),
32+
(77.55, 13.08), (77.55, 13.01)
33+
])
34+
aoi2_polygon = Polygon([
35+
(77.56, 13.02), (77.59, 13.02), (77.59, 13.09),
36+
(77.56, 13.09), (77.56, 13.02)
37+
])
38+
bbox = aoi1_polygon.union(aoi2_polygon).bounds
39+
40+
# 4. Load or create collection
41+
print("\n3. Loading/Creating Collection")
42+
print("---------------------------")
43+
try:
44+
processor = Rasteret.load_collection(f"{custom_name}_202401_landsat")
45+
except ValueError:
46+
processor = Rasteret(
47+
custom_name=custom_name,
48+
data_source=data_source,
49+
output_dir=workspace_dir,
50+
date_range=date_range
51+
)
52+
processor.create_collection(
53+
bbox=bbox,
54+
date_range=date_range,
55+
cloud_cover_lt=20,
56+
platform={"in": ["LANDSAT_8"]}
57+
)
5658

57-
print("\n3. Processing Data")
59+
# 5. Process data
60+
print("\n4. Processing Data")
5861
print("----------------")
5962

6063
df = processor.get_gdf(
61-
geometries=[aoi_polygon, aoi_polygon2], bands=["B4", "B5"], cloud_cover_lt=20
64+
geometries=[aoi1_polygon, aoi2_polygon], bands=["B4", "B5"], cloud_cover_lt=20
6265
)
6366

6467
print(f"Columns: {df.columns}")

examples/basic_workflow_xarray.py

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,14 @@
99

1010

1111
def main():
12-
13-
# 1. Define parameters
14-
custom_name = "bangalore"
15-
date_range = ("2024-01-01", "2024-01-31")
16-
data_source = DataSources.LANDSAT # or SENTINEL2
17-
12+
"""Example of Rasteret workflow with xarray output."""
13+
# 1. Setup workspace and parameters
1814
workspace_dir = Path.home() / "rasteret_workspace"
1915
workspace_dir.mkdir(exist_ok=True)
2016

21-
print("1. Defining Area of Interest")
22-
print("--------------------------")
17+
custom_name = "bangalore"
18+
date_range = ("2024-01-01", "2024-03-31")
19+
data_source = DataSources.LANDSAT
2320

2421
# Define area and time of interest
2522
aoi1_polygon = Polygon(
@@ -33,35 +30,33 @@ def main():
3330
# get total bounds of all polygons above for stac search and stac index creation
3431
bbox = aoi1_polygon.union(aoi2_polygon).bounds
3532

36-
print("\n2. Creating and Loading Collection")
37-
print("--------------------------")
38-
39-
# 2. Initialize processor - name generated automatically
40-
processor = Rasteret(
41-
custom_name=custom_name,
42-
data_source=data_source,
43-
output_dir=workspace_dir,
44-
date_range=date_range,
45-
)
4633

47-
# Create index if collection is not present
48-
if processor._collection is None:
34+
# 2. List existing collections
35+
print("1. Available Collections")
36+
print("----------------------")
37+
collections = Rasteret.list_collections(workspace_dir=workspace_dir)
38+
for c in collections:
39+
print(f"- {c['name']}: {c['data_source']}, {c['date_range']}, {c['size']} scenes")
40+
41+
# 3. Try loading existing collection or create new
42+
try:
43+
processor = Rasteret.load_collection(f"{custom_name}_202401-03_landsat")
44+
except ValueError:
45+
print("\n2. Creating New Collection")
46+
print("-------------------------")
47+
processor = Rasteret(
48+
custom_name=custom_name,
49+
data_source=data_source,
50+
output_dir=workspace_dir,
51+
date_range=date_range
52+
)
4953
processor.create_collection(
5054
bbox=bbox,
5155
date_range=date_range,
5256
cloud_cover_lt=20,
53-
# add platform filter for Landsat 9, 8, 7, 5, 4 if needed,
54-
# else remove it for all platforms
55-
# This is unique to Landsat STAC endpoint
56-
platform={"in": ["LANDSAT_8"]},
57+
platform={"in": ["LANDSAT_8"]}
5758
)
5859

59-
# List existing collections
60-
collections = Rasteret.list_collections(dir=workspace_dir)
61-
print("Available collections:")
62-
for c in collections:
63-
print(f"- {c['name']}: {c['size']} scenes")
64-
6560
print("\n3. Processing Data")
6661
print("----------------")
6762

0 commit comments

Comments
 (0)