Skip to content

Commit 429b8b1

Browse files
committed
Undo unrelated changes
1 parent 1f669f5 commit 429b8b1

File tree

5 files changed

+71
-272
lines changed

5 files changed

+71
-272
lines changed

.vscode/settings.json

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,11 @@
22
"yaml.schemas": {
33
"https://squidfunk.github.io/mkdocs-material/schema.json": "mkdocs.yml"
44
},
5-
"yaml.customTags": [
5+
"yaml.customTags": [
66
"!ENV scalar",
77
"!ENV sequence",
88
"tag:yaml.org,2002:python/name:material.extensions.emoji.to_svg",
99
"tag:yaml.org,2002:python/name:material.extensions.emoji.twemoji",
1010
"tag:yaml.org,2002:python/name:pymdownx.superfences.fence_code_format"
11-
],
12-
"json.schemas": [
13-
{
14-
"url": "./demo/keto/.schemas/relation_tuple_file.schema.json",
15-
"fileMatch": [
16-
"demo/keto/relationships.json"
17-
]
18-
}
1911
]
2012
}

demo/cmip6/generate_cmip6_items.ipynb

Lines changed: 46 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -9,52 +9,20 @@
99
"\n",
1010
"This notebook walks through generating STAC items from [NEX GDDP CMIP6 COGs on AWS](https://aws.amazon.com/marketplace/pp/prodview-k6adk576fiwmm#resources).\n",
1111
"\n",
12-
"As-is it uses daily data from the `GISS-E2-1-G` model, the `tas` variable and loads data from 1950 and 1951. The bucket has other data available. It includes monthly aggregates, other models, other variables and more years. The scripts below can easily be modified to STAC-ify other data in the nex-gddp-cmip6-cog bucket.\n"
12+
"As-is it uses daily data from the `GISS-E2-1-G` model, the `tas` variable and loads data from 1950 and 1951. The bucket has other data available. It includes monthly aggregates, other models, other variables and more years. The scripts below can easily be modified to STAC-ify other data in the nex-gddp-cmip6-cog bucket."
1313
]
1414
},
1515
{
1616
"cell_type": "code",
1717
"execution_count": 1,
1818
"id": "6f788363",
1919
"metadata": {},
20-
"outputs": [
21-
{
22-
"name": "stdout",
23-
"output_type": "stream",
24-
"text": [
25-
"Requirement already satisfied: boto3 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (1.34.55)\n",
26-
"Requirement already satisfied: fsspec in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (2024.2.0)\n",
27-
"Requirement already satisfied: pystac in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (1.9.0)\n",
28-
"Requirement already satisfied: rio-stac in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (0.9.0)\n",
29-
"Requirement already satisfied: s3fs in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (0.4.2)\n",
30-
"Requirement already satisfied: botocore<1.35.0,>=1.34.55 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from boto3) (1.34.55)\n",
31-
"Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from boto3) (1.0.1)\n",
32-
"Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from boto3) (0.10.0)\n",
33-
"Requirement already satisfied: python-dateutil>=2.7.0 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from pystac) (2.8.2)\n",
34-
"Requirement already satisfied: rasterio in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rio-stac) (1.3.9)\n",
35-
"Requirement already satisfied: urllib3<2.1,>=1.25.4 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from botocore<1.35.0,>=1.34.55->boto3) (2.0.7)\n",
36-
"Requirement already satisfied: six>=1.5 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from python-dateutil>=2.7.0->pystac) (1.16.0)\n",
37-
"Requirement already satisfied: affine in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (2.4.0)\n",
38-
"Requirement already satisfied: attrs in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (23.2.0)\n",
39-
"Requirement already satisfied: certifi in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (2023.11.17)\n",
40-
"Requirement already satisfied: click>=4.0 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (8.1.7)\n",
41-
"Requirement already satisfied: cligj>=0.5 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (0.7.2)\n",
42-
"Requirement already satisfied: numpy in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (1.26.4)\n",
43-
"Requirement already satisfied: snuggs>=1.4.1 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (1.4.7)\n",
44-
"Requirement already satisfied: click-plugins in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (1.1.1)\n",
45-
"Requirement already satisfied: setuptools in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from rasterio->rio-stac) (67.6.1)\n",
46-
"Requirement already satisfied: pyparsing>=2.1.6 in /Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages (from snuggs>=1.4.1->rasterio->rio-stac) (3.1.1)\n",
47-
"\n",
48-
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
49-
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
50-
]
51-
}
52-
],
20+
"outputs": [],
5321
"source": [
54-
"!pip install boto3 fsspec pystac rio-stac s3fs\n",
22+
"import boto3\n",
5523
"import fsspec\n",
5624
"import json\n",
57-
"from pystac import Collection, Asset\n",
25+
"from pystac import Catalog, Collection, Item, Asset, MediaType\n",
5826
"from datetime import datetime\n",
5927
"import rio_stac\n",
6028
"from pprint import pprint\n",
@@ -83,7 +51,7 @@
8351
"id": "762e1e50-46e6-4dab-8462-38d31060e202",
8452
"metadata": {},
8553
"source": [
86-
"## Discover the COG files on S3 using fsspec and `.glob`\n"
54+
"## Discover the COG files on S3 using fsspec and `.glob`"
8755
]
8856
},
8957
{
@@ -102,21 +70,7 @@
10270
"execution_count": 4,
10371
"id": "a7caab29",
10472
"metadata": {},
105-
"outputs": [
106-
{
107-
"name": "stderr",
108-
"output_type": "stream",
109-
"text": [
110-
"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/fsspec/registry.py:273: UserWarning: Your installed version of s3fs is very old and known to cause\n",
111-
"severe performance issues, see also https://github.com/dask/dask/issues/10276\n",
112-
"\n",
113-
"To fix, you should specify a lower version bound on s3fs, or\n",
114-
"update the current installation.\n",
115-
"\n",
116-
" warnings.warn(s3_msg)\n"
117-
]
118-
}
119-
],
73+
"outputs": [],
12074
"source": [
12175
"fs_read = fsspec.filesystem(\"s3\", anon=anon)"
12276
]
@@ -147,7 +101,7 @@
147101
"source": [
148102
"## Subset the data so we don't process all historical data\n",
149103
"\n",
150-
"But you can if you want!\n"
104+
"But you can if you want!"
151105
]
152106
},
153107
{
@@ -158,17 +112,15 @@
158112
"outputs": [],
159113
"source": [
160114
"# Here we prepend the prefix 's3://', which points to AWS.\n",
161-
"subset_files = sorted(\n",
162-
" [\"s3://\" + f for f in file_paths if \"_1950_\" in f or \"_1951_\" in f]\n",
163-
")"
115+
"subset_files = sorted([\"s3://\" + f for f in file_paths if \"_1950_\" in f or \"_1951_\" in f])"
164116
]
165117
},
166118
{
167119
"cell_type": "markdown",
168120
"id": "3bae56f1-1ea6-4755-84b3-149666e84d3d",
169121
"metadata": {},
170122
"source": [
171-
"## Double check we discovered some files\n"
123+
"## Double check we discovered some files"
172124
]
173125
},
174126
{
@@ -189,9 +141,7 @@
189141
"if len(subset_files) == 0:\n",
190142
" raise Exception(f\"No files to process. Do COGs for the {model} model exist?\")\n",
191143
"else:\n",
192-
" print(\n",
193-
" f\"Subseted data to files for 1950 and 1951. {len(subset_files)} files to process.\"\n",
194-
" )"
144+
" print(f\"Subseted data to files for 1950 and 1951. {len(subset_files)} files to process.\")"
195145
]
196146
},
197147
{
@@ -201,7 +151,7 @@
201151
"source": [
202152
"## Setup the collection and items\n",
203153
"\n",
204-
"The collection is statically defined in a json file, but can be modified as desired. Then, iterate through all the files in S3 and create STAC Item JSON using `rio_stac`. Write all the JSON to an `ndjson` file for inserting.\n"
154+
"The collection is statically defined in a json file, but can be modified as desired. Then, iterate through all the files in S3 and create STAC Item JSON using `rio_stac`. Write all the JSON to an `ndjson` file for inserting."
205155
]
206156
},
207157
{
@@ -213,7 +163,7 @@
213163
"source": [
214164
"file_prefix = f\"CMIP6_daily_{model}_{variable}\"\n",
215165
"stac_items_file = f\"{file_prefix}_stac_items.ndjson\"\n",
216-
"collection_json = json.loads(open(f\"{file_prefix}_collection.json\").read())\n",
166+
"collection_json = json.loads(open(f'{file_prefix}_collection.json').read())\n",
217167
"collection = Collection.from_dict(collection_json)"
218168
]
219169
},
@@ -227,7 +177,7 @@
227177
"outputs": [],
228178
"source": [
229179
"# clear the ndjson items file\n",
230-
"with open(stac_items_file, \"w\") as file:\n",
180+
"with open(stac_items_file, 'w') as file:\n",
231181
" pass"
232182
]
233183
},
@@ -240,44 +190,43 @@
240190
"source": [
241191
"def process_item(s3_file, file, lock):\n",
242192
" print(f\"Processing {s3_file}\")\n",
243-
" filename = s3_file.split(\"/\")[-1]\n",
244-
" year, month, day = filename.split(\"_\")[-3:]\n",
245-
" day = day.replace(\".tif\", \"\")\n",
246-
" datetime_ = datetime.strptime(f\"{year}{month}{day}\", \"%Y%m%d\")\n",
193+
" filename = s3_file.split('/')[-1]\n",
194+
" year, month, day = filename.split('_')[-3:]\n",
195+
" day = day.replace('.tif', '')\n",
196+
" datetime_ = datetime.strptime(f'{year}{month}{day}', '%Y%m%d') \n",
247197
" # Create a new Item\n",
248198
" item = rio_stac.create_stac_item(\n",
249-
" id=filename,\n",
250-
" source=s3_file,\n",
251-
" collection=collection.id,\n",
252-
" input_datetime=datetime_,\n",
253-
" with_proj=True,\n",
254-
" with_raster=True,\n",
255-
" asset_name=\"data\",\n",
256-
" asset_roles=[\"data\"],\n",
257-
" asset_media_type=\"image/tiff; application=geotiff; profile=cloud-optimized\",\n",
258-
" assets={\n",
259-
" \"tiling\": Asset(\n",
260-
" href=s3_file,\n",
261-
" roles=[\"virtual\", \"tiling\"],\n",
262-
" title=\"tiling\",\n",
263-
" description=\"Virtual asset for tiling\",\n",
264-
" extra_fields={\n",
265-
" \"compose:rescale\": [210, 330],\n",
266-
" \"compose:colormap_name\": \"hot\",\n",
267-
" },\n",
268-
" )\n",
269-
" },\n",
199+
" id=filename,\n",
200+
" source=s3_file,\n",
201+
" collection=collection.id,\n",
202+
" input_datetime=datetime_,\n",
203+
" with_proj=True,\n",
204+
" with_raster=True,\n",
205+
" asset_name=\"data\",\n",
206+
" asset_roles=[\"data\"],\n",
207+
" asset_media_type=\"image/tiff; application=geotiff; profile=cloud-optimized\"\n",
208+
" )\n",
209+
" tiling_asset = Asset(\n",
210+
" href=s3_file,\n",
211+
" roles=['virtual', 'tiling'],\n",
212+
" title='tiling',\n",
213+
" description='Virtual asset for tiling',\n",
214+
" extra_fields={\n",
215+
" 'compose:rescale': [210, 330],\n",
216+
" 'compose:colormap_name': 'hot'\n",
217+
" }\n",
270218
" )\n",
219+
" item.assets['tiling'] = tiling_asset\n",
271220
" with lock:\n",
272-
" file.write(json.dumps(item.to_dict()) + \"\\n\")"
221+
" file.write(json.dumps(item.to_dict()) + '\\n')"
273222
]
274223
},
275224
{
276225
"cell_type": "markdown",
277226
"id": "f38a5953-a195-4106-b172-26ba2bce9533",
278227
"metadata": {},
279228
"source": [
280-
"NOTE: This can take a while if processing all 730 files, which is why it is subset to only 2 files below, for demonstration purposes.\n"
229+
"NOTE: This can take a while if processing all 730 files, which is why it is subset to only 2 files below, for demonstration purposes."
281230
]
282231
},
283232
{
@@ -290,18 +239,16 @@
290239
"name": "stdout",
291240
"output_type": "stream",
292241
"text": [
293-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_01.tifProcessing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_02.tif\n",
294-
"\n"
242+
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_01.tif\n",
243+
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_02.tif\n"
295244
]
296245
}
297246
],
298247
"source": [
299248
"lock = threading.Lock()\n",
300-
"file = open(stac_items_file, \"a\")\n",
249+
"file = open(stac_items_file, 'a')\n",
301250
"with concurrent.futures.ThreadPoolExecutor() as executor:\n",
302-
" futures = [\n",
303-
" executor.submit(process_item, obj, file, lock) for obj in subset_files[0:2]\n",
304-
" ]\n",
251+
" futures = [executor.submit(process_item, obj, file, lock) for obj in subset_files[0:2]]\n",
305252
" [future.result() for future in concurrent.futures.as_completed(futures)]\n",
306253
"file.close()"
307254
]
@@ -311,7 +258,7 @@
311258
"id": "8e062949-16c9-4a79-b2ee-1579f244d74f",
312259
"metadata": {},
313260
"source": [
314-
"# Final step - seed the database\n"
261+
"# Final step - seed the database"
315262
]
316263
},
317264
{
@@ -326,63 +273,7 @@
326273
"text": [
327274
"postgresql://postgres:password@localhost:5432/postgres\n",
328275
"Inserting collection from CMIP6_daily_GISS-E2-1-G_tas_collection.json\n",
329-
"error connecting in 'pool-1': connection failed: FATAL: role \"postgres\" does not exist\n",
330-
"error connecting in 'pool-1': connection failed: FATAL: role \"postgres\" does not exist\n",
331-
"error connecting in 'pool-1': connection failed: FATAL: role \"postgres\" does not exist\n",
332-
"error connecting in 'pool-1': connection failed: FATAL: role \"postgres\" does not exist\n",
333-
"^C\n",
334-
"Traceback (most recent call last):\n",
335-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/bin/pypgstac\", line 8, in <module>\n",
336-
" sys.exit(cli())\n",
337-
" ^^^^^\n",
338-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/pypgstac.py\", line 125, in cli\n",
339-
" fire.Fire(PgstacCLI)\n",
340-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/fire/core.py\", line 141, in Fire\n",
341-
" component_trace = _Fire(component, args, parsed_flag_args, context, name)\n",
342-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
343-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/fire/core.py\", line 466, in _Fire\n",
344-
" component, remaining_args = _CallAndUpdateTrace(\n",
345-
" ^^^^^^^^^^^^^^^^^^^^\n",
346-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/fire/core.py\", line 681, in _CallAndUpdateTrace\n",
347-
" component = fn(*varargs, **kwargs)\n",
348-
" ^^^^^^^^^^^^^^^^^^^^^^\n",
349-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/pypgstac.py\", line 74, in load\n",
350-
" loader.load_collections(file, method)\n",
351-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/load.py\", line 200, in load_collections\n",
352-
" self.check_version()\n",
353-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/load.py\", line 158, in check_version\n",
354-
" db_version = self.db.version\n",
355-
" ^^^^^^^^^^^^^^^\n",
356-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/db.py\", line 251, in version\n",
357-
" version = self.query_one(\n",
358-
" ^^^^^^^^^^^^^^^\n",
359-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/db.py\", line 228, in query_one\n",
360-
" r = next(self.query(*args, **kwargs))\n",
361-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
362-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/db.py\", line 200, in query\n",
363-
" conn = self.connect()\n",
364-
" ^^^^^^^^^^^^^^\n",
365-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/pypgstac/db.py\", line 108, in connect\n",
366-
" self.connection = pool.getconn()\n",
367-
" ^^^^^^^^^^^^^^\n",
368-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/psycopg_pool/pool.py\", line 200, in getconn\n",
369-
" return self._getconn_with_check_loop(deadline)\n",
370-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
371-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/psycopg_pool/pool.py\", line 212, in _getconn_with_check_loop\n",
372-
" conn = self._getconn_unchecked(deadline - monotonic())\n",
373-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
374-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/psycopg_pool/pool.py\", line 253, in _getconn_unchecked\n",
375-
" conn = pos.wait(timeout=timeout)\n",
376-
" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
377-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/psycopg_pool/pool.py\", line 844, in wait\n",
378-
" raise self.error\n",
379-
" File \"/Users/alukach/Projects/devseed/eoapi/jupyterhub-auth/.venv/lib/python3.11/site-packages/psycopg_pool/pool.py\", line 833, in wait\n",
380-
" if not self._cond.wait(timeout):\n",
381-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
382-
" File \"/opt/homebrew/Cellar/python@3.11/3.11.4/Frameworks/Python.framework/Versions/3.11/lib/python3.11/threading.py\", line 324, in wait\n",
383-
" gotit = waiter.acquire(True, timeout)\n",
384-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
385-
"KeyboardInterrupt\n"
276+
"Inserting items from CMIP6_daily_GISS-E2-1-G_tas_stac_items.ndjson\n"
386277
]
387278
}
388279
],
@@ -412,7 +303,7 @@
412303
"name": "python",
413304
"nbconvert_exporter": "python",
414305
"pygments_lexer": "ipython3",
415-
"version": "3.11.4"
306+
"version": "3.10.12"
416307
}
417308
},
418309
"nbformat": 4,

0 commit comments

Comments
 (0)