diff --git a/notebooks/test_raster_index.ipynb b/notebooks/test_raster_index.ipynb new file mode 100644 index 0000000..9932447 --- /dev/null +++ b/notebooks/test_raster_index.ipynb @@ -0,0 +1,3750 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fcc18135-13cb-4fd8-b04b-c5cc9836ee74", + "metadata": {}, + "source": [ + "# RioXarray raster index examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1b4ca507-2fa4-4809-a66e-28a907eda00e", + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "import rioxarray as rio\n", + "\n", + "xr.set_options(display_expand_indexes=True);" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "efcd1d28-ed85-46a0-8729-f90bb2b63070", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: rasterix pyproject\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "from rasterix import RasterIndex" + ] + }, + { + "cell_type": "markdown", + "id": "9cbecd44-d3a7-4efa-8ac1-9840788bed81", + "metadata": {}, + "source": [ + "## Example with rectilinear and no rotation affine transform\n", + "\n", + "Both x and y coordinates are 1-dimensional." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "01685f92-546b-454a-908f-12e64c1a4631", + "metadata": {}, + "outputs": [], + "source": [ + "source = \"/vsicurl/https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"" + ] + }, + { + "cell_type": "markdown", + "id": "b12bfaff-fe20-43d1-b827-812a6299b87f", + "metadata": {}, + "source": [ + "### Load and inspect the datasets, with and without `RasterIndex`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "628796f4-0cd8-405d-99de-ff7a1de976ef", + "metadata": {}, + "outputs": [], + "source": [ + "da_no_raster_index = xr.open_dataarray(source, engine=\"rasterio\")\n", + "\n", + "\n", + "def set_raster_index(obj):\n", + " \"\"\"Return a new DataArray or Dataset with a RasterIndex.\"\"\"\n", + " x_dim = obj.rio.x_dim\n", + " y_dim = obj.rio.y_dim\n", + "\n", + " index = RasterIndex.from_transform(\n", + " obj.rio.transform(),\n", + " obj.sizes[x_dim],\n", + " obj.sizes[y_dim],\n", + " x_dim=x_dim,\n", + " y_dim=y_dim,\n", + " )\n", + "\n", + " # drop-in replacement of explicit x/y coordinates for now\n", + " coords = xr.Coordinates.from_xindex(index)\n", + " return obj.assign_coords(coords)\n", + "\n", + "\n", + "da_raster_index = set_raster_index(da_no_raster_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "68328dfd-2885-4bd9-8346-44c2f01d5deb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1, y: 332, x: 316)> Size: 420kB\n",
+       "[104912 values with dtype=float32]\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "  * x            (x) float64 3kB -3.938e+06 -3.912e+06 ... 3.912e+06 3.938e+06\n",
+       "  * y            (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n",
+       "    spatial_ref  int64 8B ...\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 420kB\n", + "[104912 values with dtype=float32]\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " * x (x) float64 3kB -3.938e+06 -3.912e+06 ... 3.912e+06 3.938e+06\n", + " * y (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n", + " spatial_ref int64 8B ...\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_no_raster_index" + ] + }, + { + "cell_type": "markdown", + "id": "28f1f83c-37e7-478f-b676-9800c7eae768", + "metadata": {}, + "source": [ + "The \"x\" and \"y\" coordinates with a raster index are lazy! The repr below shows a few values for each coordinate (those have been computed on-the-fly) but clicking on the database icon doesn't show any value in the spatial coordinate data reprs." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5f07a272-895f-40ac-8ae3-07a8d3e07521", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1, y: 332, x: 316)> Size: 420kB\n",
+       "[104912 values with dtype=float32]\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "    spatial_ref  int64 8B ...\n",
+       "  * x            (x) float64 3kB -3.938e+06 -3.912e+06 ... 3.912e+06 3.938e+06\n",
+       "  * y            (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n",
+       "Indexes:\n",
+       "  ┌ x        RasterIndex\n",
+       "  └ y\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 420kB\n", + "[104912 values with dtype=float32]\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " spatial_ref int64 8B ...\n", + " * x (x) float64 3kB -3.938e+06 -3.912e+06 ... 3.912e+06 3.938e+06\n", + " * y (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n", + "Indexes:\n", + " ┌ x RasterIndex\n", + " └ y\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_raster_index" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9ab08edf-c413-451b-9ddb-b9b894ea139c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "xarray.core.indexing.CoordinateTransformIndexingAdapter" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(da_raster_index.coords.variables[\"x\"]._data)" + ] + }, + { + "cell_type": "markdown", + "id": "81ccec5b-cfbc-4532-846d-725f62a9fb9b", + "metadata": {}, + "source": [ + "### Compare and align the datasets with and without `RasterIndex`\n", + "\n", + "`equals` compares variable values without relying on Xarray coordinate indexes. Both dataarrays should thus be equal." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "95309ec5-b317-4171-85ba-bd01cc9bd649", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_raster_index.equals(da_no_raster_index)" + ] + }, + { + "cell_type": "markdown", + "id": "faa19ff9-404f-4885-90a6-5188c6c2e585", + "metadata": {}, + "source": [ + "Xarray alignment relies on Xarray coordinate indexes. Trying to align both datasets fails here since they each have different index types.\n", + "\n", + "Maybe Xarray should try aligning the datasets based on coordinate variable data in this case? I don't think this would be easy to implement in a general way... Xarray's alignment logic is already complex! Also the alignment failure here is not necessarily a bad thing (cf. alignment issues with explicit floating-point coordinates)." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ac2d1532-2441-4a86-8339-a4d5ad78dd01", + "metadata": {}, + "outputs": [], + "source": [ + "# this fails!\n", + "\n", + "# da_raster_index + da_no_raster_index" + ] + }, + { + "cell_type": "markdown", + "id": "909e425e-178b-401a-b754-f198f59dce5e", + "metadata": {}, + "source": [ + "### Indexing the dataarray with `RasterIndex`" + ] + }, + { + "cell_type": "markdown", + "id": "e0bd8ecb-25c1-4ce9-9af8-b354b636d445", + "metadata": {}, + "source": [ + "#### Integer-based selection (isel)" + ] + }, + { + "cell_type": "markdown", + "id": "6832b3b2-c632-492b-ba51-582c010384eb", + "metadata": {}, + "source": [ + "- *Slicing both x and y*" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f2d90e00-72c5-4312-95e8-b9a23fe5d5e3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1, y: 166, x: 3)> Size: 2kB\n",
+       "array([[[0., 0., 0.],\n",
+       "        [0., 0., 0.],\n",
+       "        ...,\n",
+       "        [0., 0., 0.],\n",
+       "        [0., 0., 0.]]], shape=(1, 166, 3), dtype=float32)\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "    spatial_ref  int64 8B 0\n",
+       "  * x            (x) float64 24B -3.912e+06 -3.888e+06 -3.862e+06\n",
+       "  * y            (y) float64 1kB 4.338e+06 4.288e+06 ... -3.862e+06 -3.912e+06\n",
+       "Indexes:\n",
+       "  ┌ x        RasterIndex\n",
+       "  └ y\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 2kB\n", + "array([[[0., 0., 0.],\n", + " [0., 0., 0.],\n", + " ...,\n", + " [0., 0., 0.],\n", + " [0., 0., 0.]]], shape=(1, 166, 3), dtype=float32)\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " spatial_ref int64 8B 0\n", + " * x (x) float64 24B -3.912e+06 -3.888e+06 -3.862e+06\n", + " * y (y) float64 1kB 4.338e+06 4.288e+06 ... -3.862e+06 -3.912e+06\n", + "Indexes:\n", + " ┌ x RasterIndex\n", + " └ y\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_sliced = da_raster_index.isel(x=slice(1, 4), y=slice(None, None, 2))\n", + "da_sliced" + ] + }, + { + "cell_type": "markdown", + "id": "baf29b0e-3062-426d-920d-d0b293143422", + "metadata": {}, + "source": [ + "Slicing keeps both coordinates lazy (it computes a new affine transform):" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "513ebddc-bc55-4893-8f80-881e1bfa56c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RasterIndex\n", + "'x':\n", + " \n", + "'y':\n", + " \n", + "RasterIndex\n", + "'x':\n", + " \n", + "'y':\n", + " \n" + ] + } + ], + "source": [ + "print(da_sliced.xindexes[\"x\"])\n", + "print(da_sliced.xindexes[\"y\"])" + ] + }, + { + "cell_type": "markdown", + "id": "dbafcfdc-4ffe-4c0c-985d-4f760a81b0bd", + "metadata": {}, + "source": [ + "- *Outer indexing with arbitrary array values*" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "be610609-4eed-4971-8fb3-bf1fc324d128", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1, y: 3, x: 3)> Size: 36B\n",
+       "array([[[0., 0., 0.],\n",
+       "        [0., 0., 0.],\n",
+       "        [0., 0., 0.]]], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "    spatial_ref  int64 8B 0\n",
+       "  * x            (x) float64 24B -3.938e+06 -3.888e+06 -3.838e+06\n",
+       "  * y            (y) float64 24B 4.338e+06 4.338e+06 4.312e+06\n",
+       "Indexes:\n",
+       "  ┌ x        RasterIndex\n",
+       "  └ y\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 36B\n", + "array([[[0., 0., 0.],\n", + " [0., 0., 0.],\n", + " [0., 0., 0.]]], dtype=float32)\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " spatial_ref int64 8B 0\n", + " * x (x) float64 24B -3.938e+06 -3.888e+06 -3.838e+06\n", + " * y (y) float64 24B 4.338e+06 4.338e+06 4.312e+06\n", + "Indexes:\n", + " ┌ x RasterIndex\n", + " └ y\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_outer = da_raster_index.isel(x=[0, 2, 4], y=[0, 0, 1])\n", + "da_outer" + ] + }, + { + "cell_type": "markdown", + "id": "ce3502c0-3af5-4470-ba58-b46743ab3d7e", + "metadata": {}, + "source": [ + "We cannot compute a new affine transfrom given arbitrary array positions. To allow further data selection, pandas indexes are created for indexed spatial coordinates:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9695d517-99f8-4708-85a0-0aedc7a0386b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RasterIndex\n", + "'x':\n", + " PandasIndex(Index([-3937500.0, -3887500.0, -3837500.0], dtype='float64', name='x'))\n", + "'y':\n", + " PandasIndex(Index([4337500.0, 4337500.0, 4312500.0], dtype='float64', name='y'))\n", + "RasterIndex\n", + "'x':\n", + " PandasIndex(Index([-3937500.0, -3887500.0, -3837500.0], dtype='float64', name='x'))\n", + "'y':\n", + " PandasIndex(Index([4337500.0, 4337500.0, 4312500.0], dtype='float64', name='y'))\n" + ] + } + ], + "source": [ + "print(da_outer.xindexes[\"x\"])\n", + "print(da_outer.xindexes[\"y\"])" + ] + }, + { + "cell_type": "markdown", + "id": "a1b7a75b-94dc-4fad-b901-a7d2e563c5ff", + "metadata": {}, + "source": [ + "- *Basic indexing with scalars*" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "69a94f72-781d-412d-b02f-8068bef2e0a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1)> Size: 4B\n",
+       "array([0.], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "    spatial_ref  int64 8B 0\n",
+       "    x            float64 8B -3.938e+06\n",
+       "    y            float64 8B 4.312e+06\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 4B\n", + "array([0.], dtype=float32)\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " spatial_ref int64 8B 0\n", + " x float64 8B -3.938e+06\n", + " y float64 8B 4.312e+06\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_scalar = da_raster_index.isel(x=0, y=1)\n", + "da_scalar" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "55148b6b-6f4c-4d2b-aded-1c7895a65bb5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1, y: 332)> Size: 1kB\n",
+       "array([[0., 0., 0., ..., 0., 0., 0.]], shape=(1, 332), dtype=float32)\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "    spatial_ref  int64 8B 0\n",
+       "    x            float64 8B -3.938e+06\n",
+       "    y            (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 1kB\n", + "array([[0., 0., 0., ..., 0., 0., 0.]], shape=(1, 332), dtype=float32)\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " spatial_ref int64 8B 0\n", + " x float64 8B -3.938e+06\n", + " y (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_xscalar = da_raster_index.isel(x=0)\n", + "da_xscalar" + ] + }, + { + "cell_type": "markdown", + "id": "859268ae-cb9f-49bb-ab4d-a716af7e7f1f", + "metadata": {}, + "source": [ + "**FIXME** The RasterIndex should be preserved in case of partial dimension reduction." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8ad95332-8643-462d-a395-c34deaa06f28", + "metadata": {}, + "outputs": [], + "source": [ + "# da_xscalar.xindexes[\"y\"] # should return an index" + ] + }, + { + "cell_type": "markdown", + "id": "1402935c-3e70-4ec3-a2e2-8912158a0b11", + "metadata": {}, + "source": [ + "- *Vectorized (fancy) indexing*\n", + "\n", + "Indexing the spatial coordinates with Xarray `Variable` objects returns a `RasterIndex` (wrapping `PandasIndex`) for 1-dimensional variables and no index for scalar or n-dimensional variables." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "8e707c5b-6f7b-4027-be1b-683b10d24179", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1, z: 2)> Size: 8B\n",
+       "array([[0., 0.]], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "    spatial_ref  int64 8B 0\n",
+       "  * x            (z) float64 16B -3.938e+06 -3.912e+06\n",
+       "  * y            (z) float64 16B 4.312e+06 4.312e+06\n",
+       "Dimensions without coordinates: z\n",
+       "Indexes:\n",
+       "  ┌ x        RasterIndex\n",
+       "  └ y\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 8B\n", + "array([[0., 0.]], dtype=float32)\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " spatial_ref int64 8B 0\n", + " * x (z) float64 16B -3.938e+06 -3.912e+06\n", + " * y (z) float64 16B 4.312e+06 4.312e+06\n", + "Dimensions without coordinates: z\n", + "Indexes:\n", + " ┌ x RasterIndex\n", + " └ y\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_points = da_raster_index.isel(x=xr.Variable(\"z\", [0, 1]), y=xr.Variable(\"z\", [1, 1]))\n", + "da_points" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "36cafddb-a167-4e7f-97d6-f4b3420f1597", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'band_data' (band: 1, u: 2, v: 2)> Size: 16B\n",
+       "array([[[0., 0.],\n",
+       "        [0., 0.]]], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "    spatial_ref  int64 8B 0\n",
+       "    x            (u, v) float64 32B -3.938e+06 -3.912e+06 -3.888e+06 -3.862e+06\n",
+       "    y            (u, v) float64 32B 4.312e+06 4.312e+06 4.288e+06 4.288e+06\n",
+       "Dimensions without coordinates: u, v\n",
+       "Attributes:\n",
+       "    AREA_OR_POINT:  Area
" + ], + "text/plain": [ + " Size: 16B\n", + "array([[[0., 0.],\n", + " [0., 0.]]], dtype=float32)\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " spatial_ref int64 8B 0\n", + " x (u, v) float64 32B -3.938e+06 -3.912e+06 -3.888e+06 -3.862e+06\n", + " y (u, v) float64 32B 4.312e+06 4.312e+06 4.288e+06 4.288e+06\n", + "Dimensions without coordinates: u, v\n", + "Attributes:\n", + " AREA_OR_POINT: Area" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "da_points2d = da_raster_index.isel(\n", + " x=xr.Variable((\"u\", \"v\"), [[0, 1], [2, 3]]),\n", + " y=xr.Variable((\"u\", \"v\"), [[1, 1], [2, 2]]),\n", + ")\n", + "da_points2d" + ] + }, + { + "cell_type": "markdown", + "id": "5e9c553c-a056-4d31-9102-51c44ac1cd37", + "metadata": {}, + "source": [ + "#### Label-based selection (sel)" + ] + }, + { + "cell_type": "markdown", + "id": "5ecd7d5a-a20e-4cfd-bbf2-87fac330bd08", + "metadata": {}, + "source": [ + "TODO" + ] + }, + { + "cell_type": "markdown", + "id": "5bdbb1ae-2715-47b7-9743-144fabd335d3", + "metadata": {}, + "source": [ + "## Example with complex affine transformation\n", + "\n", + "x and y coordinates are both 2-dimensional." + ] + }, + { + "cell_type": "markdown", + "id": "fb02ebda-7a09-48b2-b95d-7e78a3fd147c", + "metadata": {}, + "source": [ + "TODO" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b468192f-f199-4c0f-a4d3-0aa33e2ad873", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (Pixi)", + "language": "python", + "name": "pixi-kernel-python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/rasterix/__init__.py b/rasterix/__init__.py new file mode 100644 index 0000000..bb595b0 --- /dev/null +++ b/rasterix/__init__.py @@ -0,0 +1,4 @@ +from .raster_index import RasterIndex + + +__all__ = ["RasterIndex"] diff --git a/rasterix/raster_index.py b/rasterix/raster_index.py new file mode 100644 index 0000000..cbe9d88 --- /dev/null +++ b/rasterix/raster_index.py @@ -0,0 +1,435 @@ +import textwrap +from collections.abc import Hashable, Mapping +from typing import Any + +import numpy as np +import pandas as pd +from affine import Affine +from xarray import DataArray, Index, Variable +from xarray.core.coordinate_transform import CoordinateTransform + +# TODO: import from public API once it is available +from xarray.core.indexes import CoordinateTransformIndex, PandasIndex +from xarray.core.indexing import IndexSelResult, merge_sel_results + + +class AffineTransform(CoordinateTransform): + """Affine 2D transform wrapper.""" + + affine: Affine + xy_dims: tuple[str, str] + + def __init__( + self, + affine: Affine, + width: int, + height: int, + x_coord_name: Hashable = "xc", + y_coord_name: Hashable = "yc", + x_dim: str = "x", + y_dim: str = "y", + dtype: Any = np.dtype(np.float64), + ): + super().__init__( + (x_coord_name, y_coord_name), {x_dim: width, y_dim: height}, dtype=dtype + ) + self.affine = affine + + # array dimensions in reverse order (y = rows, x = cols) + self.xy_dims = self.dims[0], self.dims[1] + self.dims = self.dims[1], self.dims[0] + + def forward(self, dim_positions): + positions = tuple(dim_positions[dim] for dim in self.xy_dims) + x_labels, y_labels = self.affine * positions + + results = {} + for name, labels in zip(self.coord_names, [x_labels, y_labels]): + results[name] = labels + + return results + + def reverse(self, coord_labels): + labels = tuple(coord_labels[name] for name in self.coord_names) + x_positions, y_positions = ~self.affine * labels + + results = {} + for dim, positions in zip(self.xy_dims, [x_positions, y_positions]): + results[dim] = positions + + return results + + def equals(self, other): + if not isinstance(other, AffineTransform): + return False + return self.affine == other.affine and self.dim_size == other.dim_size + + +class AxisAffineTransform(CoordinateTransform): + """Axis-independent wrapper of an affine 2D transform with no skew/rotation.""" + + affine: Affine + is_xaxis: bool + coord_name: Hashable + dim: str + size: int + + def __init__( + self, + affine: Affine, + size: int, + coord_name: Hashable, + dim: str, + is_xaxis: bool, + dtype: Any = np.dtype(np.float64), + ): + assert affine.is_rectilinear and (affine.b == affine.d == 0) + + super().__init__((coord_name,), {dim: size}, dtype=dtype) + self.affine = affine + self.is_xaxis = is_xaxis + self.coord_name = coord_name + self.dim = dim + self.size = size + + def forward(self, dim_positions: dict[str, Any]) -> dict[Hashable, Any]: + positions = np.asarray(dim_positions[self.dim]) + + if self.is_xaxis: + labels, _ = self.affine * (positions, np.zeros_like(positions)) + else: + _, labels = self.affine * (np.zeros_like(positions), positions) + + return {self.coord_name: labels} + + def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]: + labels = np.asarray(coord_labels[self.coord_name]) + + if self.is_xaxis: + positions, _ = ~self.affine * (labels, np.zeros_like(labels)) + else: + _, positions = ~self.affine * (np.zeros_like(labels), labels) + + return {self.dim: positions} + + def equals(self, other): + if not isinstance(other, AxisAffineTransform): + return False + + # only compare the affine parameters of the relevant axis + if self.is_xaxis: + affine_match = ( + self.affine.a == other.affine.a and self.affine.c == other.affine.c + ) + else: + affine_match = ( + self.affine.e == other.affine.e and self.affine.f == other.affine.f + ) + + return affine_match and self.size == other.size + + def generate_coords( + self, dims: tuple[str, ...] | None = None + ) -> dict[Hashable, Any]: + assert dims is None or dims == self.dims + return self.forward({self.dim: np.arange(self.size)}) + + def slice(self, slice: slice) -> "AxisAffineTransform": + start = max(slice.start or 0, 0) + stop = min(slice.stop or self.size, self.size) + step = slice.step or 1 + + # TODO: support reverse transform (i.e., start > stop)? + assert start < stop + + size = (stop - start) // step + scale = float(step) + + if self.is_xaxis: + affine = ( + self.affine * Affine.translation(start, 0.0) * Affine.scale(scale, 1.0) + ) + else: + affine = ( + self.affine * Affine.translation(0.0, start) * Affine.scale(1.0, scale) + ) + + return type(self)( + affine, + size, + self.coord_name, + self.dim, + is_xaxis=self.is_xaxis, + dtype=self.dtype, + ) + + +class AxisAffineTransformIndex(CoordinateTransformIndex): + """Axis-independent Xarray Index for an affine 2D transform with no + skew/rotation. + + For internal use only. + + This Index class provides specific behavior on top of + Xarray's `CoordinateTransformIndex`: + + - Data slicing computes a new affine transform and returns a new + `AxisAffineTransformIndex` object + + - Otherwise data selection creates and returns a new Xarray + `PandasIndex` object for non-scalar indexers + + - The index can be converted to a `pandas.Index` object (useful for Xarray + operations that don't work with Xarray indexes yet). + + """ + + axis_transform: AxisAffineTransform + dim: str + + def __init__(self, transform: AxisAffineTransform): + assert isinstance(transform, AxisAffineTransform) + super().__init__(transform) + self.axis_transform = transform + self.dim = transform.dim + + def isel( # type: ignore[override] + self, indexers: Mapping[Any, int | slice | np.ndarray | Variable] + ) -> "AxisAffineTransformIndex | PandasIndex | None": + idxer = indexers[self.dim] + + # generate a new index with updated transform if a slice is given + if isinstance(idxer, slice): + return AxisAffineTransformIndex(self.axis_transform.slice(idxer)) + # no index for vectorized (fancy) indexing with n-dimensional Variable + elif isinstance(idxer, Variable) and idxer.ndim > 1: + return None + # no index for scalar value + elif np.ndim(idxer) == 0: + return None + # otherwise return a PandasIndex with values computed by forward transformation + else: + values = self.axis_transform.forward({self.dim: idxer})[ + self.axis_transform.coord_name + ] + if isinstance(idxer, Variable): + new_dim = idxer.dims[0] + else: + new_dim = self.dim + return PandasIndex(values, new_dim, coord_dtype=values.dtype) + + def sel(self, labels, method=None, tolerance=None): + coord_name = self.axis_transform.coord_name + label = labels[coord_name] + + if isinstance(label, slice): + if label.step is None: + # continuous interval slice indexing (preserves the index) + pos = self.transform.reverse( + {coord_name: np.array([label.start, label.stop])} + ) + pos = np.round(pos[self.dim]).astype("int") + new_start = max(pos[0], 0) + new_stop = min(pos[1], self.axis_transform.size) + return IndexSelResult({self.dim: slice(new_start, new_stop)}) + else: + # otherwise convert to basic (array) indexing + label = np.arange(label.start, label.stop, label.step) + + # support basic indexing (in the 1D case basic vs. vectorized indexing + # are pretty much similar) + unwrap_xr = False + if not isinstance(label, Variable | DataArray): + # basic indexing -> either scalar or 1-d array + try: + var = Variable("_", label) + except ValueError: + var = Variable((), label) + labels = {self.dim: var} + unwrap_xr = True + + result = super().sel(labels, method=method, tolerance=tolerance) + + if unwrap_xr: + dim_indexers = {self.dim: result.dim_indexers[self.dim].values} + result = IndexSelResult(dim_indexers) + + return result + + def to_pandas_index(self) -> pd.Index: + values = self.transform.generate_coords() + return pd.Index(values[self.dim]) + + +# The types of Xarray indexes that may be wrapped by RasterIndex +WrappedIndex = AxisAffineTransformIndex | PandasIndex | CoordinateTransformIndex +WrappedIndexCoords = Hashable | tuple[Hashable, Hashable] + + +def _filter_dim_indexers(index: WrappedIndex, indexers: Mapping) -> Mapping: + if isinstance(index, CoordinateTransformIndex): + dims = index.transform.dims + else: + # PandasIndex + dims = (str(index.dim),) + + return {dim: indexers[dim] for dim in dims if dim in indexers} + + +class RasterIndex(Index): + """Xarray index for raster coordinates. + + RasterIndex is itself a wrapper around one or more Xarray indexes associated + with either the raster x or y axis coordinate or both, depending on the + affine transformation and prior data selection (if any): + + - The affine transformation is not rectilinear or has rotation: this index + encapsulates a single `CoordinateTransformIndex` object for both the x and + y axis (2-dimensional) coordinates. + + - The affine transformation is rectilinear ands has no rotation: this index + encapsulates one or two index objects for either the x or y axis or both + (1-dimensional) coordinates. The index type is either a subclass of + `CoordinateTransformIndex` that supports slicing or `PandasIndex` (e.g., + after data selection at arbitrary locations). + + """ + + _wrapped_indexes: dict[WrappedIndexCoords, WrappedIndex] + + def __init__(self, indexes: Mapping[WrappedIndexCoords, WrappedIndex]): + idx_keys = list(indexes) + idx_vals = list(indexes.values()) + + # either one or the other configuration (dependent vs. independent x/y axes) + axis_dependent = ( + len(indexes) == 1 + and isinstance(idx_keys[0], tuple) + and isinstance(idx_vals[0], CoordinateTransformIndex) + ) + axis_independent = len(indexes) in (1, 2) and all( + isinstance(idx, AxisAffineTransformIndex | PandasIndex) for idx in idx_vals + ) + assert axis_dependent ^ axis_independent + + self._wrapped_indexes = dict(indexes) + + @classmethod + def from_transform( + cls, affine: Affine, width: int, height: int, x_dim: str = "x", y_dim: str = "y" + ) -> "RasterIndex": + indexes: dict[ + WrappedIndexCoords, AxisAffineTransformIndex | CoordinateTransformIndex + ] + + # pixel centered coordinates + affine = affine * Affine.translation(0.5, 0.5) + + if affine.is_rectilinear and affine.b == affine.d == 0: + x_transform = AxisAffineTransform(affine, width, "x", x_dim, is_xaxis=True) + y_transform = AxisAffineTransform( + affine, height, "y", y_dim, is_xaxis=False + ) + indexes = { + "x": AxisAffineTransformIndex(x_transform), + "y": AxisAffineTransformIndex(y_transform), + } + else: + xy_transform = AffineTransform( + affine, width, height, x_dim=x_dim, y_dim=y_dim + ) + indexes = {("x", "y"): CoordinateTransformIndex(xy_transform)} + + return cls(indexes) + + @classmethod + def from_variables( + cls, + variables: Mapping[Any, Variable], + *, + options: Mapping[str, Any], + ) -> "RasterIndex": + # TODO: compute bounds, resolution and affine transform from explicit coordinates. + raise NotImplementedError( + "Creating a RasterIndex from existing coordinates is not yet supported." + ) + + def create_variables( + self, variables: Mapping[Any, Variable] | None = None + ) -> dict[Hashable, Variable]: + new_variables: dict[Hashable, Variable] = {} + + for index in self._wrapped_indexes.values(): + new_variables.update(index.create_variables()) + + return new_variables + + def isel( + self, indexers: Mapping[Any, int | slice | np.ndarray | Variable] + ) -> "RasterIndex | None": + new_indexes: dict[WrappedIndexCoords, WrappedIndex] = {} + + for coord_names, index in self._wrapped_indexes.items(): + index_indexers = _filter_dim_indexers(index, indexers) + if not index_indexers: + # no selection to perform: simply propagate the index + # TODO: uncomment when https://github.com/pydata/xarray/issues/10063 is fixed + # new_indexes[coord_names] = index + ... + else: + new_index = index.isel(index_indexers) + if new_index is not None: + new_indexes[coord_names] = new_index + + if new_indexes: + # TODO: if there's only a single PandasIndex can we just return it? + # (maybe better to keep it wrapped if we plan to later make RasterIndex CRS-aware) + return RasterIndex(new_indexes) + else: + return None + + def sel( + self, labels: dict[Any, Any], method=None, tolerance=None + ) -> IndexSelResult: + results = [] + + for coord_names, index in self._wrapped_indexes.items(): + if not isinstance(coord_names, tuple): + coord_names = (coord_names,) + index_labels = {k: v for k, v in labels if k in coord_names} + if index_labels: + results.append( + index.sel(index_labels, method=method, tolerance=tolerance) + ) + + return merge_sel_results(results) + + def equals(self, other: Index) -> bool: + if not isinstance(other, RasterIndex): + return False + if set(self._wrapped_indexes) != set(other._wrapped_indexes): + return False + + return all( + index.equals(other._wrapped_indexes[k]) # type: ignore[arg-type] + for k, index in self._wrapped_indexes.items() + ) + + def to_pandas_index(self) -> pd.Index: + # conversion is possible only if this raster index encapsulates + # exactly one AxisAffineTransformIndex or a PandasIndex associated + # to either the x or y axis (1-dimensional) coordinate. + if len(self._wrapped_indexes) == 1: + index = next(iter(self._wrapped_indexes.values())) + if isinstance(index, AxisAffineTransformIndex | PandasIndex): + return index.to_pandas_index() + + raise ValueError("Cannot convert RasterIndex to pandas.Index") + + def __repr__(self): + items: list[str] = [] + + for coord_names, index in self._wrapped_indexes.items(): + items += [repr(coord_names) + ":", textwrap.indent(repr(index), " ")] + + return "RasterIndex\n" + "\n".join(items)