Skip to content

Commit a659756

Browse files
committed
Various PR improvements
Signed-off-by: Jason T. Brown <[email protected]>
1 parent 55d2bf8 commit a659756

File tree

10 files changed

+141
-25
lines changed

10 files changed

+141
-25
lines changed

core/src/main/scala/org/locationtech/rasterframes/functions/TileFunctions.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ trait TileFunctions {
151151
withTypedAlias(s"rf_make_ones_tile($cols, $rows, $cellTypeName)")(constTile)
152152
}
153153

154-
/** Construct a `proj_raster` structure from individual CRS, Extent, and Tile columns. */
154+
/** Construct a `proj_raster` structure from individual Tile, Extent, and CRS columns. */
155155
def rf_proj_raster(tile: Column, extent: Column, crs: Column): TypedColumn[Any, ProjectedRasterTile] =
156156
CreateProjectedRaster(tile, extent, crs)
157157

core/src/main/scala/org/locationtech/rasterframes/util/DataFrameRenderers.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import geotrellis.raster.render.ColorRamps
2525
import org.apache.spark.sql.Dataset
2626
import org.apache.spark.sql.functions.{base64, concat, concat_ws, length, lit, substring, when}
2727
import org.apache.spark.sql.jts.JTSTypes
28-
import org.apache.spark.sql.types.{StringType, StructField}
28+
import org.apache.spark.sql.types.{StringType, StructField, BinaryType}
2929
import org.locationtech.rasterframes.expressions.DynamicExtractors
3030
import org.locationtech.rasterframes.{rfConfig, rf_render_png, rf_resample}
3131
import org.apache.spark.sql.rf.WithTypeConformity
@@ -48,6 +48,12 @@ trait DataFrameRenderers {
4848
base64(rf_render_png(rf_resample(resolved, 0.5), ColorRamps.Viridis)), // TODO: how to expose?
4949
lit("\"></img>")
5050
)
51+
else if (renderTiles && c.dataType == BinaryType)
52+
when(
53+
substring(resolved, 0, 8) === lit(Array[Byte](137.asInstanceOf[Byte], 80, 78, 71, 13, 10, 26, 10)),
54+
concat(lit("<img src=\"data:image/png;base64,"), base64(resolved), lit("\"></img>"))
55+
)
56+
.otherwise(resolved.cast(StringType))
5157
else {
5258
val isGeom = WithTypeConformity(c.dataType).conformsTo(JTSTypes.GeometryTypeInstance)
5359
val str = resolved.cast(StringType)

docs/src/main/paradox/reference.md

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ Fetches the extent (bounding box or envelope) of a `ProjectedRasterTile` or `Ras
120120

121121
Fetch CRS structure representing the coordinate reference system of a `ProjectedRasterTile` or `RasterSource` type tile columns, or from a column of strings in the form supported by @ref:[`rf_mk_crs`](reference.md#rf-mk-crs).
122122

123+
### rf_proj_raster
124+
125+
ProjectedRasterTile rf_proj_raster(Tile tile, Extent extent, CRS crs)
126+
127+
Construct a `proj_raster` structure from individual Tile, Extent, and CRS columns.
128+
123129
### rf_mk_crs
124130

125131
Struct rf_mk_crs(String crsText)
@@ -628,6 +634,18 @@ Aggregates over the `tile` and returns statistical summaries of cell values: num
628634

629635
Aggregates over all of the rows in a DataFrame of `tile` and returns a count of each cell value to create a histogram, with values plotted on the x-axis and counts on the y-axis. Related is the @ref:[`rf_tile_histogram`](reference.md#rf-tile-histogram) function, which operates on a single row at a time.
630636

637+
### rf_agg_extent
638+
639+
Extent rf_agg_extent(Extent extent)
640+
641+
Compute the naive aggregate extent over a column. Assumes CRS homogeneity. With mixed CRS in the column, or if you are unsure, use @ref:[`rf_agg_reprojected_extent`](reference.md#rf-agg-reprojected-extent).
642+
643+
644+
### rf_agg_reprojected_extent
645+
646+
Extent rf_agg_reprojected_extent(Extent extent, CRS source_crs, String dest_crs)
647+
648+
Compute the aggregate extent over the `extent` and `source_crs` columns. The `dest_crs` is given as a string. Each row's extent will be reprojected to the `dest_crs` before aggregating.
631649

632650
## Tile Local Aggregate Statistics
633651

@@ -710,21 +728,13 @@ Pretty print the tile values as plain text.
710728

711729
String rf_render_matrix(Tile tile)
712730

713-
Render Tile cell values as numeric values, for debugging purposes.
714-
715-
716-
### rf_rgb_composite
717-
718-
Tile rf_rgb_composite(Tile red, Tile green, Tile blue)
719-
720-
Merges three bands into a single byte-packed RGB composite. It first scales each cell to fit into an unsigned byte, in the range 0-255, and then merges all three channels to fit into a 32-bit unsigned integer. This is useful when you want an RGB tile to render or to process with other color imagery tools.
721-
731+
Render Tile cell values as a string of numeric values, for debugging purposes.
722732

723733
### rf_render_png
724734

725735
Array rf_render_png(Tile red, Tile green, Tile blue)
726736

727-
Runs [`rf_rgb_composite`](reference.md#rf-rgb-composite) on the given tile columns and then encodes the result as a PNG byte array.
737+
Converts three tile columns to a three-channel PNG-encoded image `bytearray`. First evaluates [`rf_rgb_composite`](reference.md#rf-rgb-composite) on the given tile columns, and then encodes the result. For more about rendering these in a Jupyter or IPython environment, see @ref:[Writing Raster Data](raster-write.md#rendering-samples-with-color).
728738

729739
### rf_render_color_ramp_png
730740

@@ -755,16 +765,20 @@ Converts given tile into a PNG image, using a color ramp of the given name to co
755765
* "Greyscale128"
756766
* "Greyscale256"
757767

758-
Further descriptions of these color ramps can be found in the [Geotrellis Documentation](https://geotrellis.readthedocs.io/en/latest/guide/rasters.html#built-in-color-ramps).
768+
Further descriptions of these color ramps can be found in the [Geotrellis Documentation](https://geotrellis.readthedocs.io/en/latest/guide/rasters.html#built-in-color-ramps). For more about rendering these in a Jupyter or IPython environment, see @ref:[Writing Raster Data](raster-write.md#rendering-samples-with-color).
759769

760770
### rf_agg_overview_raster
761771

762772
Tile rf_agg_overview_raster(Tile proj_raster_col, int cols, int rows, Extent aoi)
763773
Tile rf_agg_overview_raster(Tile tile_col, int cols, int rows, Extent aoi, Extent tile_extent_col, CRS tile_crs_col)
764774

765-
Construct an overview raster of size `cols`x`rows` where data in `proj_raster` intersects the `aoi` bound box in web-mercator. Uses bi-linear sampling method.
775+
Construct an overview _tile_ of size `cols` by `rows`. Data is filtered to the specified `aoi` which is given in web mercator. Uses bi-linear sampling method. The `tile_extent_col` and `tile_crs_col` arguments are optional if the first argument has its Extent and CRS embedded.
766776

777+
### rf_rgb_composite
767778

779+
Tile rf_rgb_composite(Tile red, Tile green, Tile blue)
780+
781+
Merges three bands into a single byte-packed RGB composite. It first scales each cell to fit into an unsigned byte, in the range 0-255, and then merges all three channels to fit into a 32-bit unsigned integer. This is useful when you want an RGB tile to render or to process with other color imagery tools.
768782

769783
[RasterFunctions]: org.locationtech.rasterframes.RasterFunctions
770784
[scaladoc]: latest/api/index.html

docs/src/main/paradox/release-notes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
* Added `toDF` extension method to `MultibandGeoTiff`
1212
* Added `rf_agg_extent` and `rf_agg_reprojected_extent` to compute the aggregate extent of a column
1313
* Added `rf_proj_raster` for constructing a `proj_raster` structure from individual Tile, Extent, and CRS columns.
14+
* Added `rf_render_color_ramp_png` to compute PNG byte array for a single tile column, with specified color ramp.
15+
* In `rf_ipython`, improved rendering of dataframe binary contents with PNG preamble.
1416
* Throw an `IllegalArgumentException` when attempting to apply a mask to a `Tile` whose `CellType` has no NoData defined. ([#409](https://github.com/locationtech/rasterframes/issues/384))
1517

1618
### 0.8.4

pyrasterframes/src/main/python/docs/raster-write.pymd

Lines changed: 65 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ We have some convenience methods to quickly visualize tiles (see discussion of t
2121
In an IPython or Jupyter interpreter, a `Tile` object will be displayed as an image with limited metadata.
2222

2323
```python tile_sample
24+
import pyrasterframes.rf_ipython
25+
2426
def scene(band):
2527
b = str(band).zfill(2) # converts int 2 to '02'
2628
return 'https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/' \
@@ -39,18 +41,61 @@ display(tile) # IPython.display function
3941
Within an IPython or Jupyter interpreter, Spark and pandas DataFrames containing a column of _tiles_ will be rendered as the samples discussed above. Simply import the `rf_ipython` submodule to enable enhanced HTML rendering of these DataFrame types.
4042

4143
```python to_samples, evaluate=True
42-
import pyrasterframes.rf_ipython
4344

4445
samples = spark_df \
4546
.select(
4647
rf_extent('proj_raster').alias('extent'),
4748
rf_tile('proj_raster').alias('tile'),
4849
)\
49-
.select('extent.*', 'tile') \
50+
.select('tile', 'extent.*') \
5051
.limit(3)
5152
samples
5253
```
5354

55+
## Rendering Samples with Color
56+
57+
By default the IPython visualizations use the Viridis color map for each single channel tile. There are other options for reasoning about how color should be applied in the results.
58+
59+
60+
### Color Composites
61+
62+
Rendering three different bands of imagery together is called a _color composite_. The bands selected are mapped to the red, green, and blue channels of the resulting display. If the bands chosen are red, green, and blue, the composite is called a true-color composite. Otherwise it is a false-color composite.
63+
64+
Using the @ref:[`rf_render_png`](reference.md#rf-render-png) function, we will compute a three band PNG image as a `bytearray`. The resulting `bytearray` will be displayed as an image in either a Spark or pandas DataFrame display if `rf_ipython` has been imported.
65+
66+
```python, color-composite
67+
# Select red, green, and blue, respectively
68+
composite_df = spark.read.raster([[scene(1), scene(4), scene(3)]],
69+
tile_dimensions=(256, 256))
70+
composite_df = composite_df.withColumn('png',
71+
rf_render_png('proj_raster_0', 'proj_raster_1', 'proj_raster_2'))
72+
composite_df.select('png').limit(3)
73+
```
74+
75+
76+
Alternatively the `bytearray` result can be displayed with [`pillow`](https://pillow.readthedocs.io/en/stable/).
77+
78+
```python, single_tile_pil
79+
import io
80+
from PIL.Image import open as PIL_open
81+
png_bytearray = composite_df.first()['png']
82+
pil_image = PIL_open(io.BytesIO(png_bytearray))
83+
pil_image
84+
```
85+
86+
```python, display_pil, echo=False
87+
display(pil_image)
88+
```
89+
90+
### Custom Color Map
91+
92+
You can also apply a different color map to a single-channel Tile using the @ref:[`rf_render_color_ramp_png`](reference.md#rf-render-color-ramp-png) function. See the function documentation for information about the available color maps.
93+
94+
```python, color-map
95+
samples.select(rf_render_color_ramp_png('tile', 'Magma'))
96+
```
97+
98+
5499
## GeoTIFFs
55100

56101
GeoTIFF is one of the most common file formats for spatial data, providing flexibility in data encoding, representation, and storage. RasterFrames provides a specialized Spark DataFrame writer for rendering a RasterFrame to a GeoTIFF.
@@ -86,15 +131,28 @@ os.remove(outfile)
86131

87132
## Overview Rasters
88133

89-
In cases where writing and reading to/from a GeoTIFF isn't convenient, RasterFrames provides the `rf_agg_overview_raster` aggregate function, where you can construct a single raster (rendered as a tile) downsampled from all or a subset of the dataframe. This allows you to effectively construct the same operations the GeoTIFF writer performs, but without the file I/O.
134+
In cases where writing and reading to/from a GeoTIFF isn't convenient, RasterFrames provides the @ref:[`rf_agg_overview_raster`](reference.md#rf-agg-overview-raster) aggregate function, where you can construct a single raster (rendered as a tile) downsampled from all or a subset of the DataFrame. This allows you to effectively construct the same operations the GeoTIFF writer performs, but without the file I/O.
90135

91-
Because a Dataframe may contain data with varying CRSs, and the rendered raster needs to have a single CRS, an "Area of Interest" (AOI) is required in a predetermined CRS. In the case of `rf_agg_reprojected_extent`, the AOI needs to be in commonly used ["web mercator"](https://en.wikipedia.org/wiki/Web_Mercator_projection) CRS.
136+
The `rf_agg_overview_raster` function will reproject data to the commonly used ["web mercator"](https://en.wikipedia.org/wiki/Web_Mercator_projection) CRS. You must specify an "Area of Interest" (AOI) in web mercator. You can use @ref:[`rf_agg_reprojected_extent`](reference.md#rf-agg-reprojected-extent) to compute the extent of a DataFrame in any CRS or mix of CRSs.
92137

93138
```python, overview
94139
from pyrasterframes.rf_types import Extent
95-
target = spark_df.withColumn('extent', rf_extent('proj_raster')).withColumn('crs', rf_crs('proj_raster'))
96-
aoi = Extent.from_row(target.select(rf_agg_reprojected_extent('extent', 'crs', 'EPSG:3857')).first()[0])
97-
target.select(rf_agg_overview_raster(rf_tile('proj_raster'), 512, 512, aoi, 'extent', 'crs')).first()[0]
140+
wm_extent = spark_df.agg(
141+
rf_agg_reprojected_extent(rf_extent('proj_raster'), rf_crs('proj_raster'), 'EPSG:3857')
142+
).first()[0]
143+
aoi = Extent.from_row(wm_extent)
144+
print(aoi)
145+
aspect = aoi.width / aoi.height
146+
147+
ov = spark_df.agg(
148+
rf_agg_overview_raster('proj_raster', int(512 * aspect), 512, aoi)
149+
).first()[0]
150+
print("`ov` is of type", type(ov))
151+
ov
152+
```
153+
154+
```python, echo=False
155+
display(ov)
98156
```
99157

100158
## GeoTrellis Layers

pyrasterframes/src/main/python/pyrasterframes/rasterfunctions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def rf_agg_extent(extent_col):
340340

341341
def rf_agg_reprojected_extent(extent_col, src_crs_col, dest_crs):
342342
"""Compute the aggregate extent over a column, first projecting from the row CRS to the destination CRS. """
343-
return Column(RFContext.call('rf_agg_reprojected_extent', _to_java_column(extent_col), _to_java_column(src_crs_col),CRS(dest_crs).__jvm__))
343+
return Column(RFContext.call('rf_agg_reprojected_extent', _to_java_column(extent_col), _to_java_column(src_crs_col), CRS(dest_crs).__jvm__))
344344

345345

346346
def rf_agg_overview_raster(tile_col: Column, cols: int, rows: int, aoi: Extent,
@@ -676,6 +676,12 @@ def rf_tile(proj_raster_col):
676676
return _apply_column_function('rf_tile', proj_raster_col)
677677

678678

679+
def rf_proj_raster(tile, extent, crs):
680+
"""
681+
Construct a `proj_raster` structure from individual Tile, Extent, and CRS columns
682+
"""
683+
return _apply_column_function('rf_proj_raster', tile, extent, crs)
684+
679685
def st_geometry(geom_col):
680686
"""Convert the given extent/bbox to a polygon"""
681687
return _apply_column_function('st_geometry', geom_col)

pyrasterframes/src/main/python/pyrasterframes/rf_ipython.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323

2424
import numpy as np
2525

26+
_png_header = bytearray([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])
27+
2628

2729
def plot_tile(tile, normalize=True, lower_percentile=1, upper_percentile=99, axis=None, **imshow_args):
2830
"""
@@ -115,6 +117,19 @@ def tile_to_html(tile, fig_size=None):
115117
return b64_img_html.format(b64_png)
116118

117119

120+
def binary_to_html(blob):
121+
""" When using rf_render_png, the result from the JVM is a byte string with special PNG header
122+
Look for this header and return base64 encoded HTML for Jupyter display
123+
"""
124+
import base64
125+
if blob[:8] == _png_header:
126+
b64_img_html = '<img src="data:image/png;base64,{}" />'
127+
b64_png = base64.b64encode(blob).decode('utf-8').replace('\n', '')
128+
return b64_img_html.format(b64_png)
129+
else:
130+
return blob
131+
132+
118133
def pandas_df_to_html(df):
119134
"""Provide HTML formatting for pandas.DataFrame with rf_types.Tile in the columns. """
120135
import pandas as pd
@@ -129,11 +144,14 @@ def pandas_df_to_html(df):
129144

130145
tile_cols = []
131146
geom_cols = []
147+
bytearray_cols = []
132148
for c in df.columns:
133149
if isinstance(df.iloc[0][c], pyrasterframes.rf_types.Tile): # if the first is a Tile try formatting
134150
tile_cols.append(c)
135151
elif isinstance(df.iloc[0][c], BaseGeometry): # if the first is a Geometry try formatting
136152
geom_cols.append(c)
153+
elif isinstance(df.iloc[0][c], bytearray):
154+
bytearray_cols.append(c)
137155

138156
def _safe_tile_to_html(t):
139157
if isinstance(t, pyrasterframes.rf_types.Tile):
@@ -152,9 +170,16 @@ def _safe_geom_to_html(g):
152170
else:
153171
return g.__repr__()
154172

173+
def _safe_bytearray_to_html(b):
174+
if isinstance(b, bytearray):
175+
return binary_to_html(b)
176+
else:
177+
return b.__repr__()
178+
155179
# dict keyed by column with custom rendering function
156180
formatter = {c: _safe_tile_to_html for c in tile_cols}
157181
formatter.update({c: _safe_geom_to_html for c in geom_cols})
182+
formatter.update({c: _safe_bytearray_to_html for c in bytearray_cols})
158183

159184
# This is needed to avoid our tile being rendered as `<img src="only up to fifty char...`
160185
pd.set_option('display.max_colwidth', -1)

pyrasterframes/src/main/python/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ geopandas>=0.6.2,<0.7
1010
descartes>=1.1.0,<1.2
1111
pytz
1212
matplotlib
13-
rtree
13+
rtree
14+
Pillow

pyrasterframes/src/main/python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def _divided(msg):
4646
divider = ('-' * 50)
4747
return divider + '\n' + msg + '\n' + divider
4848

49-
# Should we move to https://github.com/aaren/notedown? It allows converstion without evaluation...
49+
5050
class PweaveDocs(distutils.cmd.Command):
5151
"""A custom command to run documentation scripts through pweave."""
5252
description = 'Pweave PyRasterFrames documentation scripts'

pyrasterframes/src/main/python/tests/RasterFunctionsTests.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,5 +512,9 @@ def test_rf_agg_overview_raster(self):
512512
# with open('/tmp/test_rf_agg_overview_raster.png', 'wb') as f:
513513
# f.write(png)
514514

515-
515+
def test_rf_proj_raster(self):
516+
df = self.prdf.select(rf_proj_raster(rf_tile('proj_raster'),
517+
rf_extent('proj_raster'),
518+
rf_crs('proj_raster')).alias('roll_your_own'))
519+
'tile_context' in df.schema['roll_your_own'].dataType.fieldNames()
516520

0 commit comments

Comments
 (0)