Skip to content

Commit 3541cd6

Browse files
authored
Merge pull request #286 from s22s/feature/tile-show
Better tile rendering
2 parents 6ac3d47 + eb9da2c commit 3541cd6

File tree

3 files changed

+74
-24
lines changed

3 files changed

+74
-24
lines changed

pyrasterframes/src/main/python/docs/nodata-handling.pymd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
## What is NoData?
44

5-
In raster operations, the preservation and correct processing of missing observations is very important. In [most DataFrames and scientific computing](https://www.oreilly.com/learning/handling-missing-data), the idea of missing data is expressed as a `null` or `NaN` value. However, a great deal of raster data is stored for space efficiency, which typically leads to use of integral values with a ["sentinel" value](https://en.wikipedia.org/wiki/Sentinel_value) designated to represent missing observations. This sentinel value varies across data products and is usually called the "NoData" value.
5+
In raster operations, the preservation and correct processing of missing observations is very important. In [most DataFrames and in scientific computing](https://www.oreilly.com/learning/handling-missing-data), the idea of missing data is expressed as a `null` or `NaN` value. However, a great deal of raster data is stored for space efficiency, which typically leads to use of integral values with a ["sentinel" value](https://en.wikipedia.org/wiki/Sentinel_value) designated to represent missing observations. This sentinel value varies across data products and is usually called the "NoData" value.
66

77
RasterFrames provides a variety of functions to inspect and manage NoData within _tiles_.
88

pyrasterframes/src/main/python/docs/numpy-pandas.pymd

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ import pyrasterframes.rf_ipython
4545
from pyspark.sql.functions import lit, col
4646

4747
cat = spark.read.format('aws-pds-modis-catalog').load() \
48-
.filter(
49-
(col('granule_id') == 'h11v04') &
50-
(col('acquisition_date') > lit('2018-02-19')) &
51-
(col('acquisition_date') < lit('2018-02-22'))
52-
)
48+
.filter(
49+
(col('granule_id') == 'h11v04') &
50+
(col('acquisition_date') > lit('2018-02-19')) &
51+
(col('acquisition_date') < lit('2018-02-22'))
52+
)
5353

5454
spark_df = spark.read.raster(catalog=cat, catalog_col_names=['B01']) \
5555
.select(
@@ -92,7 +92,7 @@ np.abs(diff.cells).max()
9292
We can also inspect an image of the difference between the two _tiles_, which is just random noise. Both _tiles_ have the same structure of NoData, as exhibited by the white areas.
9393

9494
```python udf_diff_noise_tile
95-
display(diff)
95+
diff.show(0, 100)
9696
```
9797

9898
## Creating a Spark DataFrame
@@ -105,12 +105,11 @@ The example below will create a Pandas DataFrame with ten rows of noise _tiles_
105105
import pandas as pd
106106
from shapely.geometry import Point
107107

108-
pandas_df = pd.DataFrame([
109-
{
110-
'tile': Tile(np.random.randn(100, 100)),
111-
'geom': Point(-90 + 90 * np.random.random((2, 1)))
112-
} for _ in range(10)
113-
])
108+
pandas_df = pd.DataFrame([{
109+
'tile': Tile(np.random.randn(100, 100)),
110+
'geom': Point(-90 + 90 * np.random.random((2, 1)))
111+
} for _ in range(10)
112+
])
114113

115114
spark_df = spark.createDataFrame(pandas_df)
116115

pyrasterframes/src/main/python/pyrasterframes/rf_ipython.py

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,58 @@
1919
#
2020

2121
import pyrasterframes.rf_types
22+
import numpy as np
23+
24+
25+
def plot_tile(tile, normalize, lower_percentile=1, upper_percentile=99, axis=None, **imshow_args):
    """
    Display an image of the tile on a matplotlib axis.

    Parameters
    ----------
    tile: object exposing a ``cells`` array attribute holding the raster data
    normalize: if True, clip the data to the lower_percentile and
        upper_percentile bounds and rescale it to [0, 1] before display
    lower_percentile: between 0 and 100 inclusive.
        Specifies to clip values below this percentile
    upper_percentile: between 0 and 100 inclusive.
        Specifies to clip values above this percentile
    axis : matplotlib axis object to plot onto. Creates new axis if None
    imshow_args : parameters to pass into matplotlib.pyplot.imshow
        see https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.imshow.html

    Returns
    -------
    created or modified axis object
    """

    if axis is None:
        # Deferred import: matplotlib is only required when no axis is supplied.
        import matplotlib.pyplot as plt
        axis = plt.gca()

    arr = tile.cells

    def normalize_cells(cells):
        """Clip cells to the percentile bounds and rescale into [0, 1]."""
        assert upper_percentile > lower_percentile, 'invalid upper and lower percentiles {}, {}'.format(lower_percentile, upper_percentile)
        # NOTE(review): np.array strips any masked-array wrapper before the
        # percentile computation — confirm masked (NoData) cells hold NaN here.
        sans_mask = np.array(cells)
        lower = np.nanpercentile(sans_mask, lower_percentile)
        upper = np.nanpercentile(sans_mask, upper_percentile)
        cells_clipped = np.clip(cells, lower, upper)
        span = upper - lower
        if span == 0:
            # Constant-valued tile: the original formula divided by zero and
            # produced NaN/Inf; render a uniform zero image instead.
            return np.zeros_like(cells_clipped, dtype=float)
        return (cells_clipped - lower) / span

    # Raster cells are square; hide tick marks for a compact thumbnail.
    axis.set_aspect('equal')
    axis.xaxis.set_ticks([])
    axis.yaxis.set_ticks([])

    if normalize:
        cells = normalize_cells(arr)
    else:
        cells = arr

    axis.imshow(cells, **imshow_args)

    return axis
71+
72+
73+
def tile_to_png(tile, lower_percentile=1, upper_percentile=99, title=None, fig_size=None):
2574
""" Provide image of Tile."""
2675
if tile.cells is None:
2776
return None
@@ -31,23 +80,24 @@ def tile_to_png(tile, fig_size=None):
3180
from matplotlib.figure import Figure
3281

3382
# Set up matplotlib objects
34-
nominal_size = 2 # approx full size for a 256x256 tile
83+
nominal_size = 3 # approx full size for a 256x256 tile
3584
if fig_size is None:
3685
fig_size = (nominal_size, nominal_size)
3786

3887
fig = Figure(figsize=fig_size)
3988
canvas = FigureCanvas(fig)
4089
axis = fig.add_subplot(1, 1, 1)
4190

42-
data = tile.cells
43-
44-
axis.imshow(data)
91+
plot_tile(tile, True, lower_percentile, upper_percentile, axis=axis)
4592
axis.set_aspect('equal')
4693
axis.xaxis.set_ticks([])
4794
axis.yaxis.set_ticks([])
4895

49-
axis.set_title('{}, {}'.format(tile.dimensions(), tile.cell_type.__repr__()),
50-
fontsize=fig_size[0]*4) # compact metadata as title
96+
if title is None:
97+
axis.set_title('{}, {}'.format(tile.dimensions(), tile.cell_type.__repr__()),
98+
fontsize=fig_size[0]*4) # compact metadata as title
99+
else:
100+
axis.set_title(title, fontsize=fig_size[0]*4) # compact metadata as title
51101

52102
with io.BytesIO() as output:
53103
canvas.print_png(output)
def tile_to_html(tile, fig_size=None):
    """ Provide HTML string representation of Tile image."""
    import base64
    png_bits = tile_to_png(tile, fig_size=fig_size)
    # Base64-encode the PNG payload; strip newlines so it inlines cleanly.
    encoded = base64.b64encode(png_bits).decode('utf-8').replace('\n', '')
    b64_img_html = '<img src="data:image/png;base64,{}" />'
    return b64_img_html.format(encoded)
64114

@@ -102,6 +152,7 @@ def _safe_tile_to_html(t):
102152
pd.set_option('display.max_colwidth', default_max_colwidth)
103153
return return_html
104154

155+
105156
def spark_df_to_markdown(df, num_rows=5, truncate=True, vertical=False):
106157
from pyrasterframes import RFContext
107158
return RFContext.active().call("_dfToMarkdown", df._jdf, num_rows, truncate)
@@ -122,14 +173,14 @@ def spark_df_to_markdown(df, num_rows=5, truncate=True, vertical=False):
122173
markdown_formatter = ip.display_formatter.formatters['text/markdown']
123174
html_formatter.for_type(pyspark.sql.DataFrame, spark_df_to_markdown)
124175

125-
Tile.show = lambda t: display_png(t._repr_png_(), raw=True)
176+
Tile.show = lambda tile, normalize=False, lower_percentile=1, upper_percentile=99, axis=None, **imshow_args: \
177+
plot_tile(tile, normalize, lower_percentile, upper_percentile, axis, **imshow_args)
178+
Tile.show.__doc__ = plot_tile.__doc__
126179

127180
# See if we're in documentation mode and register a custom show implementation.
128181
if 'InProcessInteractiveShell' in ip.__class__.__name__:
129182
pyspark.sql.DataFrame._repr_markdown_ = spark_df_to_markdown
130183
pyspark.sql.DataFrame.show = lambda df, num_rows=5, truncate=True: display_markdown(spark_df_to_markdown(df, num_rows, truncate), raw=True)
131184

132185
except ImportError as e:
133-
print(e)
134-
raise e
135186
pass

0 commit comments

Comments
 (0)