locationtech
diff --git a/‎core/src/main/scala/org/locationtech/rasterframes/util/package.scala‎
Lines changed: 19 additions & 0 deletions b/‎core/src/main/scala/org/locationtech/rasterframes/util/package.scala‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala‎
Lines changed: 10 additions & 0 deletions b/‎core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎pyrasterframes/src/main/python/docs/aggregation.pymd‎
Lines changed: 14 additions & 13 deletions b/‎pyrasterframes/src/main/python/docs/aggregation.pymd‎
Lines changed: 14 additions & 13 deletions
diff --git a/‎pyrasterframes/src/main/python/docs/getting-started.pymd‎
Lines changed: 4 additions & 4 deletions b/‎pyrasterframes/src/main/python/docs/getting-started.pymd‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎pyrasterframes/src/main/python/docs/index.md‎
Lines changed: 2 additions & 0 deletions b/‎pyrasterframes/src/main/python/docs/index.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pyrasterframes/src/main/python/docs/languages.pymd‎
Lines changed: 8 additions & 7 deletions b/‎pyrasterframes/src/main/python/docs/languages.pymd‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎pyrasterframes/src/main/python/docs/nodata-handling.pymd‎
Lines changed: 34 additions & 24 deletions b/‎pyrasterframes/src/main/python/docs/nodata-handling.pymd‎
Lines changed: 34 additions & 24 deletions
diff --git a/‎pyrasterframes/src/main/python/docs/raster-catalogs.pymd‎
Lines changed: 4 additions & 4 deletions b/‎pyrasterframes/src/main/python/docs/raster-catalogs.pymd‎
Lines changed: 4 additions & 4 deletions
@@ -203,6 +203,25 @@ package object util {
         .mkString("| ", " |\n| ", " |")
       header + body
     }
+
+    /**
+     * Renders the first `numRows` rows of the DataFrame as an HTML `<table>`.
+     *
+     * Every column is cast to a string. In column names and cell values the
+     * characters `&`, `<` and `>` are replaced by character entities so that
+     * data cannot break the generated markup. Null cells are rendered as empty
+     * `<td>` elements, and a DataFrame without rows yields an empty `<tbody>`.
+     *
+     * @param numRows  maximum number of rows to render
+     * @param truncate when true, each cell is cut to its first 40 characters
+     *                 (applied before escaping, so entities are never split)
+     * @return the table markup, consisting of a `<thead>` and a `<tbody>` section
+     */
+    def toHTML(numRows: Int = 5, truncate: Boolean = false): String = {
+      import df.sqlContext.implicits._
+      // Renaming import keeps the extra Spark functions out of the way of any
+      // identically named members already visible in this file.
+      import org.apache.spark.sql.{functions => F}
+
+      // '&' has to be replaced first, otherwise the entities produced for
+      // '<' and '>' would be escaped a second time.
+      val escapeText = (s: String) =>
+        s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
+      val cols = df.columns
+      val header = "<thead>\n" +
+        cols.map(escapeText).mkString("<tr><th>", "</th><th>", "</th></tr>\n") +
+        "</thead>\n"
+      val stringifiers = cols
+        .map(c => s"`$c`")
+        .map(c => df.col(c).cast(StringType))
+        .map(c => if (truncate) substring(c, 1, 40) else c)
+        .map(c => F.regexp_replace(c, "&", "&amp;"))
+        .map(c => F.regexp_replace(c, "<", "&lt;"))
+        .map(c => F.regexp_replace(c, ">", "&gt;"))
+        // concat_ws skips null inputs, which would drop a cell and shift the
+        // remaining ones to the left; substitute an empty string instead.
+        .map(c => F.coalesce(c, F.lit("")))
+      val cat = concat_ws("</td><td>", stringifiers: _*)
+      // Wrap each row individually so that zero rows produce no <tr> at all
+      // (mkString(start, sep, end) would still emit start + end).
+      val body = df
+        .select(cat).limit(numRows)
+        .as[String]
+        .collect()
+        .map(row => "<tr><td>" + row + "</td></tr>\n")
+        .mkString
+
+      "<table>\n" + header + "<tbody>\n" + body + "</tbody>\n" + "</table>"
+    }
   }
 
   object Shims {
 
@@ -28,6 +28,8 @@ import geotrellis.spark.{KeyBounds, SpatialKey, TileLayerMetadata}
 import org.apache.spark.sql.Encoders
 import org.locationtech.rasterframes.util.SubdivideSupport
 
+import scala.xml.parsing.XhtmlParser
+
 /**
  * Tests miscellaneous extension methods.
  *
@@ -114,5 +116,13 @@ class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSu
       import org.locationtech.rasterframes.util._
       rf.toMarkdown().count(_ == '|') shouldBe >=(3 * 5)
     }
+
+    it("should render HTML") {
+      import org.locationtech.rasterframes.util._
+
+      // Both rendering the table and parsing the result as XHTML
+      // have to complete without throwing.
+      noException shouldBe thrownBy {
+        val html = rf.toHTML()
+        val input = scala.io.Source.fromString(html)
+        XhtmlParser(input)
+      }
+    }
   }
 }
@@ -33,14 +33,16 @@ print(tiles[1]['tile'].cells)
 
 We use the @ref:[`rf_tile_mean`](reference.md#rf-tile-mean) function to compute the _tile_ aggregate mean of cells in each row of column `tile`. The mean of each _tile_ is computed separately, so the first mean is 1.0 and the second mean is 3.0. Notice that the number of rows in the DataFrame is the same before and after the aggregation.
 
-```python, tile_mean, results='raw'
-rf.select(F.col('id'), rf_tile_mean(F.col('tile'))).show()
+```python, tile_mean
+means = rf.select(F.col('id'), rf_tile_mean(F.col('tile')))
+display(means)
 ```
 
 We use the @ref:[`rf_agg_mean`](reference.md#rf-agg-mean) function to compute the DataFrame aggregate, which averages 25 values of 1.0 and 25 values of 3.0, across the fifty cells in two rows. Note that only a single row is returned since the average is computed over the full DataFrame.
 
-```python, agg_mean, results='raw'
-rf.agg(rf_agg_mean(F.col('tile'))).show()
+```python, agg_mean
+mean = rf.agg(rf_agg_mean(F.col('tile')))
+display(mean)
 ```
 
 We use the @ref:[`rf_agg_local_mean`](reference.md#rf-agg-local-mean) function to compute the element-wise local aggregate mean across the two rows. For this aggregation, we are computing the mean of one value of 1.0 and one value of 3.0 to arrive at the element-wise mean, but doing so twenty-five times, one for each position in the _tile_.
@@ -57,11 +59,10 @@ print(t.cells)
 
 We can also count the total number of data and NoData cells over all the _tiles_ in a DataFrame using @ref:[`rf_agg_data_cells`](reference.md#rf-agg-data-cells) and @ref:[`rf_agg_no_data_cells`](reference.md#rf-agg-no-data-cells). There are ~3.8 million data cells and ~1.9 million NoData cells in this DataFrame. See the section on @ref:["NoData" handling](nodata-handling.md) for additional discussion on handling missing data.
 
-```python, cell_counts, results='raw'
+```python, cell_counts
 rf = spark.read.raster('https://s22s-test-geotiffs.s3.amazonaws.com/MCD43A4.006/11/05/2018233/MCD43A4.A2018233.h11v05.006.2018242035530_B02.TIF')
 stats = rf.agg(rf_agg_data_cells('proj_raster'), rf_agg_no_data_cells('proj_raster'))
-
-stats.show()
+display(stats)
 ```
 
 ## Statistical Summaries
@@ -77,16 +78,16 @@ stats = rf.select(rf_tile_stats('proj_raster').alias('stats'))
 stats.printSchema()
 ```
 
-```python, show_stats, results='raw'
-stats.select('stats.min', 'stats.max', 'stats.mean', 'stats.variance').show(10, truncate=False)
+```python, show_stats
+display(stats.select('stats.min', 'stats.max', 'stats.mean', 'stats.variance'))
 ```
 
 The @ref:[`rf_agg_stats`](reference.md#rf-agg-stats) function aggregates over all of the _tiles_ in a DataFrame and returns a statistical summary of all cell values as shown below.
 
-```python, agg_stats, results='raw'
-rf.agg(rf_agg_stats('proj_raster').alias('stats')) \
-    .select('stats.min', 'stats.max', 'stats.mean', 'stats.variance') \
-    .show()
+```python, agg_stats
+stats = rf.agg(rf_agg_stats('proj_raster').alias('stats')) \
+    .select('stats.min', 'stats.max', 'stats.mean', 'stats.variance')
+display(stats)   
 ```
 
 The @ref:[`rf_agg_local_stats`](reference.md#rf-agg-local-stats) function computes the element-wise local aggregate statistical summary as shown below. The DataFrame used in the previous two code blocks has unequal _tile_ dimensions, so a different DataFrame is used in this code block to avoid a runtime error.
 
@@ -34,17 +34,17 @@ spark = pyrasterframes.get_spark_session()
 
 Then, you can read a raster and work with it in a Spark DataFrame.
 
-```python, local_add, results='raw'
+```python, local_add
 from pyrasterframes.rasterfunctions import *
 from pyspark.sql.functions import lit
 
 # Read a MODIS surface reflectance granule
 df = spark.read.raster('https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF')
 
 # Add 3 element-wise, show some rows of the DataFrame
-df.withColumn('added', rf_local_add(df.proj_raster, lit(3))) \
-  .select(rf_crs('added'), rf_extent('added'), rf_tile('added')) \
-  .show(3)
+sample = df.withColumn('added', rf_local_add(df.proj_raster, lit(3))) \
+  .select(rf_crs('added'), rf_extent('added'), rf_tile('added'))
+display(sample)
 ```
 
 This example is extended in the [getting started Jupyter notebook](https://nbviewer.jupyter.org/github/locationtech/rasterframes/blob/develop/rf-notebook/src/main/notebooks/Getting%20Started.ipynb).
 
@@ -10,6 +10,8 @@ The source code can be found on GitHub at [locationtech/rasterframes](https://gi
 
 <img src="RasterFramePipeline.png" width="600px"/>
 
+RasterFrames is released under the [Apache 2.0 License](https://github.com/locationtech/rasterframes/blob/develop/LICENSE).
+
 <hr/>
 
@@@ div { .md-left}
 
@@ -50,7 +50,7 @@ red_nir_tiles_monthly_2017 = spark.read.raster(
 
 ### Step 4: Compute aggregates
 
-```python, step_4_python, results='raw'
+```python, step_4_python
 result = red_nir_tiles_monthly_2017 \
     .where(st_intersects(
         st_reproject(rf_geometry(col('red')), rf_crs(col('red')).crsProj4, rf_mk_crs('EPSG:4326')),
@@ -60,7 +60,7 @@ result = red_nir_tiles_monthly_2017 \
     .agg(rf_agg_stats(rf_normalized_difference(col('nir'), col('red'))).alias('ndvi_stats')) \
     .orderBy(col('month')) \
     .select('month', 'ndvi_stats.*')
-result.show()
+display(result)
 ```
 
 ## SQL
@@ -80,14 +80,14 @@ sql("CREATE OR REPLACE TEMPORARY VIEW modis USING `aws-pds-modis-catalog`")
 
 ### Step 2: Down-select data by month
 
-```python, step_2_sql, results='raw'
+```python, step_2_sql
 sql("""
 CREATE OR REPLACE TEMPORARY VIEW red_nir_monthly_2017 AS
 SELECT granule_id, month(acquisition_date) as month, B01 as red, B02 as nir
 FROM modis
 WHERE year(acquisition_date) = 2017 AND day(acquisition_date) = 15 AND granule_id = 'h21v09'
 """)
-sql('DESCRIBE red_nir_monthly_2017').show()
+display(sql('DESCRIBE red_nir_monthly_2017'))
 ```
 
 ### Step 3: Read tiles
@@ -106,16 +106,17 @@ OPTIONS (
 
 ### Step 4: Compute aggregates
 
-```python, step_4_sql, results='raw'
-sql("""
+```python, step_4_sql
+grouped = sql("""
 SELECT month, ndvi_stats.* FROM (
     SELECT month, rf_agg_stats(rf_normalized_difference(nir, red)) as ndvi_stats
     FROM red_nir_tiles_monthly_2017
     WHERE st_intersects(st_reproject(rf_geometry(red), rf_crs(red), 'EPSG:4326'), st_makePoint(34.870605, -4.729727))
     GROUP BY month
     ORDER BY month
 )
-""").show()
+""")
+display(grouped)
 ```
 
 ## Scala
 
@@ -38,9 +38,10 @@ CellType.float64()
 
 We can also inspect the cell type of a given _tile_ or `proj_raster` column.
 
-```python, ct_from_sen, results='raw'
-spark.read.raster('https://s22s-test-geotiffs.s3.amazonaws.com/luray_snp/B02.tif') \
-    .select(rf_cell_type('proj_raster')).distinct().show()
+```python, ct_from_sen
+cell_types = spark.read.raster('https://s22s-test-geotiffs.s3.amazonaws.com/luray_snp/B02.tif') \
+    .select(rf_cell_type('proj_raster')).distinct()
+display(cell_types)    
 ```
 
 ### Understanding Cell Types and NoData
@@ -93,13 +94,14 @@ unmasked = spark.read.raster(catalog=cat, catalog_col_names=['blue', 'scl'])
 unmasked.printSchema()
 ```
 
-```python, show_cell_types, results='raw'
-unmasked.select(rf_cell_type('blue'), rf_cell_type('scl')).distinct().show()
+```python, show_cell_types
+cell_types = unmasked.select(rf_cell_type('blue'), rf_cell_type('scl')).distinct()
+display(cell_types)
 ```
 
 Drawing on @ref:[local map algebra](local-algebra.md) techniques, we will create new _tile_ columns that are indicators of unwanted pixels, as defined above. Since the mask column is an integer type, the addition is equivalent to a logical or, so the boolean true values are 1.
 
-```python, def_mask, results='raw'
+```python, def_mask
 from pyspark.sql.functions import lit
 
 mask_part = unmasked.withColumn('nodata', rf_local_equal('scl', lit(0))) \
@@ -113,13 +115,15 @@ one_mask = mask_part.withColumn('mask', rf_local_add('nodata', 'defect')) \
                     .withColumn('mask', rf_local_add('mask', 'cloud9')) \
                     .withColumn('mask', rf_local_add('mask', 'cirrus'))
 
-one_mask.select(rf_cell_type('mask')).distinct().show()
+cell_types = one_mask.select(rf_cell_type('mask')).distinct()
+display(cell_types)
 ```
 
 Because no NoData value is already defined, we will choose one. In this particular example, the minimum value is greater than zero, so we can use 0 as the NoData value.
 
-```python, pick_nd, results='raw'
-one_mask.agg(rf_agg_stats('blue').min.alias('blue_min')).show()
+```python, pick_nd
+blue_min = one_mask.agg(rf_agg_stats('blue').min.alias('blue_min'))
+display(blue_min)
 ```
 
 We can now construct the cell type string for our blue band's cell type, designating 0 as NoData.
@@ -135,14 +139,15 @@ Now we will use the @ref:[`rf_mask_by_value`](reference.md#rf-mask-by-value) to
 ```python, mask_blu
 with_nd = rf_convert_cell_type('blue', masked_blue_ct)
 masked = one_mask.withColumn('blue_masked',
-                             rf_mask_by_value(with_nd, 'mask', lit(1))) \
-                 .drop('nodata', 'defect', 'cloud8', 'cloud9', 'cirrus', 'blue')
+    rf_mask_by_value(with_nd, 'mask', lit(1))) \
+    .drop('nodata', 'defect', 'cloud8', 'cloud9', 'cirrus', 'blue')
 ```
 
 We can verify that the number of NoData cells in the resulting `blue_masked` column matches the total of the boolean `mask` _tile_ to ensure our logic is correct.
 
-```python, show_masked, results='raw'
-masked.select(rf_no_data_cells('blue_masked'), rf_tile_sum('mask')).show(10)
+```python, show_masked
+counts = masked.select(rf_no_data_cells('blue_masked'), rf_tile_sum('mask'))
+display(counts)
 ```
 
 It's also nice to view a sample. The white regions are areas of NoData.
@@ -247,22 +252,24 @@ RasterFrames supports having _tile_ columns with different cell types in a singl
 
 Let's first create a RasterFrame that has columns of `float` and `int` cell type.
 
-```python, show_1, results='raw'
+```python, show_1
 x = Tile((np.ones((100, 100))*2), CellType.float64())
 y = Tile((np.ones((100, 100))*3), CellType.int32())
 rf = spark.createDataFrame([Row(x=x, y=y)])
 
-rf.select(rf_cell_type('x'), rf_cell_type('y')).distinct().show()
+cell_types = rf.select(rf_cell_type('x'), rf_cell_type('y')).distinct()
+display(cell_types)
 ```
 
 When performing a local operation between _tile_ columns with cell types `int` and `float`, the resulting _tile_ cell type will be `float`. In local algebra over two _tiles_ of different "sized" cell types, the resulting cell type will be the larger of the two input _tiles'_ cell types.
 
-```python, show_2, results='raw'
-rf.select(
+```python, show_2
+sums = rf.select(
     rf_cell_type('x'),
     rf_cell_type('y'),
     rf_cell_type(rf_local_add('x', 'y')).alias('xy_sum'),
-    ).show(1)
+    )
+display(sums)    
 ```
 
 Combining _tile_ columns of different cell types gets a little trickier when user defined NoData cell types are involved. Let's create two _tile_ columns: one with a NoData value of 1, and one with a NoData value of 2 (using our previously defined `get_nodata_ct` function).
@@ -275,16 +282,18 @@ rf_nd = spark.createDataFrame([Row(x_nd_1=x_nd_1, x_nd_2=x_nd_2)])
 
 Let's try adding the _tile_ columns with different NoData values. When there is an inconsistent NoData value in the two columns, the NoData value of the right-hand side of the sum is kept. In this case, this means the result has a NoData value of 1.
 
-```python, show_3, results='raw'
+```python, show_3
 rf_nd_sum = rf_nd.withColumn('x_nd_sum', rf_local_add('x_nd_2', 'x_nd_1'))
-rf_nd_sum.select(rf_cell_type('x_nd_sum')).distinct().show()
+cell_types = rf_nd_sum.select(rf_cell_type('x_nd_sum')).distinct()
+display(cell_types)
 ```
 
 Reversing the order of the sum changes the NoData value of the resulting column to 2.
 
-```python, show_4, results='raw'
+```python, show_4
 rf_nd_sum = rf_nd.withColumn('x_nd_sum', rf_local_add('x_nd_1', 'x_nd_2'))
-rf_nd_sum.select(rf_cell_type('x_nd_sum')).distinct().show()
+cell_types = rf_nd_sum.select(rf_cell_type('x_nd_sum')).distinct()
+display(cell_types)
 ```
 
 ## NoData Values in Aggregation
@@ -313,6 +322,7 @@ masked_rf = rf.withColumn('tile_nd_1',
 
 The results of `rf_tile_sum` vary depending on which _tiles_ were masked. This is because any cells with NoData values are ignored in the aggregation. Note that `tile_nd_2` has the lowest sum, since it has the fewest data cells.
 
-```python, show_5, results='raw'
-masked_rf.select(rf_tile_sum('tile'), rf_tile_sum('tile_nd_1'), rf_tile_sum('tile_nd_2')).show()
+```python, show_5
+sums = masked_rf.select(rf_tile_sum('tile'), rf_tile_sum('tile_nd_1'), rf_tile_sum('tile_nd_2'))
+display(sums)
 ```
@@ -94,7 +94,7 @@ two_d_cat_df.show(truncate=False)
 
 The concept of a _catalog_ is much more powerful when we consider examples beyond constructing the DataFrame, and instead read the data from an external source. Here's an extended example of reading a cloud-hosted CSV file containing MODIS scene metadata and transforming it into a _catalog_. The metadata describing the content of each URL is an important aspect of processing raster data.
 
-```python, remote_csv, results='raw'
+```python, remote_csv
 from pyspark import SparkFiles
 from pyspark.sql import functions as F
 
@@ -104,20 +104,20 @@ scene_list = spark.read \
     .format("csv") \
     .option("header", "true") \
     .load(SparkFiles.get("2018-07-04_scenes.txt"))
-scene_list.show(4, truncate=False)
+display(scene_list)
 ```
 
 Observe that the scene list file has URIs to `index.html` files in the `download_url` column. The image URIs are in the same directory. The filenames are of the form `${gid}_B${band}.TIF`. The next code chunk builds these URIs, which completes our catalog.
 
-```python, show_remote_catalog, results='raw'
+```python, show_remote_catalog
 modis_catalog = scene_list \
     .withColumn('base_url',
         F.concat(F.regexp_replace('download_url', 'index.html$', ''), 'gid',)
     ) \
     .withColumn('B01' , F.concat('base_url', F.lit("_B01.TIF"))) \
     .withColumn('B02' , F.concat('base_url', F.lit("_B02.TIF"))) \
     .withColumn('B03' , F.concat('base_url', F.lit("_B03.TIF")))
-modis_catalog.show(4, truncate=True)
+display(modis_catalog)
 ```
 
 ## Using Built-in Catalogs
Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,8 @@ import geotrellis.spark.{KeyBounds, SpatialKey, TileLayerMetadata}`
`28`	`28`	`import org.apache.spark.sql.Encoders`
`29`	`29`	`import org.locationtech.rasterframes.util.SubdivideSupport`
`30`	`30`
	`31`	`+import scala.xml.parsing.XhtmlParser`
	`32`	`+`
`31`	`33`	`/**`
`32`	`34`	`* Tests miscellaneous extension methods.`
`33`	`35`	`*`
`@@ -114,5 +116,13 @@ class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSu`
`114`	`116`	`import org.locationtech.rasterframes.util._`
`115`	`117`	`rf.toMarkdown().count(_ == '\|') shouldBe >=(3 * 5)`
`116`	`118`	`}`
	`119`	`+`
	`120`	`+ it("should render HTML") {`
	`121`	`+ import org.locationtech.rasterframes.util._`
	`122`	`+`
	`123`	`+ noException shouldBe thrownBy {`
	`124`	`+ XhtmlParser(scala.io.Source.fromString(rf.toHTML()))`
	`125`	`+ }`
	`126`	`+ }`
`117`	`127`	`}`
`118`	`128`	`}`