
Commit 92d9a1a

GeoTIFF writing post-walkthrough comments.

1 parent: 935312a

File tree: 6 files changed, +46 -35 lines

core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/ProjectedLayerMetadataAggregate.scala

Lines changed: 1 addition & 2 deletions
@@ -94,9 +94,8 @@ object ProjectedLayerMetadataAggregate {
     // Ordering must match InputRecord schema
     new ProjectedLayerMetadataAggregate(destCRS, destDims)(extent, crs, cellType, tileSize).as[TileLayerMetadata[SpatialKey]]
 
-
   private[expressions]
-  case class InputRecord(extent: Extent, crs: CRS, cellType: CellType, tileSize: TileDimensions) { self
+  case class InputRecord(extent: Extent, crs: CRS, cellType: CellType, tileSize: TileDimensions) {
     def toBufferRecord(destCRS: CRS): BufferRecord = {
       val transform = Transform(crs, destCRS)
core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala

Lines changed: 4 additions & 3 deletions
@@ -58,7 +58,7 @@ class TileRasterizerAggregate(prd: ProjectedRasterDefinition) extends UserDefine
   override def dataType: DataType = schemaOf[Raster[Tile]]
 
   override def initialize(buffer: MutableAggregationBuffer): Unit = {
-    buffer(0) = ArrayTile.empty(prd.cellType, prd.cols, prd.rows)
+    buffer(0) = ArrayTile.empty(prd.cellType, prd.totalCols, prd.totalRows)
   }
 
   override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
@@ -90,13 +90,14 @@
 object TileRasterizerAggregate {
   val nodeName = "rf_tile_rasterizer_aggregate"
   /** Convenience grouping of parameters needed for running aggregate. */
-  case class ProjectedRasterDefinition(cols: Int, rows: Int, cellType: CellType, crs: CRS, extent: Extent, sampler: ResampleMethod = ResampleMethod.DEFAULT)
+  case class ProjectedRasterDefinition(totalCols: Int, totalRows: Int, cellType: CellType, crs: CRS, extent: Extent, sampler: ResampleMethod = ResampleMethod.DEFAULT)
 
   object ProjectedRasterDefinition {
     def apply(tlm: TileLayerMetadata[_]): ProjectedRasterDefinition = apply(tlm, ResampleMethod.DEFAULT)
 
     def apply(tlm: TileLayerMetadata[_], sampler: ResampleMethod): ProjectedRasterDefinition = {
-      val actualSize = tlm.layout.toRasterExtent().gridBoundsFor(tlm.extent)
+      // Try to determine the actual dimensions of our data coverage
+      val actualSize = tlm.layout.toRasterExtent().gridBoundsFor(tlm.extent) // <--- Do we have the math right here?
       val cols = actualSize.width
       val rows = actualSize.height
       new ProjectedRasterDefinition(cols, rows, tlm.cellType, tlm.crs, tlm.extent, sampler)
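
The `<--- Do we have the math right here?` comment above asks whether `gridBoundsFor` recovers the intended overall dimensions of the data coverage. As a sanity check, here is a plain-arithmetic re-derivation of a grid-bounds computation in Python: a minimal sketch assuming a north-up grid anchored at the raster extent's upper-left corner and inclusive pixel bounds, not GeoTrellis's actual implementation.

```python
import math

def grid_bounds_for(re_xmin, re_ymax, cell_w, cell_h, extent):
    """Pixel bounds of `extent` within a raster extent anchored at (re_xmin, re_ymax)."""
    xmin, ymin, xmax, ymax = extent
    col_min = int(math.floor((xmin - re_xmin) / cell_w))
    row_min = int(math.floor((re_ymax - ymax) / cell_h))
    col_max = int(math.ceil((xmax - re_xmin) / cell_w)) - 1  # inclusive bound
    row_max = int(math.ceil((re_ymax - ymin) / cell_h)) - 1  # inclusive bound
    return col_min, row_min, col_max, row_max

# Layout raster extent anchored at (0, 100) with 1-unit cells; data covers a 50x40 window.
c0, r0, c1, r1 = grid_bounds_for(0.0, 100.0, 1.0, 1.0, (10.0, 20.0, 60.0, 60.0))
print(c1 - c0 + 1, r1 - r0 + 1)  # 50 40 -> the totalCols/totalRows fed to the aggregate
```

If `gridBoundsFor` behaves this way, `actualSize.width` and `actualSize.height` give the full pixel dimensions of the layer extent, which is exactly what `ArrayTile.empty` needs in `initialize` above.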

core/src/main/scala/org/locationtech/rasterframes/extensions/DataFrameMethods.scala

Lines changed: 2 additions & 4 deletions
@@ -22,22 +22,20 @@
 package org.locationtech.rasterframes.extensions
 
 import geotrellis.proj4.CRS
-import geotrellis.raster.{MultibandTile, Tile}
 import geotrellis.spark.io._
 import geotrellis.spark.{SpaceTimeKey, SpatialComponent, SpatialKey, TemporalKey, TileLayerMetadata}
 import geotrellis.util.MethodExtensions
-import geotrellis.vector.{Extent, ProjectedExtent}
-import org.apache.spark.rdd.RDD
+import geotrellis.vector.Extent
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.types.{MetadataBuilder, StructField}
 import org.apache.spark.sql.{Column, DataFrame, TypedColumn}
-import org.locationtech.rasterframes.{MetadataKeys, RasterFrameLayer}
 import org.locationtech.rasterframes.StandardColumns._
 import org.locationtech.rasterframes.encoders.CatalystSerializer._
 import org.locationtech.rasterframes.encoders.StandardEncoders._
 import org.locationtech.rasterframes.expressions.DynamicExtractors
 import org.locationtech.rasterframes.tiles.ProjectedRasterTile
 import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.{MetadataKeys, RasterFrameLayer}
 import spray.json.JsonFormat
 
 import scala.util.Try

datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSource.scala

Lines changed: 15 additions & 11 deletions
@@ -24,19 +24,18 @@ package org.locationtech.rasterframes.datasource.geotiff
 import java.net.URI
 
 import _root_.geotrellis.proj4.CRS
+import _root_.geotrellis.raster._
 import _root_.geotrellis.raster.io.geotiff.compression._
 import _root_.geotrellis.raster.io.geotiff.tags.codes.ColorSpace
 import _root_.geotrellis.raster.io.geotiff.{GeoTiffOptions, MultibandGeoTiff, Tags, Tiled}
-import _root_.geotrellis.raster._
+import _root_.geotrellis.spark._
 import com.typesafe.scalalogging.LazyLogging
-import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider}
 import org.apache.spark.sql._
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider}
 import org.locationtech.rasterframes._
 import org.locationtech.rasterframes.datasource._
-import org.locationtech.rasterframes.expressions.aggregates.{ProjectedLayerMetadataAggregate, TileRasterizerAggregate}
 import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate.ProjectedRasterDefinition
+import org.locationtech.rasterframes.expressions.aggregates.{ProjectedLayerMetadataAggregate, TileRasterizerAggregate}
 import org.locationtech.rasterframes.model.{LazyCRS, TileDimensions}
 import org.locationtech.rasterframes.util._

@@ -96,32 +95,37 @@ class GeoTiffDataSource extends DataSourceRegister
       require(parameters.crs.nonEmpty, "A destination CRS must be provided")
       require(tileCols.nonEmpty, "need at least one tile column")
 
+      // Grab CRS to project into
       val destCRS = parameters.crs.get
 
-      val (extCol, crsCol) = {
+      // Select the anchoring Tile, Extent and CRS columns
+      val (extCol, crsCol, tileCol) = {
+        // Favor "ProjectedRaster" columns
         val prCols = df.projRasterColumns
         if(prCols.nonEmpty) {
-          (rf_extent(prCols.head), rf_crs(prCols.head))
+          (rf_extent(prCols.head), rf_crs(prCols.head), rf_tile(prCols.head))
         }
         else {
+          // If no "ProjectedRaster" column, look for single Extent and CRS columns.
           val crsCols = df.crsColumns
           require(crsCols.size == 1, "Exactly one CRS column must be in DataFrame")
           val extentCols = df.extentColumns
           require(extentCols.size == 1, "Exactly one Extent column must be in DataFrame")
-          (extentCols.head, crsCols.head)
+          (extentCols.head, crsCols.head, tileCols.head)
         }
       }
 
-      val tlm = df
+      // Scan table and construct what the TileLayerMetadata would be in the specified destination CRS.
+      val tlm: TileLayerMetadata[SpatialKey] = df
         .select(ProjectedLayerMetadataAggregate(
-          destCRS, extCol, crsCol, rf_cell_type(tileCols.head), rf_dimensions(tileCols.head)
+          destCRS, extCol, crsCol, rf_cell_type(tileCol), rf_dimensions(tileCol)
         ))
         .first()
 
       val c = ProjectedRasterDefinition(tlm)
 
       val config = parameters.rasterDimensions.map { dims =>
-        c.copy(cols = dims.cols, rows = dims.rows)
+        c.copy(totalCols = dims.cols, totalRows = dims.rows)
       }.getOrElse(c)
 
       val aggs = tileCols
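
For orientation, this write path is what the Python `df.write.geotiff(...)` alias (patched in `pyrasterframes/__init__.py`, below) ultimately invokes. A hedged usage sketch, assuming `df` already carries a tile column plus either a ProjectedRaster column or single Extent and CRS columns, per the `require` checks above:

```python
# Sketch only: `df` is assumed to satisfy the column requirements enforced above.
df.write.geotiff(
    'scene.tif',                   # output path; the full raster is collected to the driver
    crs='EPSG:4326',               # required: destination CRS to project into
    raster_dimensions=(512, 512))  # optional: overrides totalCols/totalRows in the config
```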

pyrasterframes/src/main/python/docs/raster-write.pymd

Lines changed: 3 additions & 4 deletions
@@ -63,7 +63,7 @@ GeoTIFF is one of the most common file formats for spatial data, providing flexi
 
 One downside to GeoTIFF is that it is not a big data native format. To create a GeoTIFF, all the data to be encoded has to be in the memory of one compute node (in Spark parlance, this is a "collect"), limiting its maximum size substantially compared to that of a full cluster environment. When rendering GeoTIFFs in RasterFrames, you either need to specify the dimensions of the output raster, or be aware of how big the collected data will end up being.
 
-Fortunately, we can use the cluster computing capability to downlample the data into a more manageble saze. For sake of example, let's render a simple RGB overview image of our scene as a small raster:
+Fortunately, we can use the cluster computing capability to downsample the data (using nearest-neighbor) into a more manageable size. For the sake of example, let's render a simple RGB overview image of our scene as a small raster, reprojecting it to latitude and longitude coordinates on the [WGS84](https://en.wikipedia.org/wiki/World_Geodetic_System) reference ellipsoid (aka [EPSG:4326](https://spatialreference.org/ref/epsg/4326/)):
 
 ```python write_geotiff
 import os.path
@@ -94,10 +94,9 @@ with rasterio.open(outfile) as src:
 
 ## GeoTrellis Layers
 
-[GeoTrellis][GeoTrellis] is one of the key libraries that RasterFrames builds upon. It provides a Scala language API to working with large raster data with Apache Spark. Ingesting raster data into a Layer is one of the key concepts for creating a dataset for processing on Spark. RasterFrames write data from an appropriate DataFrame into a GeoTrellis Layer.
-
-\[ More details see https://s22s.myjetbrains.com/youtrack/issue/RF-72 \]
+[GeoTrellis][GeoTrellis] is one of the key libraries that RasterFrames builds upon. It provides a Scala language API for working with large raster data with Apache Spark. Ingesting raster data into a Layer is one of the key concepts for creating a dataset for processing on Spark. RasterFrames writes data from an appropriate DataFrame into a [GeoTrellis Layer](https://geotrellis.readthedocs.io/en/latest/guide/tile-backends.html). RasterFrames provides a `geotrellis` DataSource that supports both reading and writing of GeoTrellis layers.
 
+> An example is forthcoming.
 
 ## Parquet
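
The docs' `write_geotiff` code block is elided in this hunk (only its first line shows), but the second hunk header shows it ends by re-opening the output with `rasterio`. A minimal sketch of that kind of round-trip check, with `outfile` assumed to be the path just written:

```python
import rasterio

# Re-open the rendered overview and confirm the reprojection and downsampling took effect.
with rasterio.open(outfile) as src:
    print(src.crs)                # expect EPSG:4326 after reprojection
    print(src.width, src.height)  # expect the requested overview dimensions
    print(src.count)              # bands written, e.g. 3 for an RGB composite
```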

pyrasterframes/src/main/python/pyrasterframes/__init__.py

Lines changed: 21 additions & 11 deletions
@@ -180,18 +180,25 @@ def _geotiff_writer(
         raster_dimensions=None,
         **options):
 
+    def set_dims(parts):
+        # Validate and normalize a two-component dimensions spec into writer options.
+        parts = [int(p) for p in parts]
+        assert len(parts) == 2, "Expected dimensions specification to have exactly two components"
+        assert all(p > 0 for p in parts), "Expected all components in dimensions to be positive integers"
+        options.update({
+            "imageWidth": parts[0],
+            "imageHeight": parts[1]
+        })
+
     if raster_dimensions is not None:
-        if isinstance(raster_dimensions, tuple):
-            options.update({
-                "imageWidth": raster_dimensions[0],
-                "imageHeight": raster_dimensions[1]
-            })
+        if isinstance(raster_dimensions, (list, tuple)):
+            set_dims(raster_dimensions)
         elif isinstance(raster_dimensions, str):
-            parts = raster_dimensions.split(',')
-            options.update({
-                "imageWidth": parts[0],
-                "imageHeight": parts[1]
-            })
+            set_dims(raster_dimensions.split(','))
 
     if crs is not None:
         options.update({
@@ -200,8 +207,11 @@ def _geotiff_writer(
 
     return _aliased_writer(df_writer, "geotiff", path, **options)
 
-# Patch new method on SparkSession to mirror Scala approach
+
+# Patch RasterFrames initialization method on SparkSession to mirror Scala approach
 SparkSession.withRasterFrames = _rf_init
+
+# Patch Kryo serialization initialization method on SparkSession.Builder to mirror Scala approach
 SparkSession.Builder.withKryoSerialization = _kryo_init
 
 # Add the 'asLayer' method to pyspark DataFrame
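
Taken together, these patches give Python users the same fluent session setup as the Scala API. A short, hedged sketch of the intended call pattern:

```python
import pyrasterframes  # importing the package applies the patches above
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .withKryoSerialization()  # patched onto SparkSession.Builder above
         .getOrCreate()
         .withRasterFrames())      # patched onto SparkSession above
```

With the `set_dims` refactor, `raster_dimensions` may then be passed to `df.write.geotiff` as a list, tuple, or `'W,H'` string; all three forms normalize to the same `imageWidth` and `imageHeight` options.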
