Skip to content

Commit 2bfa9af

Browse files
committed
Merge branch 'release/0.6.2-RC2'
2 parents 679f48d + d94bb15 commit 2bfa9af

File tree

95 files changed

+2960
-549
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+2960
-549
lines changed

.sbtopts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
-Djava.awt.headless=true
2-
-J-Xmx2g
31
-J-XX:MaxMetaspaceSize=1g

bench/src/main/scala/astraea/spark/rasterframes/bench/HistogramEncodeBench.scala

Lines changed: 0 additions & 76 deletions
This file was deleted.

build.sbt

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,33 @@ addCommandAlias("console", "datasource/console")
33

44
lazy val root = project
55
.in(file("."))
6-
.withId("RF")
7-
.aggregate(core, datasource)
8-
.settings(publishArtifact := false)
6+
.withId("RasterFrames")
7+
.aggregate(core, datasource, pyrasterframes, experimental)
8+
.settings(publish / skip := true)
99
.settings(releaseSettings)
1010

1111
lazy val core = project
12+
.disablePlugins(SparkPackagePlugin)
13+
14+
lazy val pyrasterframes = project
15+
.dependsOn(core, datasource)
16+
.settings(assemblySettings)
1217

1318
lazy val datasource = project
1419
.dependsOn(core % "test->test;compile->compile")
20+
.disablePlugins(SparkPackagePlugin)
21+
22+
lazy val experimental = project
23+
.dependsOn(core % "test->test;compile->compile")
24+
.dependsOn(datasource % "test->test;compile->compile")
25+
.disablePlugins(SparkPackagePlugin)
1526

1627
lazy val docs = project
1728
.dependsOn(core, datasource)
29+
.disablePlugins(SparkPackagePlugin)
1830

1931
lazy val bench = project
2032
.dependsOn(core)
21-
33+
.disablePlugins(SparkPackagePlugin)
34+
.settings(publish / skip := true)
2235

core/build.sbt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ moduleName := "rasterframes"
44

55
libraryDependencies ++= Seq(
66
"com.chuusai" %% "shapeless" % "2.3.2",
7-
"org.locationtech.geomesa" %% "geomesa-z3" % "1.3.5",
8-
"org.locationtech.geomesa" %% "geomesa-spark-jts" % "2.0.0-astraea.1" exclude("jgridshift", "jgridshift"),
7+
"org.locationtech.geomesa" %% "geomesa-z3" % rfGeoMesaVersion.value,
8+
"org.locationtech.geomesa" %% "geomesa-spark-jts" % rfGeoMesaVersion.value exclude("jgridshift", "jgridshift"),
99
spark("core").value % Provided,
1010
spark("mllib").value % Provided,
1111
spark("sql").value % Provided,
@@ -19,7 +19,7 @@ libraryDependencies ++= Seq(
1919
)
2020

2121
buildInfoKeys ++= Seq[BuildInfoKey](
22-
name, version, scalaVersion, sbtVersion, rfGeotrellisVersion, rfSparkVersion
22+
name, version, scalaVersion, sbtVersion, rfGeoTrellisVersion, rfGeoMesaVersion, rfSparkVersion
2323
)
2424

2525
buildInfoPackage := "astraea.spark.rasterframes"
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
package astraea.spark.rasterframes
2+
3+
import astraea.spark.rasterframes.util._
4+
import geotrellis.raster.{MultibandTile, Tile, TileFeature}
5+
import geotrellis.spark.{SpaceTimeKey, SpatialKey, TemporalKey}
6+
import org.apache.spark.rdd.RDD
7+
import org.apache.spark.sql._
8+
import org.apache.spark.sql.gt.types.TileUDT
9+
import org.apache.spark.sql.types._
10+
11+
import scala.annotation.implicitNotFound
12+
13+
/**
14+
* Typeclass for converting a Pair RDD into a dataframe.
15+
*
16+
* @since 4/8/18
17+
*/
18+
@implicitNotFound("An RDD converter is required create a RasterFrame. " +
19+
"Please provide an implementation of PairRDDConverter[${K}, ${V}].")
20+
trait PairRDDConverter[K, V] extends Serializable {
21+
val schema: StructType
22+
def toDataFrame(rdd: RDD[(K, V)])(implicit spark: SparkSession): DataFrame
23+
}
24+
25+
object PairRDDConverter {
26+
/** Enrichment over a pair RDD for converting it to a DataFrame given a converter. */
27+
implicit class RDDCanBeDataFrame[K, V](rdd: RDD[(K, V)])(implicit spark: SparkSession, converter: PairRDDConverter[K, V]) {
28+
def toDataFrame: DataFrame = converter.toDataFrame(rdd)
29+
}
30+
31+
// Hack around Spark bug when singletons are used in schemas
32+
private val serializableTileUDT = new TileUDT()
33+
34+
/** Fetch converter from implicit scope. */
35+
def apply[K, V](implicit sp: PairRDDConverter[K, V]) = sp
36+
37+
/** Enables conversion of `RDD[(SpatialKey, Tile)]` to DataFrame. */
38+
implicit val spatialTileConverter = new PairRDDConverter[SpatialKey, Tile] {
39+
val schema: StructType = {
40+
StructType(Seq(
41+
StructField(SPATIAL_KEY_COLUMN.columnName, spatialKeyEncoder.schema, nullable = false),
42+
StructField(TILE_COLUMN.columnName, serializableTileUDT, nullable = false)
43+
))
44+
}
45+
46+
def toDataFrame(rdd: RDD[(SpatialKey, Tile)])(implicit spark: SparkSession): DataFrame = {
47+
import spark.implicits._
48+
rdd.toDF(schema.fields.map(_.name): _*)
49+
}
50+
}
51+
52+
/** Enables conversion of `RDD[(SpaceTimeKey, Tile)]` to DataFrame. */
53+
implicit val spaceTimeTileConverter = new PairRDDConverter[SpaceTimeKey, Tile] {
54+
val schema: StructType = {
55+
val base = spatialTileConverter.schema
56+
val addedFields = Seq(StructField(TEMPORAL_KEY_COLUMN.columnName, temporalKeyEncoder.schema, nullable = false))
57+
StructType(base.fields.patch(1, addedFields, 0))
58+
}
59+
60+
def toDataFrame(rdd: RDD[(SpaceTimeKey, Tile)])(implicit spark: SparkSession): DataFrame = {
61+
import spark.implicits._
62+
rdd.map{ case (k, v) => (k.spatialKey, k.temporalKey, v)}.toDF(schema.fields.map(_.name): _*)
63+
}
64+
}
65+
66+
/** Enables conversion of `RDD[(SpatialKey, TileFeature[Tile, D])]` to DataFrame. */
67+
implicit def spatialTileFeatureConverter[D: Encoder] = new PairRDDConverter[SpatialKey, TileFeature[Tile, D]] {
68+
implicit val featureEncoder = implicitly[Encoder[D]]
69+
implicit val rowEncoder = Encoders.tuple(spatialKeyEncoder, singlebandTileEncoder, featureEncoder)
70+
71+
val schema: StructType = {
72+
val base = spatialTileConverter.schema
73+
StructType(base.fields :+ StructField(TILE_FEATURE_DATA_COLUMN.columnName, featureEncoder.schema, nullable = true))
74+
}
75+
76+
def toDataFrame(rdd: RDD[(SpatialKey, TileFeature[Tile, D])])(implicit spark: SparkSession): DataFrame = {
77+
import spark.implicits._
78+
rdd.map{ case (k, v) => (k, v.tile, v.data)}.toDF(schema.fields.map(_.name): _*)
79+
}
80+
}
81+
82+
/** Enables conversion of `RDD[(SpaceTimeKey, TileFeature[Tile, D])]` to DataFrame. */
83+
implicit def spaceTimeTileFeatureConverter[D: Encoder] = new PairRDDConverter[SpaceTimeKey, TileFeature[Tile, D]] {
84+
implicit val featureEncoder = implicitly[Encoder[D]]
85+
implicit val rowEncoder = Encoders.tuple(spatialKeyEncoder, temporalKeyEncoder, singlebandTileEncoder, featureEncoder)
86+
87+
val schema: StructType = {
88+
val base = spaceTimeTileConverter.schema
89+
StructType(base.fields :+ StructField(TILE_FEATURE_DATA_COLUMN.columnName, featureEncoder.schema, nullable = true))
90+
}
91+
92+
def toDataFrame(rdd: RDD[(SpaceTimeKey, TileFeature[Tile, D])])(implicit spark: SparkSession): DataFrame = {
93+
import spark.implicits._
94+
val tupRDD = rdd.map { case (k, v) => (k.spatialKey, k.temporalKey, v.tile, v.data) }
95+
96+
rddToDatasetHolder(tupRDD)
97+
tupRDD.toDF(schema.fields.map(_.name): _*)
98+
}
99+
}
100+
101+
/** Enables conversion of `RDD[(SpatialKey, MultibandTile)]` to DataFrame. */
102+
def forSpatialMultiband(bands: Int) = new PairRDDConverter[SpatialKey, MultibandTile] {
103+
val schema: StructType = {
104+
val base = spatialTileConverter.schema
105+
106+
val basename = TILE_COLUMN.columnName
107+
108+
val tiles = for(i <- 1 to bands) yield {
109+
StructField(s"${basename}_$i" , serializableTileUDT, nullable = false)
110+
}
111+
112+
StructType(base.fields.patch(1, tiles, 1))
113+
}
114+
115+
def toDataFrame(rdd: RDD[(SpatialKey, MultibandTile)])(implicit spark: SparkSession): DataFrame = {
116+
spark.createDataFrame(
117+
rdd.map { case (k, v) => Row(Row(k.col, k.row) +: v.bands: _*) },
118+
schema
119+
)
120+
}
121+
}
122+
123+
/** Enables conversion of `RDD[(SpaceTimeKey, MultibandTile)]` to DataFrame. */
124+
def forSpaceTimeMultiband(bands: Int) = new PairRDDConverter[SpaceTimeKey, MultibandTile] {
125+
val schema: StructType = {
126+
val base = spaceTimeTileConverter.schema
127+
128+
val basename = TILE_COLUMN.columnName
129+
130+
val tiles = for(i <- 1 to bands) yield {
131+
StructField(s"${basename}_$i" , serializableTileUDT, nullable = false)
132+
}
133+
134+
StructType(base.fields.patch(2, tiles, 1))
135+
}
136+
137+
def toDataFrame(rdd: RDD[(SpaceTimeKey, MultibandTile)])(implicit spark: SparkSession): DataFrame = {
138+
spark.createDataFrame(
139+
rdd.map { case (k, v) => Row(Seq(Row(k.spatialKey.col, k.spatialKey.row), Row(k.temporalKey)) ++ v.bands: _*) },
140+
schema
141+
)
142+
}
143+
}
144+
}

core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ package astraea.spark.rasterframes
2222
import astraea.spark.rasterframes.encoders.SparkDefaultEncoders
2323
import astraea.spark.rasterframes.expressions.ExplodeTileExpression
2424
import astraea.spark.rasterframes.functions.{CellCountAggregateFunction, CellMeanAggregateFunction}
25+
import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
2526
import astraea.spark.rasterframes.{functions => F}
2627
import com.vividsolutions.jts.geom.Envelope
27-
import geotrellis.raster.histogram.Histogram
2828
import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
2929
import geotrellis.raster.{CellType, Tile}
3030
import org.apache.spark.annotation.Experimental
@@ -84,21 +84,25 @@ trait RasterFunctions {
8484
def cellType(col: Column): TypedColumn[Any, String] =
8585
expressions.CellTypeExpression(col.expr).asColumn.as[String]
8686

87+
/** Change the Tile's cell type */
88+
def convertCellType(col: Column, cellType: CellType): TypedColumn[Any, Tile] =
89+
udf[Tile, Tile](F.convertCellType(cellType)).apply(col).as[Tile]
90+
8791
/** Assign a `NoData` value to the Tiles. */
8892
def withNoData(col: Column, nodata: Double) = withAlias("withNoData", col)(
8993
udf[Tile, Tile](F.withNoData(nodata)).apply(col)
9094
).as[Tile]
9195

9296
/** Compute the full column aggregate floating point histogram. */
93-
def aggHistogram(col: Column): TypedColumn[Any, Histogram[Double]] =
97+
def aggHistogram(col: Column): TypedColumn[Any, CellHistogram] =
9498
withAlias("histogram", col)(
9599
F.aggHistogram(col)
96-
).as[Histogram[Double]]
100+
).as[CellHistogram]
97101

98102
/** Compute the full column aggregate floating point statistics. */
99-
def aggStats(col: Column): TypedColumn[Any, Statistics] = withAlias("aggStats", col)(
103+
def aggStats(col: Column): TypedColumn[Any, CellStatistics] = withAlias("aggStats", col)(
100104
F.aggStats(col)
101-
).as[Statistics]
105+
).as[CellStatistics]
102106

103107
/** Computes the column aggregate mean. */
104108
def aggMean(col: Column) = CellMeanAggregateFunction(col.expr)
@@ -140,16 +144,16 @@ trait RasterFunctions {
140144
).as[Double]
141145

142146
/** Compute TileHistogram of Tile values. */
143-
def tileHistogram(col: Column): TypedColumn[Any, Histogram[Double]] =
147+
def tileHistogram(col: Column): TypedColumn[Any, CellHistogram] =
144148
withAlias("tileHistogram", col)(
145-
udf[Histogram[Double], Tile](F.tileHistogram).apply(col)
146-
).as[Histogram[Double]]
149+
udf[CellHistogram, Tile](F.tileHistogram).apply(col)
150+
).as[CellHistogram]
147151

148152
/** Compute statistics of Tile values. */
149-
def tileStats(col: Column): TypedColumn[Any, Statistics] =
153+
def tileStats(col: Column): TypedColumn[Any, CellStatistics] =
150154
withAlias("tileStats", col)(
151-
udf[Statistics, Tile](F.tileStats).apply(col)
152-
).as[Statistics]
155+
udf[CellStatistics, Tile](F.tileStats).apply(col)
156+
).as[CellStatistics]
153157

154158
/** Counts the number of non-NoData cells per Tile. */
155159
def dataCells(tile: Column): TypedColumn[Any, Long] =

core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,36 +19,35 @@
1919

2020
package astraea.spark.rasterframes.encoders
2121

22-
import astraea.spark.rasterframes.Statistics
23-
import geotrellis.raster.histogram.Histogram
24-
import geotrellis.raster.{MultibandTile, Tile}
22+
import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
23+
import geotrellis.raster.Tile
2524
import geotrellis.spark.tiling.LayoutDefinition
2625
import geotrellis.spark.{KeyBounds, SpaceTimeKey, SpatialKey, TemporalKey, TileLayerMetadata}
2726
import geotrellis.vector.Extent
28-
import org.apache.spark.sql.Encoder
2927
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
28+
import org.apache.spark.sql.{Encoder, Encoders}
3029

3130
import scala.reflect.runtime.universe._
3231

3332
/**
3433
* Implicit encoder definitions for RasterFrame types.
3534
*/
3635
trait StandardEncoders {
36+
implicit val spatialKeyEncoder = ExpressionEncoder[SpatialKey]
37+
implicit val temporalKeyEncoder = ExpressionEncoder[TemporalKey]
38+
implicit val spaceTimeKeyEncoder = ExpressionEncoder[SpaceTimeKey]
39+
implicit val statsEncoder = ExpressionEncoder[CellStatistics]
40+
implicit val histEncoder = ExpressionEncoder[CellHistogram]
41+
implicit val layoutDefinitionEncoder = ExpressionEncoder[LayoutDefinition]
42+
implicit val stkBoundsEncoder = ExpressionEncoder[KeyBounds[SpaceTimeKey]]
43+
implicit val extentEncoder = ExpressionEncoder[Extent]
44+
3745
implicit def singlebandTileEncoder = ExpressionEncoder[Tile]()
38-
implicit def multibandTileEncoder = ExpressionEncoder[MultibandTile]()
46+
implicit def tileLayerMetadataEncoder[K: TypeTag]: Encoder[TileLayerMetadata[K]] = TileLayerMetadataEncoder[K]()
3947
implicit val crsEncoder = CRSEncoder()
40-
implicit val extentEncoder = ExpressionEncoder[Extent]()
4148
implicit val projectedExtentEncoder = ProjectedExtentEncoder()
4249
implicit val temporalProjectedExtentEncoder = TemporalProjectedExtentEncoder()
43-
implicit def histogramDoubleEncoder = ExpressionEncoder[Histogram[Double]]()
44-
implicit val statsEncoder = ExpressionEncoder[Statistics]()
45-
implicit def tileLayerMetadataEncoder[K: TypeTag]: Encoder[TileLayerMetadata[K]] = TileLayerMetadataEncoder[K]()
46-
implicit val layoutDefinitionEncoder = ExpressionEncoder[LayoutDefinition]()
47-
implicit val stkBoundsEncoder = ExpressionEncoder[KeyBounds[SpaceTimeKey]]()
4850
implicit val cellTypeEncoder = CellTypeEncoder()
49-
implicit val spatialKeyEncoder = ExpressionEncoder[SpatialKey]()
50-
implicit val temporalKeyEncoder = ExpressionEncoder[TemporalKey]()
51-
implicit val spaceTimeKeyEncoder = ExpressionEncoder[SpaceTimeKey]()
5251
implicit val uriEncoder = URIEncoder()
5352
implicit val envelopeEncoder = EnvelopeEncoder()
5453
}

0 commit comments

Comments
 (0)