Skip to content

Commit 3f87e34

Browse files
committed
Merge branch 'develop' into feature/docs-intro-update
* develop: PR feedback. Regression fix. Release notes update. Added forced truncation of WKT types in Markdown/HTML rendering. Ensure default tile size is applied to `raster` reader. Fix nodata doc Doc supervised, set tile size to 256 for visual Update doc to use rf_local_is_in when masking; fix #351 Close #310 move reference to static rf_local_is_in python implementation Applying pre-partitioning to DataSources. Expanded RasterRefSpec to ensure lazy tiles provide metadata without I/O. Fix unit tests for rf_local_is_in Attempting to keep TravisCI from timing out by using jobs. Add rf_local_is_in function
2 parents ba2848e + f226ff0 commit 3f87e34

File tree

24 files changed

+267
-180
lines changed

24 files changed

+267
-180
lines changed

.travis.yml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
sudo: false
21
dist: xenial
32
language: python
43

@@ -28,11 +27,10 @@ install:
2827
- pip install rasterio shapely pandas numpy pweave
2928
- wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf -
3029

31-
script:
32-
- sbt/bin/sbt -java-home $JAVA_HOME -batch test
33-
- sbt/bin/sbt -java-home $JAVA_HOME -batch it:test
34-
# - sbt -Dfile.encoding=UTF8 clean coverage test coverageReport
35-
# Tricks to avoid unnecessary cache updates
36-
- find $HOME/.sbt -name "*.lock" | xargs rm
37-
- find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm
3830

31+
jobs:
32+
include:
33+
- stage: "Unit Tests"
34+
script: sbt/bin/sbt -java-home $JAVA_HOME -batch test
35+
- stage: "Integration Tests"
36+
script: sbt/bin/sbt -java-home $JAVA_HOME -batch it:test

core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,9 @@ trait RasterFunctions {
405405
/** Cellwise inequality comparison between a tile and a scalar. */
406406
def rf_local_unequal[T: Numeric](tileCol: Column, value: T): Column = Unequal(tileCol, value)
407407

408+
/** Test if each cell value is in provided array */
409+
def rf_local_is_in(tileCol: Column, arrayCol: Column): Column = IsIn(tileCol, arrayCol)
410+
408411
/** Return a tile with ones where the input is NoData, otherwise zero */
409412
def rf_local_no_data(tileCol: Column): Column = Undefined(tileCol)
410413

core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ object TileRasterizerAggregate {
138138
}
139139
}
140140

141-
// Scan table and constuct what the TileLayerMetadata would be in the specified destination CRS.
141+
// Scan table and construct what the TileLayerMetadata would be in the specified destination CRS.
142142
val tlm: TileLayerMetadata[SpatialKey] = df
143143
.select(
144144
ProjectedLayerMetadataAggregate(
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* This software is licensed under the Apache 2 license, quoted below.
3+
*
4+
* Copyright 2019 Astraea, Inc.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
7+
* use this file except in compliance with the License. You may obtain a copy of
8+
* the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15+
* License for the specific language governing permissions and limitations under
16+
* the License.
17+
*
18+
* SPDX-License-Identifier: Apache-2.0
19+
*
20+
*/
21+
22+
package org.locationtech.rasterframes.expressions.localops
23+
24+
import geotrellis.raster.Tile
25+
import geotrellis.raster.mapalgebra.local.IfCell
26+
import org.apache.spark.sql.Column
27+
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
28+
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
29+
import org.apache.spark.sql.types.{ArrayType, DataType}
30+
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
31+
import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, ExpressionDescription}
32+
import org.apache.spark.sql.catalyst.util.ArrayData
33+
import org.apache.spark.sql.rf.TileUDT
34+
import org.locationtech.rasterframes.encoders.CatalystSerializer._
35+
import org.locationtech.rasterframes.expressions.DynamicExtractors._
36+
import org.locationtech.rasterframes.expressions._
37+
38+
@ExpressionDescription(
39+
usage = "_FUNC_(tile, rhs) - In each cell of `tile`, return true if the value is in rhs.",
40+
arguments = """
41+
Arguments:
42+
* tile - tile column whose cell values are tested
43+
* rhs - array to test against
44+
""",
45+
examples = """
46+
Examples:
47+
> SELECT _FUNC_(tile, array(33, 66, 99));
48+
..."""
49+
)
50+
case class IsIn(left: Expression, right: Expression) extends BinaryExpression with CodegenFallback {
51+
override val nodeName: String = "rf_local_is_in"
52+
53+
override def dataType: DataType = left.dataType
54+
55+
@transient private lazy val elementType: DataType = right.dataType.asInstanceOf[ArrayType].elementType
56+
57+
override def checkInputDataTypes(): TypeCheckResult =
58+
if(!tileExtractor.isDefinedAt(left.dataType)) {
59+
TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
60+
} else right.dataType match {
61+
case _: ArrayType => TypeCheckSuccess
62+
case _ => TypeCheckFailure(s"Input type '${right.dataType}' does not conform to ArrayType.")
63+
}
64+
65+
override protected def nullSafeEval(input1: Any, input2: Any): Any = {
66+
implicit val tileSer = TileUDT.tileSerializer
67+
val (childTile, childCtx) = tileExtractor(left.dataType)(row(input1))
68+
69+
val arr = input2.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
70+
71+
childCtx match {
72+
case Some(ctx) => ctx.toProjectRasterTile(op(childTile, arr)).toInternalRow
73+
case None => op(childTile, arr).toInternalRow
74+
}
75+
76+
}
77+
78+
protected def op(left: Tile, right: IndexedSeq[AnyRef]): Tile = {
79+
def fn(i: Int): Boolean = right.contains(i)
80+
IfCell(left, fn(_), 1, 0)
81+
}
82+
83+
}
84+
85+
object IsIn {
86+
def apply(left: Column, right: Column): Column =
87+
new Column(IsIn(left.expr, right.expr))
88+
}

core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ package object expressions {
8686
registry.registerExpression[GreaterEqual]("rf_local_greater_equal")
8787
registry.registerExpression[Equal]("rf_local_equal")
8888
registry.registerExpression[Unequal]("rf_local_unequal")
89+
registry.registerExpression[IsIn]("rf_local_is_in")
8990
registry.registerExpression[Undefined]("rf_local_no_data")
9091
registry.registerExpression[Defined]("rf_local_data")
9192
registry.registerExpression[Sum]("rf_tile_sum")

core/src/main/scala/org/locationtech/rasterframes/util/DataFrameRenderers.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@ package org.locationtech.rasterframes.util
2424
import geotrellis.raster.render.ColorRamps
2525
import org.apache.spark.sql.Dataset
2626
import org.apache.spark.sql.functions.{base64, concat, concat_ws, length, lit, substring, when}
27+
import org.apache.spark.sql.jts.JTSTypes
2728
import org.apache.spark.sql.types.{StringType, StructField}
2829
import org.locationtech.rasterframes.expressions.DynamicExtractors
2930
import org.locationtech.rasterframes.{rfConfig, rf_render_png, rf_resample}
31+
import org.apache.spark.sql.rf.WithTypeConformity
3032

3133
/**
32-
* DataFrame extensiosn for rendering sample content in a number of ways
34+
* DataFrame extension for rendering sample content in a number of ways
3335
*/
3436
trait DataFrameRenderers {
3537
private val truncateWidth = rfConfig.getInt("max-truncate-row-element-length")
@@ -47,8 +49,9 @@ trait DataFrameRenderers {
4749
lit("\"></img>")
4850
)
4951
else {
52+
val isGeom = WithTypeConformity(c.dataType).conformsTo(JTSTypes.GeometryTypeInstance)
5053
val str = resolved.cast(StringType)
51-
if (truncate)
54+
if (truncate || isGeom)
5255
when(length(str) > lit(truncateWidth),
5356
concat(substring(str, 1, truncateWidth), lit("..."))
5457
)

core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf.aux.xml

Lines changed: 0 additions & 92 deletions
This file was deleted.

core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import scala.xml.parsing.XhtmlParser
3939
class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSupport {
4040
lazy val rf = sampleTileLayerRDD.toLayer
4141

42-
describe("DataFrame exention methods") {
42+
describe("DataFrame extension methods") {
4343
it("should maintain original type") {
4444
val df = rf.withPrefixedColumnNames("_foo_")
4545
"val rf2: RasterFrameLayer = df" should compile
@@ -49,7 +49,7 @@ class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSu
4949
"val Some(col) = df.spatialKeyColumn" should compile
5050
}
5151
}
52-
describe("RasterFrameLayer exention methods") {
52+
describe("RasterFrameLayer extension methods") {
5353
it("should provide spatial key column") {
5454
noException should be thrownBy {
5555
rf.spatialKeyColumn
@@ -124,6 +124,10 @@ class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSu
124124

125125
val md3 = rf.toMarkdown(truncate=true, renderTiles = false)
126126
md3 shouldNot include("<img")
127+
128+
// Should truncate JTS types even when we don't ask for it.
129+
val md4 = rf.withGeometry().select("geometry").toMarkdown(truncate = false)
130+
md4 should include ("...")
127131
}
128132

129133
it("should render HTML") {

core/src/test/scala/org/locationtech/rasterframes/RasterFunctionsSpec.scala

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,4 +972,28 @@ class RasterFunctionsSpec extends TestEnvironment with RasterMatchers {
972972
val dResult = df.select($"ld").as[Tile].first()
973973
dResult should be (randNDPRT.localDefined())
974974
}
975+
976+
it("should check values isin"){
977+
checkDocs("rf_local_is_in")
978+
979+
// tile is 3 by 3 with values, 1 to 9
980+
val df = Seq(byteArrayTile).toDF("t")
981+
.withColumn("one", lit(1))
982+
.withColumn("five", lit(5))
983+
.withColumn("ten", lit(10))
984+
.withColumn("in_expect_2", rf_local_is_in($"t", array($"one", $"five")))
985+
.withColumn("in_expect_1", rf_local_is_in($"t", array($"ten", $"five")))
986+
.withColumn("in_expect_0", rf_local_is_in($"t", array($"ten")))
987+
988+
val e2Result = df.select(rf_tile_sum($"in_expect_2")).as[Double].first()
989+
e2Result should be (2.0)
990+
991+
val e1Result = df.select(rf_tile_sum($"in_expect_1")).as[Double].first()
992+
e1Result should be (1.0)
993+
994+
val e0Result = df.select($"in_expect_0").as[Tile].first()
995+
e0Result.toArray() should contain only (0)
996+
997+
// lazy val invalid = df.select(rf_local_is_in($"t", lit("foobar"))).as[Tile].first()
998+
}
975999
}

core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -253,14 +253,18 @@ class RasterRefSpec extends TestEnvironment with TestData {
253253
}
254254
}
255255

256-
it("should construct a RasterRefTile without I/O") {
256+
it("should construct and inspect a RasterRefTile without I/O") {
257257
new Fixture {
258258
// SimpleRasterInfo is a proxy for header data requests.
259-
val start = SimpleRasterInfo.cacheStats.hitCount()
259+
val startStats = SimpleRasterInfo.cacheStats
260260
val t: ProjectedRasterTile = RasterRefTile(subRaster)
261-
val result = Seq(t, subRaster.tile).toDF("tile").first()
262-
val end = SimpleRasterInfo.cacheStats.hitCount()
263-
end should be(start)
261+
val df = Seq(t, subRaster.tile).toDF("tile")
262+
val result = df.first()
263+
SimpleRasterInfo.cacheStats.hitCount() should be(startStats.hitCount())
264+
SimpleRasterInfo.cacheStats.missCount() should be(startStats.missCount())
265+
val info = df.select(rf_dimensions($"tile"), rf_extent($"tile")).first()
266+
SimpleRasterInfo.cacheStats.hitCount() should be(startStats.hitCount() + 2)
267+
SimpleRasterInfo.cacheStats.missCount() should be(startStats.missCount())
264268
}
265269
}
266270
}

0 commit comments

Comments
 (0)