Skip to content

Commit 5da64f8

Browse files
authored
Merge pull request #256 from s22s/fix/169
Use implicit file:// for schemeless paths with leading /
2 parents 76e494d + 4ffbd9c commit 5da64f8

File tree

5 files changed

+57
-6
lines changed

5 files changed

+57
-6
lines changed

core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,18 @@ object RasterSource extends LazyLogging {
129129
} else false
130130

131131
/** Extractor for determining if a scheme indicates GDAL preference. */
132-
def unapply(source: URI): Boolean =
133-
gdalOnly(source) || ((preferGdal || source.getScheme.startsWith("gdal")) && GDALRasterSource.hasGDAL)
132+
def unapply(source: URI): Boolean = {
133+
lazy val schemeIsGdal = Option(source.getScheme())
134+
.exists(_.startsWith("gdal"))
135+
136+
gdalOnly(source) || ((preferGdal || schemeIsGdal) && GDALRasterSource.hasGDAL)
137+
}
134138
}
135139

136140
object IsDefaultGeoTiff {
137141
def unapply(source: URI): Boolean = source.getScheme match {
138-
case "file" | "http" | "https" | "s3" | "" => true
142+
case "file" | "http" | "https" | "s3" => true
143+
case null | "" => true
139144
case _ => false
140145
}
141146
}

core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ class RasterSourceSpec extends TestEnvironment with TestData {
106106
val src = RasterSource(localSrc)
107107
assert(!src.extent.isEmpty)
108108
}
109+
it("should interpret no scheme as file://"){
110+
val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toString()
111+
val schemelessUri = new URI(localSrc)
112+
schemelessUri.getScheme should be (null)
113+
val src = RasterSource(schemelessUri)
114+
assert(!src.extent.isEmpty)
115+
}
109116
}
110117

111118
if(GDALRasterSource.hasGDAL) {
@@ -132,6 +139,15 @@ class RasterSourceSpec extends TestEnvironment with TestData {
132139

133140
gdal.bandCount should be (3)
134141
}
142+
143+
it("should interpret no scheme as file://") {
144+
val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toString()
145+
val schemelessUri = new URI(localSrc)
146+
val gdal = GDALRasterSource(schemelessUri)
147+
val jvm = JVMGeoTiffRasterSource(schemelessUri)
148+
gdal.extent should be (jvm.extent)
149+
gdal.cellSize should be(jvm.cellSize)
150+
}
135151
}
136152
}
137153

pyrasterframes/src/main/python/docs/raster-write.pymd

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ Fortunately, we can use the cluster computing capability to downsample the data
6666

6767
```python write_geotiff
6868
outfile = os.path.join('/tmp', 'geotiff-overview.tif')
69-
spark_df.write.geotiff('file://' + outfile, crs='EPSG:4326', raster_dimensions=(256, 256))
69+
spark_df.write.geotiff(outfile, crs='EPSG:4326', raster_dimensions=(256, 256))
7070
```
7171

7272
View it with `rasterio` to check the results:
@@ -83,6 +83,10 @@ with rasterio.open(outfile) as src:
8383

8484
If there are many tile or projected raster columns in the DataFrame, the GeoTIFF writer will write each one as a separate band in the file. Each band in the output will be tagged with the name of its input column for reference.
8585

86+
```python, echo=False
87+
os.remove(outfile)
88+
```
89+
8690
## GeoTrellis Layers
8791

8892
[GeoTrellis][GeoTrellis] is one of the key libraries that RasterFrames builds upon. It provides a Scala language API for working with large raster data with Apache Spark. Ingesting raster data into a Layer is one of the key concepts for creating a dataset for processing on Spark. RasterFrames can write data from an appropriate DataFrame into a [GeoTrellis Layer](https://geotrellis.readthedocs.io/en/latest/guide/tile-backends.html). RasterFrames provides a `geotrellis` DataSource that supports both reading and writing of GeoTrellis layers.

pyrasterframes/src/main/python/tests/GeoTiffWriterTests.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,14 @@ def _tmpfile():
3333

3434
def test_identity_write(self):
3535
rf = self.spark.read.geotiff(self.img_uri)
36+
rf_count = rf.count()
37+
self.assertTrue(rf_count > 0)
3638

3739
dest = self._tmpfile()
3840
rf.write.geotiff(dest)
3941

40-
rf2 = self.spark.read.geotiff('file://' + dest)
42+
rf2 = self.spark.read.geotiff(dest)
43+
4144
self.assertEqual(rf2.count(), rf.count())
4245

4346
os.remove(dest)
@@ -47,7 +50,7 @@ def test_unstructured_write(self):
4750
dest_file = self._tmpfile()
4851
rf.write.geotiff(dest_file, crs='EPSG:32616')
4952

50-
rf2 = self.spark.read.raster('file://' + dest_file)
53+
rf2 = self.spark.read.raster(dest_file)
5154
self.assertEqual(rf2.count(), rf.count())
5255

5356
with rasterio.open(self.img_uri) as source:
@@ -58,6 +61,22 @@ def test_unstructured_write(self):
5861

5962
os.remove(dest_file)
6063

64+
def test_unstructured_write_schemeless(self):
65+
# should be able to write a projected raster tile column to path like '/data/foo/file.tif'
66+
from pyrasterframes.rasterfunctions import rf_agg_stats, rf_crs
67+
rf = self.spark.read.raster(self.img_uri)
68+
max = rf.agg(rf_agg_stats('proj_raster').max.alias('max')).first()['max']
69+
crs = rf.select(rf_crs('proj_raster').crsProj4.alias('c')).first()['c']
70+
71+
dest_file = self._tmpfile()
72+
self.assertTrue(not dest_file.startswith('file://'))
73+
rf.write.geotiff(dest_file, crs=crs)
74+
75+
with rasterio.open(dest_file) as src:
76+
self.assertEqual(src.read().max(), max)
77+
78+
os.remove(dest_file)
79+
6180
def test_downsampled_write(self):
6281
rf = self.spark.read.raster(self.img_uri)
6382
dest = self._tmpfile()

pyrasterframes/src/main/python/tests/PyRasterFramesTests.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,13 @@ def l8path(b):
470470
print(path_count.toPandas())
471471
self.assertTrue(path_count.count() == 3)
472472

473+
def test_raster_source_reader_schemeless(self):
474+
import os.path
475+
path = os.path.join(self.resource_dir, "L8-B8-Robinson-IL.tiff")
476+
self.assertTrue(not path.startswith('file://'))
477+
df = self.spark.read.raster(path)
478+
self.assertTrue(df.count() > 0)
479+
473480
def test_raster_source_catalog_reader(self):
474481
import pandas as pd
475482

0 commit comments

Comments
 (0)