Skip to content

Commit a73e368

Browse files
authored
Merge pull request #219 from s22s/feature/raster-read-uris
Added VSI driver handling via gdal:// scheme.
2 parents 86b64ed + 54e7ef1 commit a73e368

File tree

8 files changed

+40
-17
lines changed

8 files changed

+40
-17
lines changed

core/src/main/resources/reference.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ vlm.gdal {
1212
//CPL_DEBUG = "OFF"
1313
AWS_REQUEST_PAYER = "requester"
1414
GDAL_DISABLE_READDIR_ON_OPEN = "YES"
15-
CPL_VSIL_CURL_ALLOWED_EXTENSIONS = ".tif,.tiff,.jp2,.mrf,.idx,.lrc,.mrf.aux.xml"
15+
CPL_VSIL_CURL_ALLOWED_EXTENSIONS = ".tif,.tiff,.jp2,.mrf,.idx,.lrc,.mrf.aux.xml,.vrt"
1616
}
1717
// set this to `false` if CPL_DEBUG is `ON`
1818
useExceptions = true

core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ case class GDALRasterSource(source: URI) extends RasterSource with URIRasterSour
3434

3535
@transient
3636
private lazy val gdal: VLMRasterSource = {
37-
val cleaned = source.toASCIIString.replace("gdal+", "")
37+
val cleaned = source.toASCIIString
38+
.replace("gdal+", "")
39+
.replace("gdal:/", "")
3840
// VSIPath doesn't like single slash "file:/path..."
3941
val tweaked =
4042
if (cleaned.matches("^file:/[^/].*"))

core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ object RasterSource extends LazyLogging {
140140

141141
/** Extractor for determining if a scheme indicates GDAL preference. */
142142
def unapply(source: URI): Boolean =
143-
gdalOnly(source) || ((preferGdal || source.getScheme.startsWith("gdal+")) && hasGDAL)
143+
gdalOnly(source) || ((preferGdal || source.getScheme.startsWith("gdal")) && hasGDAL)
144144
}
145145

146146
object IsDefaultGeoTiff {
58.4 KB
Binary file not shown.

core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import geotrellis.proj4.LatLng
2525
import geotrellis.raster.{ByteCellType, GridBounds, TileLayout}
2626
import geotrellis.spark.tiling.{CRSWorldExtent, LayoutDefinition}
2727
import geotrellis.spark.{KeyBounds, SpatialKey, TileLayerMetadata}
28-
import org.apache.spark.sql.Encoders
28+
import org.apache.spark.sql.{Encoder, Encoders}
2929
import org.locationtech.rasterframes.util.SubdivideSupport
3030

3131
/**
@@ -57,23 +57,27 @@ class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSu
5757
}
5858
describe("Miscellaneous extensions") {
5959
import spark.implicits._
60+
6061
it("should find multiple extent columns") {
6162
val df = Seq((extent, "fred", extent, 34.0)).toDF("e1", "s", "e2", "n")
6263
df.extentColumns.size should be(2)
6364
}
65+
6466
it("should find multiple crs columns") {
6567
// Not sure why implicit resolution isn't handling this properly.
6668
implicit val enc = Encoders.tuple(crsEncoder, Encoders.STRING, crsEncoder, Encoders.scalaDouble)
6769
val df = Seq((pe.crs, "fred", pe.crs, 34.0)).toDF("c1", "s", "c2", "n")
6870
df.crsColumns.size should be (2)
6971
}
72+
7073
it("should split TileLayout") {
7174
val tl1 = TileLayout(2, 3, 10, 10)
7275
assert(tl1.subdivide(0) === tl1)
7376
assert(tl1.subdivide(1) === tl1)
7477
assert(tl1.subdivide(2) === TileLayout(4, 6, 5, 5))
7578
assertThrows[IllegalArgumentException](tl1.subdivide(-1))
7679
}
80+
7781
it("should split KeyBounds[SpatialKey]") {
7882
val grid = GridBounds(0, 0, 9, 9)
7983
val kb = KeyBounds(grid)

core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
package org.locationtech.rasterframes.ref
2323

24+
import java.net.URI
25+
2426
import org.locationtech.rasterframes.TestData
2527
import geotrellis.vector.Extent
2628
import org.apache.spark.sql.rf.RasterSourceUDT
@@ -125,6 +127,15 @@ class RasterSourceSpec extends TestEnvironment with TestData {
125127
gdal.layoutBounds(dims) should contain allElementsOf jvm.layoutBounds(dims)
126128
gdal.layoutExtents(dims) should contain allElementsOf jvm.layoutExtents(dims)
127129
}
130+
131+
132+
it("should support vsi file paths") {
133+
val archivePath = geotiffDir.resolve("L8-archive.zip")
134+
val archiveURI = URI.create("gdal://vsizip/" + archivePath.toString + "/L8-RGB-VA.tiff")
135+
val gdal = GDALRasterSource(archiveURI)
136+
137+
gdal.bandCount should be (3)
138+
}
128139
}
129140
}
130141

pyrasterframes/src/main/python/docs/raster-read.pymd

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,24 @@ display(tile)
5151

5252
## URI Formats
5353

54-
/[ TODO Populate this with valid URI's schemes we can use with an example? including how to use gdal /]
55-
56-
file:// (is this only valid for spark local? or if file is )
57-
https://
58-
http:// ?
59-
s3a://
60-
s3n:// ?
61-
hdfs:// ?
62-
ftp:// ?
63-
azure ?
64-
google cloud ?
65-
66-
gdal vsi? https://gdal.org/user/virtual_file_systems.html
54+
RasterFrames relies on three different I/O drivers, selected based on a combination of scheme, file extensions, and library availability. GDAL is used first if a compatible version of GDAL (>= 2.4) is installed. It is the only driver that can read non-GeoTIFF files. If GDAL is not available, either the _Java I/O_ or _Hadoop_ driver will be selected, depending on scheme.
55+
56+
57+
| Prefix | GDAL | Java I/O | Hadoop |
58+
| ------------------- | ----------- | -------- | ------ |
59+
| `gdal://<vsidrv>//` | ✔︎ | -︎ | - |
60+
| `file://` | ✔︎ | ✔︎ | - |
61+
| `http://` | ✔︎ | ✔︎ | - |
62+
| `https://` | ✔︎ | ✔︎ | - |
63+
| `ftp://` | `/vsicurl/` | ✔ | - |
64+
| `hdfs://` | `/vsihdfs/` ︎| - | ✔︎ |
65+
| `s3://` | `/vsis3/` ︎ | ✔︎ | - |
66+
| `s3n://` | - ︎ | - | ✔︎ |
67+
| `s3a://` | - ︎ | - | ✔︎ |
68+
| `wasb://` | `/vsiaz/` ︎ | - | ✔︎ |
69+
| `wasbs://` | - ︎ | - | ✔︎ |
70+
71+
Specific [GDAL Virtual File System drivers](https://gdal.org/user/virtual_file_systems.html) can be selected using the `gdal://<vsidrv>//` syntax. For example, if you have an `archive.zip` file containing a GeoTIFF named `my-file-inside.tif`, you can address it with `gdal://vsizip//path/to/archive.zip/my-file-inside.tif`. See the GDAL documentation for the format of the URIs after the `gdal:/` prefix (which is stripped off before passing the rest of the path to GDAL).
6772

6873

6974
## Raster Catalogs

pyrasterframes/src/main/python/setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def doctype(self):
134134
'pytest==3.4.2',
135135
'pypandoc',
136136
'numpy>=1.7',
137+
'shapley',
137138
'pandas',
138139
'rasterio'
139140
],

0 commit comments

Comments
 (0)