diff --git a/Project.toml b/Project.toml index 7cfe5d8..2973f5f 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157" CoordRefSystems = "b46f11dc-f210-4604-bfba-323c1ec968cb" +DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8" GRIBDatasets = "82be9cdb-ee19-4151-bdb3-b400788d9abc" @@ -27,6 +28,7 @@ PlyIO = "42171d58-473b-503a-8d5f-782019eb09ec" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3" +SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9" Shapefile = "8e980c4a-a4fe-5da2-b3a7-4b4b0353a2f4" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" @@ -41,6 +43,7 @@ CSV = "0.10" Colors = "0.12, 0.13" CommonDataModel = "0.2, 0.3, 0.4" CoordRefSystems = "0.19" +DBInterface = "2.6" FileIO = "1.16" Format = "1.3" GRIBDatasets = "0.3, 0.4" @@ -59,6 +62,7 @@ PlyIO = "1.1" PrecompileTools = "1.2" PrettyTables = "3.0" ReadVTK = "0.2" +SQLite = "1.6" Shapefile = "0.13" StaticArrays = "1.6" Tables = "1.7" diff --git a/src/GeoIO.jl b/src/GeoIO.jl index dd8f2e0..bdaf148 100644 --- a/src/GeoIO.jl +++ b/src/GeoIO.jl @@ -38,6 +38,10 @@ import PlyIO # CSV format import CSV +# Database interfaces +import DBInterface +import SQLite + # geostats formats import GslibIO @@ -72,7 +76,7 @@ const CDMEXTS = [".grib", ".nc"] const FORMATS = [ (extension=".csv", load="CSV.jl", save="CSV.jl"), (extension=".geojson", load="GeoJSON.jl", save="GeoJSON.jl"), - (extension=".gpkg", load="ArchGDAL.jl", save="ArchGDAL.jl"), + (extension=".gpkg", load="GeoIO.jl", save="ArchGDAL.jl"), (extension=".grib", load="GRIBDatasets.jl", save=""), (extension=".gslib", load="GslibIO.jl", save="GslibIO.jl"), (extension=".jpeg", load="ImageIO.jl", save="ImageIO.jl"), @@ -127,6 +131,7 @@ include("extra/csv.jl") include("extra/gdal.jl") include("extra/geotiff.jl") include("extra/gis.jl") +include("extra/gpkg.jl") include("extra/img.jl") include("extra/msh.jl") include("extra/obj.jl") diff --git a/src/extra/gis.jl b/src/extra/gis.jl index 90b7d1f..f3b2787 100644 --- a/src/extra/gis.jl +++ b/src/extra/gis.jl @@ -2,22 +2,12 @@ # Licensed under the MIT License. See LICENSE in the project root. # ------------------------------------------------------------------ -function gisread(fname; repair, layer, numtype, kwargs...) +function gisread(fname; layer, numtype, kwargs...) # extract Tables.jl table from GIS format table = gistable(fname; layer, numtype, kwargs...) # convert Tables.jl table to GeoTable - geotable = asgeotable(table) - - # repair pipeline - pipeline = if repair - Repair(11) → Repair(12) - else - Identity() - end - - # perform repairs - geotable |> pipeline + asgeotable(table) end function giswrite(fname, geotable; warn, kwargs...) diff --git a/src/extra/gpkg.jl b/src/extra/gpkg.jl new file mode 100644 index 0000000..a83d124 --- /dev/null +++ b/src/extra/gpkg.jl @@ -0,0 +1,6 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +include("gpkg/wkb.jl") +include("gpkg/read.jl") diff --git a/src/extra/gpkg/read.jl b/src/extra/gpkg/read.jl new file mode 100644 index 0000000..6232e51 --- /dev/null +++ b/src/extra/gpkg/read.jl @@ -0,0 +1,177 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +function gpkgread(fname; layer=1) + db = gpkgdatabase(fname) + table, geoms = gpkgextract(db; layer) + DBInterface.close!(db) + georef(table, geoms) +end + +function gpkgdatabase(fname) + # connect to SQLite database on disk + db = SQLite.DB(fname) + + # According to https://www.geopackage.org/spec/#r6 + # PRAGMA integrity_check returns a single row with the value 'ok' + if first(DBInterface.execute(db, "PRAGMA integrity_check;")).integrity_check != "ok" + throw(ErrorException("database integrity at risk")) + end + + # According to https://www.geopackage.org/spec/#r7 + # PRAGMA foreign_key_check (w/ no parameter value) returns an empty result set + if !(isempty(DBInterface.execute(db, "PRAGMA foreign_key_check;"))) + throw(ErrorException("foreign key violation(s)")) + end + + # According to https://www.geopackage.org/spec/#r10 + # A GeoPackage SHALL include a 'gpkg_spatial_ref_sys' table + if isnothing(SQLite.tableinfo(db, "gpkg_spatial_ref_sys")) + throw(ErrorException("missing required metadata tables in the GeoPackage SQL database")) + end + + # According to https://www.geopackage.org/spec/#r13 + # A GeoPackage SHALL include a 'gpkg_contents` table + if isnothing(SQLite.tableinfo(db, "gpkg_contents")) + throw(ErrorException("missing required metadata tables in the GeoPackage SQL database")) + end + + db +end + +function gpkgextract(db; layer=1) + # get the feature table or Any[] returned in sqlite query results + metadata = first( + DBInterface.execute( + db, + """ + SELECT g.table_name AS tablename, g.column_name AS geomcolumn, + c.srs_id AS srsid, srs.organization AS org, srs.organization_coordsys_id AS code + FROM gpkg_geometry_columns g, gpkg_spatial_ref_sys srs + """ * + # According to https://www.geopackage.org/spec/#r24 + # The column_name column value in a gpkg_geometry_columns row + # SHALL be the name of a column in the table or view specified + # by the table_name column value for that row. + """ + JOIN gpkg_contents c ON ( g.table_name = c.table_name ) + """ * + # According to https://www.geopackage.org/spec/#r23 + # gpkg_geometry_columns table_name column SHALL reference values + # in the gpkg_contents table_name column for rows with a data_type + # of 'features' + """ + WHERE c.data_type = 'features' + """ * + # According to https://www.geopackage.org/spec/#r146 + # The srs_id value in a gpkg_geometry_columns table row SHALL + # match the srs_id column value from the corresponding row in + # the gpkg_contents table. + """ + AND g.srs_id = c.srs_id + LIMIT 1 OFFSET ($layer-1); + """ + ) + ) + + # According to https://www.geopackage.org/spec/#r33 + # Feature table geometry columns SHALL contain geometries + # with the srs_id specified for the column by the + # gpkg_geometry_columns table srs_id column value. + org = metadata.org + code = metadata.code + srsid = metadata.srsid + if srsid == 0 + crs = LatLon{NoDatum} + elseif srsid == 4326 + crs = LatLon{WGS84Latest} + elseif srsid == -1 + crs = Cartesian{NoDatum} + else + if org == "EPSG" + crs = CoordRefSystems.get(EPSG{code}) + elseif org == "ESRI" + crs = CoordRefSystems.get(ESRI{code}) + end + end + + # According to https://www.geopackage.org/spec/#r14 + # The table_name column value in a gpkg_contents table row + # SHALL contain the name of a SQLite table or view. + tablename = metadata.tablename + + # According to https://www.geopackage.org/spec/#r30 + # A feature table or view SHALL have only one geometry column. + geomcolumn = metadata.geomcolumn |> Symbol + + # Retrieve names of columns with attributes (i.e., ≠ geometry) + # pk is the index of the column within the primary key, + # or 0 for columns that are not part of the primary key + tabinfo = SQLite.tableinfo(db, tablename) + columns = [Symbol(name) for (name, pk) in zip(tabinfo.name, tabinfo.pk) if pk == 0] + attribs = setdiff(columns, [geomcolumn]) + + # load feature table from database + gpkgtable = DBInterface.execute(db, "SELECT $(join(columns, ',')) FROM $tablename;") + + # extract attribute table and geometries + pairs = map(Tables.rows(gpkgtable)) do row + # retrieve attribute values as a named tuple + vals = (; (col => Tables.getcolumn(row, col) for col in attribs)...) + + # retrieve geometry binary data as IO buffer + buff = IOBuffer(Tables.getcolumn(row, geomcolumn)) + + # seek start of geometry (e.g., discard envelope) + gbuff = seekgeom(buff) + + # convert buffer into Meshes.jl geometry + geom = wkb2geom(gbuff, crs) + + vals, geom + end + + # handle tables without attributes + table = isempty(attribs) ? nothing : first.(pairs) + geoms = GeometrySet(last.(pairs)) + + table, geoms +end + +# According to https://www.geopackage.org/spec/#r19 +# A GeoPackage SHALL store feature table geometries in +# SQL BLOBs using the Standard GeoPackageBinary format +function seekgeom(buff) + # check the GeoPackageBinaryHeader for the 'GP' magic + read(buff, UInt16) == 0x5047 || @warn "Missing magic 'GP' string in GPkgBinaryGeometry" + + # skip version (0 => version 1) + read(buff, UInt8) + + # bit layout of GeoPackageBinary flags byte + # https://www.geopackage.org/spec/#flags_layout + # --------------------------------------- + # bit # 7 # 6 # 5 # 4 # 3 # 2 # 1 # 0 # + # use # R # R # X # Y # E # E # E # B # + # --------------------------------------- + # R: reserved for future use; set to 0 + # X: GeoPackageBinary type + # Y: empty geometry flag + # E: envelope contents indicator code (3-bit unsigned integer) + # B: byte order for SRS_ID and envelope values in header + flag = read(buff, UInt8) + + # 0x07 is the mask 0x00001110 + envelope = (flag & (0x07 << 1)) >> 1 + + # calculate GeoPackageBinaryHeader size in byte stream given extent of envelope: + # [no envelope] => 0 bytes + # [minx, maxx, miny, maxy] => 32 bytes + # [minx, maxx, miny, maxy, minz, maxz] => 48 bytes + # byte[2] magic + byte[1] version + byte[1] flag + byte[4] srs_id + byte[(8*2)×(x,y{,z})] envelope + skiplen = iszero(envelope) ? 4 : 4 + 8 * 2 * (envelope + 1) + + # skip reading the double[] envelope and start reading Well-Known Binary geometry + skip(buff, skiplen) +end diff --git a/src/extra/gpkg/wkb.jl b/src/extra/gpkg/wkb.jl new file mode 100644 index 0000000..fa6b48d --- /dev/null +++ b/src/extra/gpkg/wkb.jl @@ -0,0 +1,106 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +function wkb2geom(buff, crs) + byteswap = isone(read(buff, UInt8)) ? ltoh : ntoh + wkbtype = read(buff, UInt32) + # Input variants of WKB supported are standard, extended, and ISO WKB geometry with Z dimensions (M/ZM not supported) + # SQL/MM Part 3 and SFSQL 1.2 use offsets of 1000 (Z), 2000 (M) and 3000 (ZM) + # to indicate the present of higher dimensional coordinates in a WKB geometry + if wkbtype >= 1001 && wkbtype <= 1007 + # the SFSQL 1.2 offset of 1000 (Z) is present and subtracting a round number of 1000 gives the standard WKB type + wkbtype -= UInt32(1000) + # 99-402 was a short-lived extension to SFSQL 1.1 that used a high-bit flag to indicate the presence of Z coordinates in a WKB geometry + # the high-bit flag 0x80000000 for Z (or 0x40000000 for M) is set and masking it off gives the standard WKB type + elseif wkbtype > 0x80000000 + # the SFSQL 1.1 high-bit flag 0x80000000 (Z) is present and removing the flag reveals the standard WKB type + wkbtype -= 0x80000000 + end + if wkbtype <= 3 + # 0 - 3 [Geometry, Point, Linestring, Polygon] + wkb2single(buff, crs, wkbtype, byteswap) + else + # 4 - 7 [MultiPoint, MultiLinestring, MultiPolygon, GeometryCollection] + wkb2multi(buff, crs, byteswap) + end +end + +# read single features from Well-Known Binary IO Buffer and return Concrete Geometry +function wkb2single(buff, crs, wkbtype, byteswap) + if wkbtype == 1 + wkb2point(buff, crs, byteswap) + elseif wkbtype == 2 + wkb2chain(buff, crs, byteswap) + elseif wkbtype == 3 + wkb2poly(buff, crs, byteswap) + else + error("Unsupported WKB Geometry Type: $wkbtype") + end +end + +function wkb2point(buff, crs, byteswap) + coordinates = wkb2coords(buff, crs, byteswap) + Point(referencecoords(coordinates, crs)) +end + +function wkb2coords(buff, crs, byteswap) + if CoordRefSystems.ncoords(crs) == 2 + x = byteswap(read(buff, Float64)) + y = byteswap(read(buff, Float64)) + return (x, y) + elseif CoordRefSystems.ncoords(crs) == 3 + x = byteswap(read(buff, Float64)) + y = byteswap(read(buff, Float64)) + z = byteswap(read(buff, Float64)) + return (x, y, z) + end +end + +function referencecoords(coordinates, crs) + if crs <: LatLon + crs(coordinates[2], coordinates[1]) + elseif crs <: LatLonAlt + crs(coordinates[2], coordinates[1], coordinates[3]) + else + crs(coordinates...) + end +end + +function wkb2points(buff, npoints, crs, byteswap) + map(1:npoints) do _ + coordinates = wkb2coords(buff, crs, byteswap) + Point(referencecoords(coordinates, crs)) + end +end + +function wkb2chain(buff, crs, byteswap) + npoints = byteswap(read(buff, UInt32)) + chain = wkb2points(buff, npoints, crs, byteswap) + if length(chain) >= 2 && first(chain) == last(chain) + Ring(chain[1:(end - 1)]) + elseif length(chain) >= 2 + Rope(chain) + else + # single point or closed single point + Ring(chain) + end +end + +function wkb2poly(buff, crs, byteswap) + nrings = byteswap(read(buff, UInt32)) + rings = map(1:nrings) do _ + wkb2chain(buff, crs, byteswap) + end + PolyArea(rings) +end + +function wkb2multi(buff, crs, byteswap) + ngeoms = byteswap(read(buff, UInt32)) + geoms = map(1:ngeoms) do _ + wkbbswap = isone(read(buff, UInt8)) ? ltoh : ntoh + wkbtype = read(buff, UInt32) + wkb2single(buff, crs, wkbtype, wkbbswap) + end + Multi(geoms) +end \ No newline at end of file diff --git a/src/load.jl b/src/load.jl index 7d904f0..6e9f136 100644 --- a/src/load.jl +++ b/src/load.jl @@ -157,7 +157,18 @@ function load(fname; repair=true, layer=1, lenunit=nothing, numtype=Float64, kwa end # GIS formats - gisread(fname; repair, layer, numtype, kwargs...) + geotable = if endswith(fname, ".gpkg") + gpkgread(fname; layer, kwargs...) + else + gisread(fname; layer, numtype, kwargs...) + end + + # repair geometries + if repair + geotable |> Repair(11) |> Repair(12) + else + geotable + end end """