|
| 1 | +# --- |
| 2 | +# jupyter: |
| 3 | +# jupytext: |
| 4 | +# formats: py:percent |
| 5 | +# text_representation: |
| 6 | +# extension: .py |
| 7 | +# format_name: percent |
| 8 | +# --- |
| 9 | + |
| 10 | +# %% [markdown] |
| 11 | +# # COP vs USGS 3DEP DEM comparison |
| 12 | +# |
| 13 | +# COP (Copernicus GLO-30) is a **radar-based surface model** (TanDEM-X C-band SAR) |
| 14 | +# that reflects the top of whatever the radar sees: tree canopy, buildings, etc. |
| 15 | +# |
| 16 | +# USGS 3DEP is a **LiDAR-derived bare-earth model** — the LiDAR point cloud is |
| 17 | +# classified and ground points are used to build the DEM, so vegetation and |
| 18 | +# buildings are stripped away. |
| 19 | +# |
| 20 | +# This fundamental difference should produce dramatic elevation discrepancies in |
| 21 | +# areas with tall vegetation or dense urban development. |
| 22 | +# |
| 23 | +# | Source | Type | Datum (native) | Resolution | |
| 24 | +# |--------|------|---------------|------------| |
| 25 | +# | **COP** (GLO-30) | DSM (radar surface) | EGM2008 | 1 arc-sec (~30 m) | |
| 26 | +# | **3DEP** | DEM (bare earth, LiDAR) | NAVD88 | up to 1 m (resampled to 1 arc-sec) | |
| 27 | +# |
| 28 | +# Both are converted to WGS84 ellipsoidal heights by `sardem`. |
| 29 | + |
| 30 | +# %% |
| 31 | +from __future__ import annotations |
| 32 | + |
| 33 | +from pathlib import Path |
| 34 | + |
| 35 | +import numpy as np |
| 36 | +from osgeo import gdal |
| 37 | + |
| 38 | +gdal.UseExceptions() |
| 39 | + |
| 40 | + |
| 41 | +# %% [markdown] |
| 42 | +# ## Helper functions |
| 43 | + |
| 44 | + |
| 45 | +# %% |
| 46 | +def fetch_dem(bbox: tuple, data_source: str, output: str) -> None: |
| 47 | + """Download a DEM for the given source and bounding box.""" |
| 48 | + import sardem.dem |
| 49 | + |
| 50 | + print(f"Fetching {data_source} DEM -> {output}") |
| 51 | + sardem.dem.main( |
| 52 | + output_name=output, bbox=bbox, data_source=data_source, output_type="float32", |
| 53 | + ) |
| 54 | + |
| 55 | + |
| 56 | +def read_dem(path: str) -> tuple[np.ma.MaskedArray, gdal.Dataset]: |
| 57 | + """Read a single-band raster into a masked float32 array; also return the dataset.""" |
| 58 | + ds = gdal.Open(path) |
| 59 | + band = ds.GetRasterBand(1) |
| 60 | + arr = band.ReadAsArray().astype(np.float32) |
| 61 | + nodata = band.GetNoDataValue() |
| 62 | + if nodata is not None: |
| 63 | + arr = np.ma.masked_equal(arr, nodata) |
| 64 | + else: |
| 65 | + arr = np.ma.array(arr) |
| 66 | + return arr, ds |
| 67 | + |
| 68 | + |
| 69 | +def get_extent(ds: gdal.Dataset) -> list[float]: |
| 70 | + """Return [left, right, bottom, top] for imshow extent from a GDAL dataset.""" |
| 71 | + gt = ds.GetGeoTransform() |
| 72 | + left = gt[0] |
| 73 | + top = gt[3] |
| 74 | + right = left + gt[1] * ds.RasterXSize |
| 75 | + bottom = top + gt[5] * ds.RasterYSize |
| 76 | + return [left, right, bottom, top] |
| 77 | + |
| 78 | + |
| 79 | +# %% [markdown] |
| 80 | +# ## Area 1 — Olympic Peninsula old-growth forest (Washington) |
| 81 | +# |
| 82 | +# The Hoh Rainforest contains some of the tallest trees in the Pacific Northwest: |
| 83 | +# Sitka spruce reaching 60-80 m. COP's C-band radar scatters off the canopy |
| 84 | +# tops, while 3DEP's LiDAR ground classification penetrates to bare earth. |
| 85 | +# We expect COP to be **systematically higher** than 3DEP throughout the forest, |
| 86 | +# with differences of 30-60+ m in old-growth stands. |
| 87 | + |
| 88 | +# %% |
| 89 | +bbox_olympic = (-123.95, 47.82, -123.80, 47.92) |
| 90 | +area1_dir = Path("area1_olympic") |
| 91 | +area1_dir.mkdir(exist_ok=True) |
| 92 | + |
| 93 | +sources = ["COP", "3DEP"] |
| 94 | +LABELS = {"COP": "COP (radar surface)", "3DEP": "3DEP (bare earth)"} |
| 95 | + |
| 96 | +area1_files = {s: str(area1_dir / f"olympic_{s.lower()}.tif") for s in sources} |
| 97 | + |
| 98 | +for src in sources: |
| 99 | + fetch_dem(bbox_olympic, src, area1_files[src]) |
| 100 | + |
| 101 | +# %% |
| 102 | +dems_olympic = {} |
| 103 | +extents_olympic = {} |
| 104 | +for src in sources: |
| 105 | + arr, ds = read_dem(area1_files[src]) |
| 106 | + dems_olympic[src] = arr |
| 107 | + extents_olympic[src] = get_extent(ds) |
| 108 | + ds = None |
| 109 | + |
| 110 | +shapes = {s: arr.shape for s, arr in dems_olympic.items()} |
| 111 | +print("Shapes:", shapes) |
| 112 | +assert len(set(shapes.values())) == 1, f"Shape mismatch: {shapes}" |
| 113 | + |
| 114 | +extent = extents_olympic["COP"] |
| 115 | + |
| 116 | +# %% [markdown] |
| 117 | +# ### Side-by-side elevation maps |
| 118 | + |
| 119 | +# %% |
| 120 | +import matplotlib.pyplot as plt |
| 121 | + |
| 122 | +fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharex=True, sharey=True) |
| 123 | +vmin = min(arr.min() for arr in dems_olympic.values()) |
| 124 | +vmax = max(arr.max() for arr in dems_olympic.values()) |
| 125 | + |
| 126 | +for ax, src in zip(axes, sources): |
| 127 | + im = ax.imshow( |
| 128 | + dems_olympic[src], vmin=vmin, vmax=vmax, cmap="terrain", extent=extent, |
| 129 | + ) |
| 130 | + ax.set_title(LABELS[src]) |
| 131 | + ax.set_xlabel("Longitude") |
| 132 | +axes[0].set_ylabel("Latitude") |
| 133 | +fig.colorbar(im, ax=axes, label="Elevation [m, WGS84]", shrink=0.8) |
| 134 | +fig.suptitle("Olympic Peninsula — Elevation", y=1.02) |
| 135 | +fig.savefig("olympic_elevation.png", dpi=150, bbox_inches="tight") |
| 136 | + |
| 137 | +# %% [markdown] |
| 138 | +# ### Difference map (COP - 3DEP) |
| 139 | +# |
| 140 | +# Positive values = COP higher than 3DEP = canopy height signal. |
| 141 | + |
| 142 | +# %% |
| 143 | +diff_olympic = dems_olympic["COP"] - dems_olympic["3DEP"] |
| 144 | + |
| 145 | +fig, ax = plt.subplots(figsize=(10, 7)) |
| 146 | +im = ax.imshow(diff_olympic, cmap="RdBu_r", vmin=-20, vmax=80, extent=extent) |
| 147 | +ax.set_title("COP - 3DEP (positive = canopy/surface above bare earth)") |
| 148 | +ax.set_xlabel("Longitude") |
| 149 | +ax.set_ylabel("Latitude") |
| 150 | +fig.colorbar(im, ax=ax, label="Difference [m]", shrink=0.8) |
| 151 | +fig.savefig("olympic_diff.png", dpi=150, bbox_inches="tight") |
| 152 | + |
| 153 | +# %% [markdown] |
| 154 | +# ### Histogram of differences |
| 155 | + |
| 156 | +# %% |
| 157 | +fig, ax = plt.subplots(figsize=(8, 4)) |
| 158 | +ax.hist(diff_olympic.compressed(), bins=200, range=(-20, 100), edgecolor="none", alpha=0.8) |
| 159 | +ax.axvline(np.ma.median(diff_olympic), color="red", ls="--", label=f"median = {np.ma.median(diff_olympic):.1f} m") |
| 160 | +ax.axvline(diff_olympic.mean(), color="orange", ls="--", label=f"mean = {diff_olympic.mean():.1f} m") |
| 161 | +ax.set_xlabel("COP - 3DEP [m]") |
| 162 | +ax.set_ylabel("Pixel count") |
| 163 | +ax.set_title("Olympic Peninsula — COP minus 3DEP distribution") |
| 164 | +ax.legend() |
| 165 | +fig.savefig("olympic_hist.png", dpi=150, bbox_inches="tight") |
| 166 | + |
| 167 | +# %% [markdown] |
| 168 | +# ### Summary statistics |
| 169 | + |
| 170 | +# %% |
| 171 | +print(f"{'Statistic':<12} {'Value':>10}") |
| 172 | +print("-" * 24) |
| 173 | +for name, fn in [("mean", np.ma.mean), ("median", np.ma.median), ("std", np.ma.std), |
| 174 | + ("min", np.ma.min), ("max", np.ma.max)]: |
| 175 | + print(f"{name:<12} {fn(diff_olympic):10.2f} m") |
| 176 | + |
| 177 | +# %% [markdown] |
| 178 | +# ## Area 2 — San Francisco (California) |
| 179 | +# |
| 180 | +# Dense urban core with skyscrapers (Salesforce Tower 326 m, Transamerica Pyramid |
| 181 | +# 260 m) surrounded by steep residential hills. COP radar sees building rooftops; |
| 182 | +# 3DEP LiDAR extracts bare earth. We expect large positive COP-3DEP spikes |
| 183 | +# at tall buildings and smaller but still visible effects in residential areas. |
| 184 | + |
| 185 | +# %% |
| 186 | +bbox_sf = (-122.44, 37.76, -122.38, 37.81) |
| 187 | +area2_dir = Path("area2_sf") |
| 188 | +area2_dir.mkdir(exist_ok=True) |
| 189 | + |
| 190 | +area2_files = {s: str(area2_dir / f"sf_{s.lower()}.tif") for s in sources} |
| 191 | + |
| 192 | +for src in sources: |
| 193 | + fetch_dem(bbox_sf, src, area2_files[src]) |
| 194 | + |
| 195 | +# %% |
| 196 | +dems_sf = {} |
| 197 | +extents_sf = {} |
| 198 | +for src in sources: |
| 199 | + arr, ds = read_dem(area2_files[src]) |
| 200 | + dems_sf[src] = arr |
| 201 | + extents_sf[src] = get_extent(ds) |
| 202 | + ds = None |
| 203 | + |
| 204 | +shapes = {s: arr.shape for s, arr in dems_sf.items()} |
| 205 | +print("Shapes:", shapes) |
| 206 | +assert len(set(shapes.values())) == 1, f"Shape mismatch: {shapes}" |
| 207 | + |
| 208 | +extent_sf = extents_sf["COP"] |
| 209 | + |
| 210 | +# %% [markdown] |
| 211 | +# ### Side-by-side elevation maps |
| 212 | + |
| 213 | +# %% |
| 214 | +fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharex=True, sharey=True) |
| 215 | +vmin = min(arr.min() for arr in dems_sf.values()) |
| 216 | +vmax = max(arr.max() for arr in dems_sf.values()) |
| 217 | + |
| 218 | +for ax, src in zip(axes, sources): |
| 219 | + im = ax.imshow( |
| 220 | + dems_sf[src], vmin=vmin, vmax=vmax, cmap="terrain", extent=extent_sf, |
| 221 | + ) |
| 222 | + ax.set_title(LABELS[src]) |
| 223 | + ax.set_xlabel("Longitude") |
| 224 | +axes[0].set_ylabel("Latitude") |
| 225 | +fig.colorbar(im, ax=axes, label="Elevation [m, WGS84]", shrink=0.8) |
| 226 | +fig.suptitle("San Francisco — Elevation", y=1.02) |
| 227 | +fig.savefig("sf_elevation.png", dpi=150, bbox_inches="tight") |
| 228 | + |
| 229 | +# %% [markdown] |
| 230 | +# ### Difference map (COP - 3DEP) |
| 231 | + |
| 232 | +# %% |
| 233 | +diff_sf = dems_sf["COP"] - dems_sf["3DEP"] |
| 234 | + |
| 235 | +fig, ax = plt.subplots(figsize=(10, 7)) |
| 236 | +im = ax.imshow(diff_sf, cmap="RdBu_r", vmin=-10, vmax=50, extent=extent_sf) |
| 237 | +ax.set_title("COP - 3DEP (positive = buildings/surface above bare earth)") |
| 238 | +ax.set_xlabel("Longitude") |
| 239 | +ax.set_ylabel("Latitude") |
| 240 | +fig.colorbar(im, ax=ax, label="Difference [m]", shrink=0.8) |
| 241 | +fig.savefig("sf_diff.png", dpi=150, bbox_inches="tight") |
| 242 | + |
| 243 | +# %% [markdown] |
| 244 | +# ### Histogram of differences |
| 245 | + |
| 246 | +# %% |
| 247 | +fig, ax = plt.subplots(figsize=(8, 4)) |
| 248 | +ax.hist(diff_sf.compressed(), bins=200, range=(-20, 60), edgecolor="none", alpha=0.8) |
| 249 | +ax.axvline(np.ma.median(diff_sf), color="red", ls="--", label=f"median = {np.ma.median(diff_sf):.1f} m") |
| 250 | +ax.axvline(diff_sf.mean(), color="orange", ls="--", label=f"mean = {diff_sf.mean():.1f} m") |
| 251 | +ax.set_xlabel("COP - 3DEP [m]") |
| 252 | +ax.set_ylabel("Pixel count") |
| 253 | +ax.set_title("San Francisco — COP minus 3DEP distribution") |
| 254 | +ax.legend() |
| 255 | +fig.savefig("sf_hist.png", dpi=150, bbox_inches="tight") |
| 256 | + |
| 257 | +# %% [markdown] |
| 258 | +# ### Summary statistics |
| 259 | + |
| 260 | +# %% |
| 261 | +print(f"{'Statistic':<12} {'Value':>10}") |
| 262 | +print("-" * 24) |
| 263 | +for name, fn in [("mean", np.ma.mean), ("median", np.ma.median), ("std", np.ma.std), |
| 264 | + ("min", np.ma.min), ("max", np.ma.max)]: |
| 265 | + print(f"{name:<12} {fn(diff_sf):10.2f} m") |
| 266 | + |
| 267 | +# %% [markdown] |
| 268 | +# ## Discussion |
| 269 | +# |
| 270 | +# The key insight is that COP and 3DEP measure fundamentally different things: |
| 271 | +# |
| 272 | +# - **COP** is a Digital Surface Model (DSM) derived from X-band SAR radar |
| 273 | +# (TanDEM-X). The radar scatters off the first surface it hits: tree canopy, |
| 274 | +# building rooftops, etc. |
| 275 | +# |
| 276 | +# - **3DEP** is a Digital Elevation Model (DEM) derived from classified LiDAR |
| 277 | +# point clouds. Ground returns are separated from vegetation/building returns, |
| 278 | +# so the result is a bare-earth surface. |
| 279 | +# |
| 280 | +# **Olympic Peninsula (forest):** |
| 281 | +# COP should be systematically 30-60+ m higher than 3DEP across forested areas, |
| 282 | +# reflecting the canopy height of old-growth Sitka spruce and western hemlock. |
| 283 | +# The difference map is effectively a proxy for canopy height — a valuable |
| 284 | +# remote sensing product in its own right. |
| 285 | +# |
| 286 | +# **San Francisco (urban):** |
| 287 | +# COP should show elevation spikes at tall buildings in the Financial District |
| 288 | +# and SoMa, while 3DEP shows the true ground level. Residential neighborhoods |
| 289 | +# on hills should show smaller differences (1-2 story buildings at 30 m |
| 290 | +# resolution are mostly averaged out). The difference map reveals the urban |
| 291 | +# built environment. |
| 292 | +# |
| 293 | +# Both areas also have a datum conversion difference: COP converts from EGM2008, |
| 294 | +# while 3DEP converts from NAVD88. This adds a small (~1 m) spatially smooth |
| 295 | +# offset that is dwarfed by the DSM-vs-DEM signal. |
0 commit comments