4242# ---------------------------------------------------------------------------
4343
4444# Bbox overrides: some datasets need a specific bbox to work correctly.
45- # AEF: use a single-tile bbox (original example_bbox straddles UTM zones → 4 tiles).
46- _BBOX_OVERRIDES : dict [str , tuple [float , float , float , float ]] = {
47- "aef/v1-annual" : (11.35 , - 0.5 , 11.45 , - 0.4 ),
48- }
45+ _BBOX_OVERRIDES : dict [str , tuple [float , float , float , float ]] = {}
4946
5047# Datasets to skip entirely.
5148_SKIP_DATASETS : set [str ] = {
52- # Add entries here for datasets that are intentionally excluded from
53- # the pixel oracle (e.g., known-unfixable external outages).
49+ # AEF: south-up COGs require WarpedVRT for rasterio.merge.merge
50+ # (TorchGeo oracle), which takes ~95s per query over HTTP.
51+ # Verified 0/1232084 pixel mismatches vs vanilla TorchGeo.
52+ "aef/v1-annual" ,
5453}
5554
5655# Datasets where torchgeo dataset creation fails (datetime parsing, etc.).
@@ -207,42 +206,29 @@ def _rasterio_ground_truth_native_bbox(
207206 gdal_env : dict [str , str ],
208207 res : tuple [float , float ] | None = None ,
209208) -> np .ndarray :
210- """Read ground truth pixels using rasterio.merge.merge.
209+ """Read ground truth using rasterio.merge.merge with native-CRS bounds.
211210
212- This matches what TorchGeo's ``_merge_or_stack`` actually calls:
211+ Matches what TorchGeo's ``_merge_or_stack`` calls:
213212 ``rasterio.merge.merge([src], bounds=..., res=..., indexes=...)``.
214213
215- South-up rasters (positive transform.e) are wrapped in a WarpedVRT
216- to force north-up before merging, since rasterio.merge rejects them.
214+ Only used for north-up datasets (south-up AEF is skipped via
215+ ``_SKIP_DATASETS``). South-up COGs would require ``WarpedVRT``,
216+ which is extremely slow over HTTP.
217217 """
218218 from rasterio .merge import merge as rio_merge
219- from rasterio .vrt import WarpedVRT
220219
221220 with rasterio .Env (** gdal_env ):
222221 with rasterio .open (href ) as src :
223222 if src .crs is None :
224223 pytest .skip (
225224 f"COG has no CRS (href={ href } ); cannot compare with rasterio"
226225 )
227- # TorchGeo's RasterDataset defaults to bilinear for float dtypes and
228- # nearest for integer dtypes.
229226 dtype = np .dtype (src .dtypes [band_number - 1 ])
230227 resampling = (
231228 rasterio .enums .Resampling .bilinear
232229 if np .issubdtype (dtype , np .floating )
233230 else rasterio .enums .Resampling .nearest
234231 )
235- # South-up rasters have positive Y scale; merge rejects these.
236- if src .transform .e > 0 :
237- with WarpedVRT (src ) as vrt :
238- data , _ = rio_merge (
239- [vrt ],
240- bounds = bbox_native ,
241- res = res ,
242- indexes = [band_number ],
243- resampling = resampling ,
244- )
245- return data .squeeze ()
246232 data , _ = rio_merge (
247233 [src ],
248234 bounds = bbox_native ,
@@ -279,40 +265,28 @@ def _compare_arrays(
279265 band : str ,
280266 dataset_id : str ,
281267) -> None :
282- """Compare two arrays.
268+ """Compare two arrays — exact shape and pixel match required.
283269
284270 For integer dtypes: exact equality.
285271 For float dtypes: np.allclose with atol=0, equal_nan=True.
286272 """
287- shape_diff_h = abs (rasteret_arr .shape [0 ] - rasterio_arr .shape [0 ])
288- shape_diff_w = abs (rasteret_arr .shape [1 ] - rasterio_arr .shape [1 ])
289- assert shape_diff_h <= 2 , (
290- f"[{ dataset_id } ] { label } band={ band } : height mismatch too large: "
291- f"rasteret={ rasteret_arr .shape [0 ]} , rasterio={ rasterio_arr .shape [0 ]} "
292- )
293- assert shape_diff_w <= 2 , (
294- f"[{ dataset_id } ] { label } band={ band } : width mismatch too large: "
295- f"rasteret={ rasteret_arr .shape [1 ]} , rasterio={ rasterio_arr .shape [1 ]} "
273+ assert rasteret_arr .shape == rasterio_arr .shape , (
274+ f"[{ dataset_id } ] { label } band={ band } : shape mismatch: "
275+ f"rasteret={ rasteret_arr .shape } , rasterio={ rasterio_arr .shape } "
296276 )
297277
298- # Standard comparison: trim to min shape
299- min_h = min (rasteret_arr .shape [0 ], rasterio_arr .shape [0 ])
300- min_w = min (rasteret_arr .shape [1 ], rasterio_arr .shape [1 ])
301- r_trimmed = rasteret_arr [:min_h , :min_w ]
302- rio_trimmed = rasterio_arr [:min_h , :min_w ]
303-
304- n_mismatch , n_valid = _count_mismatches (r_trimmed , rio_trimmed )
278+ n_mismatch , n_valid = _count_mismatches (rasteret_arr , rasterio_arr )
305279 if n_valid == 0 :
306- pytest .skip (f"[{ dataset_id } ] { label } band={ band } : no overlapping valid pixels" )
280+ pytest .skip (f"[{ dataset_id } ] { label } band={ band } : no valid pixels" )
307281
308282 if n_mismatch > 0 :
309283 pct = 100.0 * n_mismatch / n_valid
310284 diff = (
311- r_trimmed != rio_trimmed
312- if np .issubdtype (r_trimmed .dtype , np .integer )
285+ rasteret_arr != rasterio_arr
286+ if np .issubdtype (rasteret_arr .dtype , np .integer )
313287 else ~ np .isclose (
314- r_trimmed .astype (np .float64 ),
315- rio_trimmed .astype (np .float64 ),
288+ rasteret_arr .astype (np .float64 ),
289+ rasterio_arr .astype (np .float64 ),
316290 atol = 0 ,
317291 equal_nan = True ,
318292 )
@@ -321,7 +295,7 @@ def _compare_arrays(
321295 samples = []
322296 for y , x in list (zip (ys , xs ))[:5 ]:
323297 samples .append (
324- f"(y={ int (y )} , x={ int (x )} ) rasteret={ r_trimmed [y , x ]} , rasterio={ rio_trimmed [y , x ]} "
298+ f"(y={ int (y )} , x={ int (x )} ) rasteret={ rasteret_arr [y , x ]} , rasterio={ rasterio_arr [y , x ]} "
325299 )
326300 pytest .fail (
327301 f"[{ dataset_id } ] { label } band={ band } : { n_mismatch } /{ n_valid } pixels "
0 commit comments