STAC SENTINEL2_L2A SCL band alignment issue after resample_spatial + aggregate_spatial #1556

@JeroenVerstraelen

Description

Minimal code to reproduce:

import openeo

connection = openeo.connect("openeo.dev.warsaw.openeo.dataspace.copernicus.eu").authenticate_oidc()

spatial_extent = {
    "west": 3649940,   # center 3652500 - 256*20/2
    "south": 2094940,  # center 2097500 - 256*20/2
    "east": 3655060,   # center 3652500 + 256*20/2
    "north": 2100060,  # center 2097500 + 256*20/2
    "crs": "EPSG:3035",
}

datacube = connection.load_collection(
    "SENTINEL2_L2A",
    spatial_extent=spatial_extent,
    temporal_extent=["2024-01-01", "2024-01-10"],
    bands=["SCL"],
    max_cloud_cover=90,
)

datacube = datacube.resample_spatial(
    resolution=20,
    projection="EPSG:3035",
    method="near",
)

feature_collections = {
    "geometries": {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {},
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [[
                        [2.087900765675578, 41.59887309605375],
                        [2.0701235855616362, 41.73630244825719],
                        [1.8876825985159764, 41.72163837980604],
                        [1.9058605900418466, 41.584247051069255],
                        [2.087900765675578, 41.59887309605375],
                    ]],
                },
            }
        ],
    },
}

datacube = datacube.aggregate_spatial(geometries=feature_collections["geometries"], reducer="mean")

datacube.download(
    outputfile="output_agg_spatial.json",
)
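
For reference, the requested extent itself sits on a 20 m grid; a quick sanity check (assuming the grid origin is at 0) confirms each bound is an exact multiple of the cell size, so the misalignment reported below does not come from the request:

# Sanity check (assumes the 20 m grid originates at 0): all four bounds
# of the requested extent are exact multiples of the cell size.
for key in ("west", "south", "east", "north"):
    print(key, spatial_extent[key] % 20)  # 0 for every bound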

Batch job: j-260224151703475b9f9544194372d97f

Full error:

Traceback (most recent call last):
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 2004, in apply_process
    return process_function(args=ProcessArgs(args, process_id=process_id), env=env)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1513, in aggregate_spatial
    return cube.aggregate_spatial(geometries=geoms, reducer=reduce_pg, target_dimension=target_dimension)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py", line 97](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py?line=97&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in run
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py", line 1759](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py?line=1759&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in aggregate_spatial
    return self.zonal_statistics(geometries, visitor.builder)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py", line 1839](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py?line=1839&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in zonal_statistics
    self._compute_stats_geotrellis().compute_generic_timeseries_from_datacube(
  File "/usr/local/spark/python/lib/py4j-0.10.9.9-src.zip/py4j/java_gateway.py", line 1362, in __call__
    return_value = get_return_value(
                   ^^^^^^^^^^^^^^^^^
  File "/usr/local/spark/python/lib/py4j-0.10.9.9-src.zip/py4j/protocol.py", line 327, in get_return_value
    raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o2138.compute_generic_timeseries_from_datacube.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.1 failed 4 times, most recent failure: Lost task 0.3 in stage 6.1 (TID 82) (10.42.233.83 executor 4): java.lang.IllegalArgumentException: requirement failed: x-aligned: offset by CellSize(20.0,20.0) 0.5000035031232983
	at scala.Predef$.require(Predef.scala:337)
	at geotrellis.layer.LayoutTileSource$.geotrellis$layer$LayoutTileSource$$requireGridAligned(LayoutTileSource.scala:228)
	at geotrellis.layer.LayoutTileSource.<init>(LayoutTileSource.scala:34)
	at org.openeo.geotrellis.layers.LayoutTileSourceFixed.<init>(FileLayerProvider.scala:55)
	at org.openeo.geotrellis.layers.FileLayerProvider.$anonfun$convertToRasterRegions$4(FileLayerProvider.scala:1256)
	at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:594)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:608)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:601)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:143)
	at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:57)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:111)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
	at org.apache.spark.scheduler.Task.run(Task.scala:147)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
	at java.base/java.lang.Thread.run(Thread.java:1583)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$3(DAGScheduler.scala:2935)
	at scala.Option.getOrElse(Option.scala:201)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2935)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2927)
	at scala.collection.immutable.List.foreach(List.scala:334)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2927)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1295)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1295)
	at scala.Option.foreach(Option.scala:437)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
	at org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1439)
	at org.apache.spark.util.LazyTry.get(LazyTry.scala:58)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:131)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:192)
	at org.apache.spark.sql.classic.DataFrameWriter.runCommand(DataFrameWriter.scala:622)
	at org.apache.spark.sql.classic.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
	at org.apache.spark.sql.classic.DataFrameWriter.saveInternal(DataFrameWriter.scala:241)
	at org.apache.spark.sql.classic.DataFrameWriter.save(DataFrameWriter.scala:118)
	at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:426)
	at org.openeo.geotrellis.aggregate_polygon.AggregatePolygonProcess.aggregateByDateAndPolygon(AggregatePolygonProcess.scala:373)
	at org.openeo.geotrellis.aggregate_polygon.AggregatePolygonProcess.aggregateSpatialGeneric(AggregatePolygonProcess.scala:322)
	at org.openeo.geotrellis.ComputeStatsGeotrellisAdapter.compute_generic_timeseries_from_datacube(ComputeStatsGeotrellisAdapter.scala:107)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52)
	at java.base/java.lang.reflect.Method.invoke(Method.java:580)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:108)
	at java.base/java.lang.Thread.run(Thread.java:1583)
	Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException: Full stacktrace of original doTryWithCallerStacktrace caller
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$3(DAGScheduler.scala:2935)
		at scala.Option.getOrElse(Option.scala:201)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2935)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2927)
		at scala.collection.immutable.List.foreach(List.scala:334)
		at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2927)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1295)
		at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1295)
		at scala.Option.foreach(Option.scala:437)
		at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
		at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
		at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
		at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
		at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
Caused by: java.lang.IllegalArgumentException: requirement failed: x-aligned: offset by CellSize(20.0,20.0) 0.5000035031232983
	at scala.Predef$.require(Predef.scala:337)
	at geotrellis.layer.LayoutTileSource$.geotrellis$layer$LayoutTileSource$$requireGridAligned(LayoutTileSource.scala:228)
	at geotrellis.layer.LayoutTileSource.<init>(LayoutTileSource.scala:34)
	at org.openeo.geotrellis.layers.LayoutTileSourceFixed.<init>(FileLayerProvider.scala:55)
	at org.openeo.geotrellis.layers.FileLayerProvider.$anonfun$convertToRasterRegions$4(FileLayerProvider.scala:1256)
	at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:594)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:608)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:601)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:143)
	at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:57)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:111)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
	at org.apache.spark.scheduler.Task.run(Task.scala:147)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
	... 1 more


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 1177](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=1177&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in start_main
    main(sys.argv)
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 260](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=260&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in main
    run_driver()
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 221](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=221&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in run_driver
    run_job(
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/utils.py", line 66](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/utils.py?line=66&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in memory_logging_wrapper
    return function(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  [File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 351](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=351&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in run_job
    result = ProcessGraphDeserializer.evaluate(process_graph, env=env, do_dry_run=tracer)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 496, in evaluate
    result = convert_node(top_level_node, env=env)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 580, in convert_node
    process_result = apply_process(
                     ^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in apply_process
    args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in 
    args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 621, in convert_node
    return [convert_node(x, env=env) for x in processGraph]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 621, in 
    return [convert_node(x, env=env) for x in processGraph]
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 580, in convert_node
    process_result = apply_process(
                     ^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in apply_process
    args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in 
    args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 607, in convert_node
    return convert_node(processGraph['node'], env=env)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 580, in convert_node
    process_result = apply_process(
                     ^^^^^^^^^^^^^^
  File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 2014, in apply_process
    raise OpenEOApiException(f"Unexpected error during {process_id!r}: {detail}. The process had these arguments: {args!r} ") from e
openeo_driver.errors.OpenEOApiException: Unexpected error during 'aggregate_spatial': Exception during Spark execution: java.lang.IllegalArgumentException: requirement failed: x-aligned: offset by CellSize(20.0,20.0) 0.5000035031232983. The process had these arguments: {'data': GeopysparkDataCube(metadata=GeopysparkCubeMetadata(dimension_names=['x', 'y', 't', 'bands'], band_names=['SCL'])), 'geometries': {'features': [{'geometry': {'coordinates': [[[2.087900765675578, 41.59887309605375], [2.0701235855616362, 41.73630244825719], [1.8876825985159764, 41.72163837980604], [1.9058605900418466, 41.584247051069255], [2.087900765675578, 41.59887309605375]]], 'type': 'Polygon'}, 'properties': {}, 'type': 'Feature'}], 'type': 'FeatureCollection'}, 'reducer': {'process_graph': {'mean1': {'process_id': 'mean', 'arguments': {'data': {'from_parameter': 'data'}}, 'result': True}}}}
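
The failing requirement is GeoTrellis' grid-alignment check (requireGridAligned in LayoutTileSource): the source raster's grid must line up with the target layout's grid. The reported offset of 0.5000035031232983 cells at CellSize(20.0, 20.0) corresponds to roughly a 10 m (half-pixel) shift in x between the SCL asset's grid and the resampled layout. A minimal sketch of the kind of check involved (illustrative only; the function name is hypothetical):

# Hypothetical illustration of the alignment check: the offset between two
# grid origins, expressed in fractional cells, must be (close to) zero.
def fractional_cell_offset(source_x: float, layout_x: float, cell_size: float) -> float:
    return abs(source_x - layout_x) / cell_size % 1.0

# The reported offset of ~0.5000035 cells on a 20 m grid is a ~10.00007 m
# shift in x between the source grid and the target layout.
print(0.5000035031232983 * 20.0)  # 10.000070062465966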
