-
Notifications
You must be signed in to change notification settings - Fork 9
STAC SENTINEL2_L2A SCL band alignment issue after resample_spatial + aggregate_spatial #1556
Copy link
Copy link
Open
Description
Minimal code to reproduce:
import openeo
connection = openeo.connect("openeo.dev.warsaw.openeo.dataspace.copernicus.eu").authenticate_oidc()
spatial_extent = {
"west": 3649940, # center 3652500 - 256*20/2
"south": 2094940, # center 2097500 - 256*20/2
"east": 3655060, # center 3652500 + 256*20/2
"north": 2100060, # center 2097500 + 256*20/2
"crs": "EPSG:3035",
}
datacube = connection.load_collection(
"SENTINEL2_L2A",
spatial_extent=spatial_extent,
temporal_extent=["2024-01-01", "2024-01-10"],
bands=["SCL"],
max_cloud_cover=90,
)
datacube = datacube.resample_spatial(
resolution=20,
projection="EPSG:3035",
method="near",
)
feature_collections = {
"geometries": {
"features": [
{
"geometry": {
"coordinates": [
[
[
2.087900765675578,
41.59887309605375
],
[
2.0701235855616362,
41.73630244825719
],
[
1.8876825985159764,
41.72163837980604
],
[
1.9058605900418466,
41.584247051069255
],
[
2.087900765675578,
41.59887309605375
]
]
],
"type": "Polygon"
},
"properties": {},
"type": "Feature"
}
],
"type": "FeatureCollection"
},
}
datacube = datacube.aggregate_spatial(geometries = feature_collections["geometries"], reducer="mean")
datacube.download(
outputfile="output_agg_spatial.json",
)
Batch job: j-260224151703475b9f9544194372d97f
Full error:
Traceback (most recent call last):
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 2004, in apply_process
return process_function(args=ProcessArgs(args, process_id=process_id), env=env)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1513, in aggregate_spatial
return cube.aggregate_spatial(geometries=geoms, reducer=reduce_pg, target_dimension=target_dimension)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py", line 97](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py?line=97&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in run
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py", line 1759](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py?line=1759&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in aggregate_spatial
return self.zonal_statistics(geometries, visitor.builder)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py", line 1839](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/geopysparkdatacube.py?line=1839&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in zonal_statistics
self._compute_stats_geotrellis().compute_generic_timeseries_from_datacube(
File "/usr/local/spark/python/lib/py4j-0.10.9.9-src.zip/py4j/java_gateway.py", line 1362, in __call__
return_value = get_return_value(
^^^^^^^^^^^^^^^^^
File "/usr/local/spark/python/lib/py4j-0.10.9.9-src.zip/py4j/protocol.py", line 327, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o2138.compute_generic_timeseries_from_datacube.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.1 failed 4 times, most recent failure: Lost task 0.3 in stage 6.1 (TID 82) (10.42.233.83 executor 4): java.lang.IllegalArgumentException: requirement failed: x-aligned: offset by CellSize(20.0,20.0) 0.5000035031232983
at scala.Predef$.require(Predef.scala:337)
at geotrellis.layer.LayoutTileSource$.geotrellis$layer$LayoutTileSource$$requireGridAligned(LayoutTileSource.scala:228)
at geotrellis.layer.LayoutTileSource.<init>(LayoutTileSource.scala:34)
at org.openeo.geotrellis.layers.LayoutTileSourceFixed.<init>(FileLayerProvider.scala:55)
at org.openeo.geotrellis.layers.FileLayerProvider.$anonfun$convertToRasterRegions$4(FileLayerProvider.scala:1256)
at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:594)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:608)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:601)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:143)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:57)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:111)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
at org.apache.spark.scheduler.Task.run(Task.scala:147)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$3(DAGScheduler.scala:2935)
at scala.Option.getOrElse(Option.scala:201)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2935)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2927)
at scala.collection.immutable.List.foreach(List.scala:334)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2927)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1295)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1295)
at scala.Option.foreach(Option.scala:437)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
at org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1439)
at org.apache.spark.util.LazyTry.get(LazyTry.scala:58)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:131)
at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:192)
at org.apache.spark.sql.classic.DataFrameWriter.runCommand(DataFrameWriter.scala:622)
at org.apache.spark.sql.classic.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
at org.apache.spark.sql.classic.DataFrameWriter.saveInternal(DataFrameWriter.scala:241)
at org.apache.spark.sql.classic.DataFrameWriter.save(DataFrameWriter.scala:118)
at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:426)
at org.openeo.geotrellis.aggregate_polygon.AggregatePolygonProcess.aggregateByDateAndPolygon(AggregatePolygonProcess.scala:373)
at org.openeo.geotrellis.aggregate_polygon.AggregatePolygonProcess.aggregateSpatialGeneric(AggregatePolygonProcess.scala:322)
at org.openeo.geotrellis.ComputeStatsGeotrellisAdapter.compute_generic_timeseries_from_datacube(ComputeStatsGeotrellisAdapter.scala:107)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52)
at java.base/java.lang.reflect.Method.invoke(Method.java:580)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
at py4j.ClientServerConnection.run(ClientServerConnection.java:108)
at java.base/java.lang.Thread.run(Thread.java:1583)
Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException: Full stacktrace of original doTryWithCallerStacktrace caller
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$3(DAGScheduler.scala:2935)
at scala.Option.getOrElse(Option.scala:201)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2935)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2927)
at scala.collection.immutable.List.foreach(List.scala:334)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2927)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1295)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1295)
at scala.Option.foreach(Option.scala:437)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
Caused by: java.lang.IllegalArgumentException: requirement failed: x-aligned: offset by CellSize(20.0,20.0) 0.5000035031232983
at scala.Predef$.require(Predef.scala:337)
at geotrellis.layer.LayoutTileSource$.geotrellis$layer$LayoutTileSource$$requireGridAligned(LayoutTileSource.scala:228)
at geotrellis.layer.LayoutTileSource.<init>(LayoutTileSource.scala:34)
at org.openeo.geotrellis.layers.LayoutTileSourceFixed.<init>(FileLayerProvider.scala:55)
at org.openeo.geotrellis.layers.FileLayerProvider.$anonfun$convertToRasterRegions$4(FileLayerProvider.scala:1256)
at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:594)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:608)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:601)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:143)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:57)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:111)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
at org.apache.spark.scheduler.Task.run(Task.scala:147)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
... 1 more
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 1177](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=1177&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in start_main
main(sys.argv)
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 260](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=260&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in main
run_driver()
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 221](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=221&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in run_driver
run_job(
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/utils.py", line 66](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/utils.py?line=66&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in memory_logging_wrapper
return function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
[File "/opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py", line 351](vscode://file//opt/venv/lib64/python3.11/site-packages/openeogeotrellis/deploy/batch_job.py?line=351&workspace=/home/jeroen/Projects/openEO/openeo-geopyspark-driver), in run_job
result = ProcessGraphDeserializer.evaluate(process_graph, env=env, do_dry_run=tracer)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 496, in evaluate
result = convert_node(top_level_node, env=env)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 580, in convert_node
process_result = apply_process(
^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in apply_process
args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in <dictcomp>
args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 621, in convert_node
return [convert_node(x, env=env) for x in processGraph]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 621, in <listcomp>
return [convert_node(x, env=env) for x in processGraph]
^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 580, in convert_node
process_result = apply_process(
^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in apply_process
args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1984, in <dictcomp>
args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())}
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 607, in convert_node
return convert_node(processGraph['node'], env=env)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 580, in convert_node
process_result = apply_process(
^^^^^^^^^^^^^^
File "/opt/venv/lib64/python3.11/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 2014, in apply_process
raise OpenEOApiException(f"Unexpected error during {process_id!r}: {detail}. The process had these arguments: {args!r} ") from e
openeo_driver.errors.OpenEOApiException: Unexpected error during 'aggregate_spatial': Exception during Spark execution: java.lang.IllegalArgumentException: requirement failed: x-aligned: offset by CellSize(20.0,20.0) 0.5000035031232983. The process had these arguments: {'data': GeopysparkDataCube(metadata=GeopysparkCubeMetadata(dimension_names=['x', 'y', 't', 'bands'], band_names=['SCL'])), 'geometries': {'features': [{'geometry': {'coordinates': [[[2.087900765675578, 41.59887309605375], [2.0701235855616362, 41.73630244825719], [1.8876825985159764, 41.72163837980604], [1.9058605900418466, 41.584247051069255], [2.087900765675578, 41.59887309605375]]], 'type': 'Polygon'}, 'properties': {}, 'type': 'Feature'}], 'type': 'FeatureCollection'}, 'reducer': {'process_graph': {'mean1': {'process_id': 'mean', 'arguments': {'data': {'from_parameter': 'data'}}, 'result': True}}}}
Reactions are currently unavailable