diff --git a/CHANGELOG.md b/CHANGELOG.md index e9884dfb..0619fbec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ and this project uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html) - Obstore and VirtualiZarr should not be required([#1097](https://github.com/nsidc/earthaccess/issues/1097))([@betolink](https://github.com/betolink)) +### Added + +- Multi-feature search support (multi_bounding_box, multipolygon, multipoint, multicircle, multiline) from a single API call following the [CMR](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#g-polygon) standards. + ## [v0.15.0] - 2025-09-16 ### Changed diff --git a/docs/user_guide/search.md b/docs/user_guide/search.md index 988bc0ed..f6117f44 100644 --- a/docs/user_guide/search.md +++ b/docs/user_guide/search.md @@ -394,6 +394,63 @@ results = earthaccess.search_data( ) ``` +### Multi-feature support + +`earthaccess` supports multi-feature searches for various spatial types, including polygons, points, lines, and more. To query multiple features of the same type, use the `multi`-prefixed parameter for that feature type: + +- `multi_bounding_box` +- `multipolygon` +- `multipoint` +- `multicircle` +- `multiline` + +When using `earthaccess.search_data`, the query will return granules that intersect any of the specified features. 
For example, to search using multiple polygons, you can structure your code as follows: + +```python +polygons = [ + # same polygon used in the single query + [ + (-49.64860422604741, 69.23553485026147), + (-49.667876114626296, 69.07309059285959), + (-49.1722491331669, 69.03175841820749), + (-47.53552489113113, 69.03872918462292), + (-47.35616491854395, 69.22149993224824), + (-48.1447695277283, 69.33507802083219), + (-49.178671242118384, 69.29455117736225), + (-49.64860422604741, 69.23553485026147), + ], + # a second polygon over the Eyjafjallajökull volcano in Iceland + [ + (-19.61490317965708, 63.63370144220765), + (-19.61490317965708, 63.61370144220765), + (-19.59490317965708, 63.61370144220765), + (-19.59490317965708, 63.63370144220765), + (-19.61490317965708, 63.63370144220765), + ] +] + +results = earthaccess.search_data( + short_name="ATL06", + multipolygon=polygons, + ) +``` + +Similarly, to query multiple points, you can use: + +```python +lon_lat_pairs = [ + (-105.25303896425012, 40.01259873086735), + (-96.123457744456789, 19.98765455634521) +] + +results = earthaccess.search_data( + short_name="ATL06", + multipoint=lon_lat_pairs, + ) +``` + +This method enables efficient querying of granules that intersect with any of the specified spatial features without the need for multiple API calls. + ## Search for services NASA Earthdata provides services that you can use to transform data before you download it. Transformations include converting data files to a different file format, subsetting data by spatial extent, time range or variable, reprojecting or transforming data to a different coordinate reference system (CRS) from the one it is stored in. Not all datasets have services and not all transformation services are available for a given dataset. 
diff --git a/earthaccess/search.py b/earthaccess/search.py
index c1e56a8f..7f6ccdf2 100644
--- a/earthaccess/search.py
+++ b/earthaccess/search.py
@@ -851,6 +851,28 @@ def point(self, lon: FloatLike, lat: FloatLike) -> Self:
         """
         return super().point(lon, lat)
 
+    def multipoint(self, lon_lat_pairs: Sequence[PointLike]) -> Self:
+        """Filter by granules that include any of several geographic points.
+
+        Parameters:
+            lon_lat_pairs: sequence of (lon, lat) tuples
+
+        Returns:
+            self
+        """
+        # Discard any point filter set earlier so the list stored by `point` holds
+        # only the pair just added; `[0]` below could otherwise read a stale entry.
+        self.params.pop("point", None)
+        points = []
+
+        for lon, lat in lon_lat_pairs:
+            self.point(lon, lat)
+            points.append(self.params.pop("point")[0])
+
+        self.params["point"] = points
+        self.options["point"] = {"or": True}
+        return self
+
     @override
     def polygon(self, coordinates: Sequence[PointLike]) -> Self:
         """Filter by granules that overlap a polygonal area. Must be used in combination
@@ -869,6 +891,25 @@ def polygon(self, coordinates: Sequence[PointLike]) -> Self:
         """
         return super().polygon(coordinates)
 
+    def multipolygon(self, multi_coordinates: Sequence[Sequence[PointLike]]) -> Self:
+        """Filter by granules that overlap any polygon from an input list.
+
+        Parameters:
+            multi_coordinates: list of polygons, each a list of (lon, lat) tuples
+
+        Returns:
+            self
+        """
+        polygons = []
+
+        for polygon in multi_coordinates:
+            self.polygon(polygon)
+            polygons.append(self.params.pop("polygon"))
+
+        self.params["polygon"] = polygons
+        self.options["polygon"] = {"or": True}
+        return self
+
     @override
     def bounding_box(
         self,
@@ -895,6 +936,28 @@ def bounding_box(
         return super().bounding_box(
             lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat
         )
+
+    def multi_bounding_box(
+        self, boxes: Sequence[Tuple[FloatLike, FloatLike, FloatLike, FloatLike]]
+    ) -> Self:
+        """Filter by granules that overlap any bounding box from an input list.
+
+        Parameters:
+            boxes: list of tuples of
+                (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
+
+        Returns:
+            self
+        """
+        bboxes = []
+
+        for box in boxes:
+            self.bounding_box(*box)
+            bboxes.append(self.params.pop("bounding_box"))
+
+        self.params["bounding_box"] = bboxes
+        self.options["bounding_box"] = {"or": True}
+        return self
 
     @override
     def line(self, coordinates: Sequence[PointLike]) -> Self:
@@ -913,6 +976,46 @@ def line(self, coordinates: Sequence[PointLike]) -> Self:
                 pairs, or a coordinate could not be converted to a float.
         """
         return super().line(coordinates)
+
+    def multiline(self, multi_coordinates: Sequence[Sequence[PointLike]]) -> Self:
+        """Filter by granules that overlap any line from an input list.
+
+        Parameters:
+            multi_coordinates: list of lines, each a list of (lon, lat) tuples
+
+        Returns:
+            self
+        """
+        lines = []
+
+        for line in multi_coordinates:
+            self.line(line)
+            lines.append(self.params.pop("line"))
+
+        self.params["line"] = lines
+        self.options["line"] = {"or": True}
+        return self
+
+    def multicircle(
+        self, multi_circles: Sequence[Tuple[FloatLike, FloatLike, FloatLike]]
+    ) -> Self:
+        """Filter by granules that overlap any circle from an input list.
+
+        Parameters:
+            multi_circles: list of tuples of (lon, lat, radius)
+
+        Returns:
+            self
+        """
+        circles = []
+
+        for circle in multi_circles:
+            self.circle(*circle)
+            circles.append(self.params.pop("circle"))
+
+        self.params["circle"] = circles
+        self.options["circle"] = {"or": True}
+        return self
 
     @override
     def downloadable(self, downloadable: bool = True) -> Self:
diff --git a/tests/integration/test_search_data.py b/tests/integration/test_search_data.py
index 29f18d89..e5f533e2 100644
--- a/tests/integration/test_search_data.py
+++ b/tests/integration/test_search_data.py
@@ -260,3 +260,141 @@ def test_search_data_by_short_name_with_line():
         count=expected_count,
     )
     assert len(results) > 0
+
+
+@pytest.mark.skipif(SKIP_THIS, reason="calls python-cmr, set SKIP_THIS=False to run")
+def test_search_data_by_short_name_with_multipoint():
+    """Tests searching for granules with multiple points."""
+    # Test with 2 points
+    multipoint_coords = [
+        (-105.61708725711999, 36.38510879364757),  # Taos, NM
+        (-112.73, 42.5),  # Idaho/Utah area
+    ]
+    results = earthaccess.search_data(
+        short_name="MOD10A1",
+        multipoint=multipoint_coords,
+        count=expected_count,
+    )
+    assert len(results) > 0
+
+    # Compare against a search that uses only the first point
+    single_point_results = earthaccess.search_data(
+        short_name="MOD10A1",
+        point=multipoint_coords[0],
+        count=expected_count,
+    )
+    # multipoint ORs its points, so it should match at least as many granules
+    assert len(results) >= len(single_point_results)
+
+
+@pytest.mark.skipif(SKIP_THIS, reason="calls python-cmr, set SKIP_THIS=False to run")
+def test_search_data_by_short_name_with_multipolygon():
+    """Tests searching for granules with multiple polygons."""
+    # Second polygon near Greenland
+    polygon2 = [
+        (-45.0, 70.0),
+        (-45.0, 68.0),
+        (-40.0, 68.0),
+        (-40.0, 70.0),
+        (-45.0, 70.0),
+    ]
+
+    multipolygon_coords = [polygon, polygon2]
+
+    results = earthaccess.search_data(
+        short_name="ATL06",
+        multipolygon=multipolygon_coords,
+        count=expected_count,
+    )
+    assert len(results) > 0
+
+    # Compare against a search that uses only the first polygon
+    single_polygon_results = earthaccess.search_data(
+        short_name="ATL06",
+        polygon=polygon,
+        count=expected_count,
+    )
+    # multipolygon ORs its polygons, so it should match at least as many granules
+    assert len(results) >= len(single_polygon_results)
+
+
+@pytest.mark.skipif(SKIP_THIS, reason="calls python-cmr, set SKIP_THIS=False to run")
+def test_search_data_by_short_name_with_multi_bounding_box():
+    """Tests searching for granules with multiple bounding boxes."""
+    # Greenland area bounding boxes
+    bbox1 = (-46.5, 61.0, -42.5, 63.0)  # Original bbox from existing test
+    bbox2 = (-50.0, 65.0, -45.0, 68.0)  # Another Greenland area
+
+    multi_bboxes = [bbox1, bbox2]
+
+    results = earthaccess.search_data(
+        short_name="ATL06",
+        multi_bounding_box=multi_bboxes,
+        count=expected_count,
+    )
+    assert len(results) > 0
+
+    # Compare against a search that uses only the first bounding box
+    single_bbox_results = earthaccess.search_data(
+        short_name="ATL06",
+        bounding_box=bbox1,
+        count=expected_count,
+    )
+    # multi_bounding_box ORs its boxes, so it should match at least as many granules
+    assert len(results) >= len(single_bbox_results)
+
+
+@pytest.mark.skipif(SKIP_THIS, reason="calls python-cmr, set SKIP_THIS=False to run")
+def test_search_data_by_short_name_with_multiline():
+    """Tests searching for granules with multiple lines."""
+    # Second line in a different area
+    line2 = [
+        (-120.0, 40.0),
+        (-119.0, 41.0),
+        (-118.0, 42.0),
+        (-117.0, 43.0),
+    ]
+
+    multiline_coords = [line, line2]
+
+    results = earthaccess.search_data(
+        short_name="ATL08",
+        multiline=multiline_coords,
+        count=expected_count,
+    )
+    assert len(results) > 0
+
+    # Compare against a search that uses only the first line
+    single_line_results = earthaccess.search_data(
+        short_name="ATL08",
+        line=line,
+        count=expected_count,
+    )
+    # multiline ORs its lines, so it should match at least as many granules
+    assert len(results) >= len(single_line_results)
+
+
+@pytest.mark.skipif(SKIP_THIS, reason="calls python-cmr, set SKIP_THIS=False to run")
+def test_search_data_by_short_name_with_multicircle():
+    """Tests searching for granules with multiple circles."""
+    # Define two circles
+    circle1 = (-105.61708725711999, 36.38510879364757, 1000.0)  # Taos, NM
+    circle2 = (-110.0, 35.0, 1500.0)  # Another area
+
+    multicircle_coords = [circle1, circle2]
+
+    results = earthaccess.search_data(
+        short_name="ATL03",
+        multicircle=multicircle_coords,
+        count=expected_count,
+    )
+    assert len(results) > 0
+
+    # Compare against a search that uses only the first circle
+    single_circle_results = earthaccess.search_data(
+        short_name="ATL03",
+        circle=circle1,
+        count=expected_count,
+    )
+    # multicircle ORs its circles, so it should match at least as many granules
+    assert len(results) >= len(single_circle_results)