|
11 | 11 |
|
12 | 12 | import requests |
13 | 13 | import yaml |
| 14 | +from more_itertools import take |
14 | 15 | from pystac import STACValidationError |
15 | 16 | from pystac_client import Client |
16 | 17 | from shapely.geometry import shape |
@@ -286,7 +287,12 @@ def validate_api( |
286 | 287 | print("Validating STAC API - Features conformance class.") |
287 | 288 | validate_collections(root_body, collection, warnings, errors) # type:ignore |
288 | 289 | validate_features( |
289 | | - root_body, conforms_to, collection, warnings, errors # type:ignore |
| 290 | + root_body, |
| 291 | + conforms_to, |
| 292 | + collection, |
| 293 | + geometry, |
| 294 | + warnings, |
| 295 | + errors, |
290 | 296 | ) |
291 | 297 | else: |
292 | 298 | print("Skipping STAC API - Features conformance class.") |
@@ -320,8 +326,8 @@ def validate_api( |
320 | 326 |
|
321 | 327 |
|
322 | 328 | def link_by_rel( |
323 | | - links: Optional[List[Dict[str, str]]], rel: str |
324 | | -) -> Optional[Dict[str, str]]: |
| 329 | + links: Optional[List[Dict[str, Any]]], rel: str |
| 330 | +) -> Optional[Dict[str, Any]]: |
325 | 331 | if not links: |
326 | 332 | return None |
327 | 333 | else: |
@@ -455,12 +461,21 @@ def validate_collections( |
455 | 461 | def validate_features( |
456 | 462 | root_body: Dict[str, Any], |
457 | 463 | conforms_to: List[str], |
458 | | - collection: str, |
| 464 | + collection: Optional[str], |
| 465 | + geometry: Optional[str], |
459 | 466 | warnings: List[str], |
460 | 467 | errors: List[str], |
461 | 468 | ) -> None: |
462 | 469 | print("WARNING: Features validation is not yet fully implemented.") |
463 | 470 |
|
| 471 | + if not geometry: |
| 472 | + errors.append("Geometry parameter required for running Features validations.") |
| 473 | + return |
| 474 | + |
| 475 | + if not collection: |
| 476 | + errors.append("Collection parameter required for running Features validations.") |
| 477 | + return |
| 478 | + |
464 | 479 | if conforms_to and ( |
465 | 480 | req_ccs := [ |
466 | 481 | x |
@@ -524,24 +539,42 @@ def validate_features( |
524 | 539 | f"service-desc ({conformance}): must return JSON, instead got non-JSON text" |
525 | 540 | ) |
526 | 541 |
|
527 | | - # this is hard to figure out, since it's likely a mistake, but most apis can't undo it for |
528 | | - # backwards-compat reasons |
529 | | - if not (link_by_rel(root_links, "collections") is None): |
530 | | - warnings.append( |
531 | | - "/ Link[rel=collections] is a non-standard relation. Use Link[rel=data instead]" |
532 | | - ) |
| 542 | + # this is hard to figure out, since it's likely a mistake, but most apis can't undo it for |
| 543 | + # backwards-compat reasons |
| 544 | + if not (link_by_rel(root_links, "collections") is None): |
| 545 | + warnings.append( |
| 546 | + "/ Link[rel=collections] is a non-standard relation. Use Link[rel=data instead]" |
| 547 | + ) |
533 | 548 |
|
534 | | - # todo: validate items exists |
| 549 | + # todo: validate items exists in the collection |
535 | 550 |
|
536 | | - if not (collections_url := link_by_rel(root_links, "data")): |
537 | | - errors.append("/: Link[rel=data] must href /collections") |
| 551 | + if not (collections_url := link_by_rel(root_links, "data")): |
| 552 | + errors.append("/: Link[rel=data] must href /collections") |
| 553 | + else: |
| 554 | + item_url = f"{collections_url['href']}/{collection}/items/non-existent-item" |
| 555 | + r = requests.get(item_url) |
| 556 | + if r.status_code != 404: |
| 557 | + errors.append( |
| 558 | + f"[Features] GET {item_url} (non-existent item) returned status code {r.status_code} instead of 404" |
| 559 | + ) |
| 560 | + |
| 561 | + if not (collections_url := link_by_rel(root_links, "data")): |
| 562 | + errors.append( |
| 563 | + "/: Link[rel=data] must href /collections, cannot run pagination test" |
| 564 | + ) |
| 565 | + else: |
| 566 | + if not (self_link := link_by_rel(root_links, "self")): |
| 567 | + errors.append("/: Link[rel=self] missing") |
538 | 568 | else: |
539 | | - item_url = f"{collections_url['href']}/{collection}/items/non-existent-item" |
540 | | - r = requests.get(item_url) |
541 | | - if r.status_code != 404: |
542 | | - errors.append( |
543 | | - f"[Features] GET {item_url} (non-existent item) returned status code {r.status_code} instead of 404" |
544 | | - ) |
| 569 | + validate_item_pagination( |
| 570 | + root_url=self_link.get("href", ""), |
| 571 | + search_url=f"{collections_url['href']}/{collection}/items", |
| 572 | + collection=None, |
| 573 | + geometry=geometry, |
| 574 | + post=False, |
| 575 | + errors=errors, |
| 576 | + use_pystac_client=False, |
| 577 | + ) |
545 | 578 |
|
546 | 579 | # if any(cc_features_fields_regex.fullmatch(x) for x in conforms_to): |
547 | 580 | # print("STAC API - Features - Fields extension conformance class found.") |
@@ -631,6 +664,15 @@ def validate_item_search( |
631 | 664 | geometry=geometry, |
632 | 665 | ) |
633 | 666 |
|
| 667 | + validate_item_pagination( |
| 668 | + root_url=root_url, |
| 669 | + search_url=search_url, |
| 670 | + collection=collection, |
| 671 | + geometry=geometry, |
| 672 | + post=post, |
| 673 | + errors=errors, |
| 674 | + ) |
| 675 | + |
634 | 676 | # if any(cc_item_search_fields_regex.fullmatch(x) for x in conforms_to): |
635 | 677 | # print("STAC API - Item Search - Fields extension conformance class found.") |
636 | 678 | # |
@@ -1034,6 +1076,166 @@ def validate_item_search_bbox_xor_intersects( |
1034 | 1076 | ) |
1035 | 1077 |
|
1036 | 1078 |
|
def validate_item_pagination(
    root_url: str,
    search_url: str,
    collection: Optional[str],
    geometry: str,
    post: bool,
    errors: List[str],
    use_pystac_client: bool = True,
) -> None:
    """Check that paging through ``search_url`` works, recording problems in ``errors``.

    The checks run in this order:

    1. GET ``search_url`` with ``limit=1`` and verify the ``next`` link of the
       first page can be followed.
    2. If ``use_pystac_client`` is set, page through a GET search with
       pystac-client and check the item count and item id uniqueness.
    3. If ``post`` is set, repeat step 1 using POST requests.
    4. If both ``post`` and ``use_pystac_client`` are set, repeat step 2 with
       POST and additionally page through an ``intersects`` search.

    Args:
        root_url: URL handed to ``Client.open`` for the pystac-client checks.
        search_url: Endpoint that is paged through with ``limit=1``.
        collection: Collection id used to restrict the requests. When ``None``
            no collection filter is sent at all.
        geometry: Value passed as ``intersects`` to the POST client search.
        post: Whether the POST variants of the checks are run.
        errors: List that error messages are appended to (mutated in place).
        use_pystac_client: Whether the pystac-client based checks are run.
            NOTE(review): pystac-client is only used for searching here, so
            callers pointing ``search_url`` at a non-search endpoint are
            assumed to pass ``False`` -- confirm against callers.

    Returns:
        None. All findings are reported through ``errors``.
    """
    # Never send ``[None]`` as a collection filter when no collection is given.
    collections: Optional[List[str]] = None if collection is None else [collection]

    url = f"{search_url}?limit=1"
    if collection is not None:
        url = f"{url}&collections={collection}"
    _validate_get_next_link(url, errors)

    # todo: how to paginate over items, not just search?
    client = Client.open(root_url) if use_pystac_client else None

    if client is not None:
        _validate_client_pagination(client, "GET", collections, errors)

    # GET paging has a problem with intersects, so the intersects paging check
    # below is only run for POST:
    # https://github.com/stac-utils/pystac-client/issues/335

    if not post:
        return

    _validate_post_next_link(search_url, collections, errors)

    if client is None:
        return

    _validate_client_pagination(client, "POST", collections, errors)

    search = client.search(method="POST", collections=collections, intersects=geometry)
    # ``take`` caps the iteration so a self-referencing last page cannot loop forever.
    if len(list(take(20000, search.items_as_dicts()))) == 20000:
        errors.append(
            "STAC API - Item Search POST pagination - paged through 20,000 results. This could mean the last page "
            "of results references itself, or your collection and geometry combination has too many results."
        )


def _next_link(body: Any) -> Optional[Dict[str, Any]]:
    """Return the ``next`` link of a decoded response body, or None if absent."""
    # A body that is not a JSON object cannot carry links; treat it as "no next link".
    links = body.get("links") if isinstance(body, dict) else None
    return link_by_rel(links, "next")


def _validate_get_next_link(url: str, errors: List[str]) -> None:
    """GET the first page at ``url`` and check that its ``next`` link can be followed."""
    r = requests.get(url)
    if r.status_code != 200:
        errors.append(
            "STAC API - Item Search GET pagination get failed for initial request"
        )
        return

    # Only the decode is guarded. ValueError covers both the stdlib and the
    # simplejson flavoured decode errors that ``Response.json`` may raise.
    try:
        first_body = r.json()
    except ValueError:
        errors.append(f"STAC API - Item Search GET pagination response failed {url}")
        return

    link = _next_link(first_body)
    if not link:
        errors.append(
            "STAC API - Item Search GET pagination first request had no 'next' link relation"
        )
        return

    if (method := link.get("method")) and method != "GET":
        errors.append(
            f"STAC API - Item Search GET pagination first request 'next' link relation has method {method} instead of 'GET'"
        )

    next_url = link.get("href")
    if next_url is None:
        errors.append(
            "STAC API - Item Search GET pagination first request 'next' link relation missing href"
        )
        return

    if url == next_url:
        errors.append(
            "STAC API - Item Search GET pagination next href same as first url"
        )

    r = requests.get(next_url)
    if r.status_code != 200:
        errors.append(
            f"STAC API - Item Search GET pagination get failed for next url {next_url}"
        )
        return

    # Mirror the POST check: the second page must decode as JSON as well.
    try:
        r.json()
    except ValueError:
        errors.append(
            f"STAC API - Item Search GET pagination response failed {next_url}"
        )


def _validate_post_next_link(
    search_url: str, collections: Optional[List[str]], errors: List[str]
) -> None:
    """POST the first page to ``search_url`` and check that its ``next`` link can be followed."""
    initial_json_body: Dict[str, Any] = {"limit": 1}
    if collections is not None:
        initial_json_body["collections"] = collections

    r = requests.post(search_url, json=initial_json_body)
    if r.status_code != 200:
        errors.append(
            "STAC API - Item Search POST pagination get failed for initial request"
        )
        return

    try:
        first_body = r.json()
    except ValueError:
        errors.append("STAC API - Item Search POST pagination response failed")
        return

    link = _next_link(first_body)
    if not link:
        errors.append(
            "STAC API - Item Search POST pagination first request had no 'next' link relation"
        )
        return

    if (method := link.get("method")) and method != "POST":
        errors.append(
            f"STAC API - Item Search POST pagination first request 'next' link relation has method {method} instead of 'POST'"
        )

    next_url = link.get("href")
    if next_url is None:
        errors.append(
            "STAC API - Item Search POST pagination first request 'next' link relation missing href"
        )
        return

    # An explicit ``"body": null`` is treated like a missing body.
    next_body: Dict[str, Any] = link.get("body") or {}
    if link.get("merge", False):
        # Build a new dict so the first request body is not mutated.
        second_json_body = {**initial_json_body, **next_body}
    else:
        second_json_body = next_body

    # For POST the href alone may legitimately repeat, since the page position
    # can be carried in the body; only an identical href *and* body means the
    # next request would just repeat the first one.
    if next_url == search_url and second_json_body == initial_json_body:
        errors.append(
            "STAC API - Item Search POST pagination next href and body same as first request"
        )

    r = requests.post(next_url, json=second_json_body)
    if r.status_code != 200:
        errors.append(
            f"STAC API - Item Search POST pagination get failed for next url {next_url} with body {second_json_body}"
        )
        return

    try:
        r.json()
    except ValueError:
        errors.append("STAC API - Item Search POST pagination response failed")


def _validate_client_pagination(
    client: "Client",
    method: str,
    collections: Optional[List[str]],
    errors: List[str],
) -> None:
    """Page through a search with pystac-client and check item count and id uniqueness."""
    max_items = 100
    search = client.search(
        method=method, collections=collections, max_items=max_items, limit=5
    )

    items = list(search.items_as_dicts())

    if len(items) > max_items:
        errors.append(
            f"STAC API - Item Search {method} pagination - more than max items returned from paginating"
        )

    # Fewer distinct ids than items means at least one item came back twice.
    if len(items) > len({item["id"] for item in items}):
        errors.append(
            f"STAC API - Item Search {method} pagination - duplicate items returned from paginating items"
        )
| 1237 | + |
| 1238 | + |
1037 | 1239 | def validate_item_search_intersects( |
1038 | 1240 | search_url: str, collection: str, post: bool, errors: List[str], geometry: str |
1039 | 1241 | ) -> None: |
|
0 commit comments