From 5ead8aa9fe431c6d423576fd3c4222c847041c76 Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Mon, 26 Jun 2023 10:27:10 -0600 Subject: [PATCH 1/8] Add ability to read other method types than GET for OAS ingest --- .../api/entities/dataproduct/dataproduct.py | 3 - .../datahub/cli/specific/dataproduct_cli.py | 1 - .../source/bigquery_v2/bigquery_audit.py | 1 - .../ingestion/source/bigquery_v2/usage.py | 1 - .../source/data_lake_common/config.py | 1 - .../ingestion/source/looker/looker_config.py | 1 - .../ingestion/source/looker/looker_source.py | 1 - .../src/datahub/ingestion/source/nifi.py | 1 - .../src/datahub/ingestion/source/openapi.py | 6 + .../ingestion/source/openapi_parser.py | 130 +++++++++--------- .../ingestion/source/powerbi/powerbi.py | 1 - .../source/snowflake/snowflake_query.py | 1 - .../source/state/entity_removal_state.py | 6 +- .../transformer/generic_aspect_transformer.py | 3 +- .../integration/azure_ad/test_azure_ad.py | 1 - .../tests/integration/looker/test_looker.py | 1 - .../tests/integration/okta/test_okta.py | 3 - .../tests/integration/powerbi/test_powerbi.py | 1 - .../integration/superset/test_superset.py | 2 - .../tests/unit/test_nifi_source.py | 4 - .../tests/unit/test_unity_catalog_config.py | 1 - 21 files changed, 78 insertions(+), 92 deletions(-) diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 04f12b4f61d1e1..b48d4e259a7fa9 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -225,7 +225,6 @@ def _generate_properties_mcp( def generate_mcp( self, upsert: bool ) -> Iterable[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]: - if self._resolved_domain_urn is None: raise Exception( f"Unable to generate MCP-s because we were unable to resolve the domain {self.domain} to an urn." 
@@ -440,7 +439,6 @@ def patch_yaml( original_dataproduct: DataProduct, output_file: Path, ) -> bool: - update_needed = False if not original_dataproduct._original_yaml_dict: raise Exception("Original Data Product was not loaded from yaml") @@ -523,7 +521,6 @@ def to_yaml( self, file: Path, ) -> None: - with open(file, "w") as fp: yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) yaml.indent(mapping=2, sequence=4, offset=2) diff --git a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py index 5d6c65512354a2..bdb08c353f8dd2 100644 --- a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py @@ -56,7 +56,6 @@ def _abort_if_non_existent_urn(graph: DataHubGraph, urn: str, operation: str) -> def _print_diff(orig_file, new_file): - with open(orig_file) as fp: orig_lines = fp.readlines() with open(new_file) as fp: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index 379a773e248faa..2c2512da9c0741 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -599,7 +599,6 @@ def from_query_event( query_event: QueryEvent, debug_include_full_payloads: bool = False, ) -> "ReadEvent": - readEvent = ReadEvent( actor_email=query_event.actor_email, timestamp=query_event.timestamp, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 1081dd8eec1ec1..27888d48e48b4c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -664,7 +664,6 @@ def _get_exported_bigquery_audit_metadata( def _get_bigquery_log_entries_via_gcp_logging( self, client: GCPLoggingClient, limit: Optional[int] = None ) -> Iterable[AuditLogEntry]: - filter = self._generate_filter(BQ_AUDIT_V2) logger.debug(filter) diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py index 5f88cf0234947a..ede7d3c3c56959 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py @@ -7,7 +7,6 @@ class PathSpecsConfigMixin(ConfigModel): - path_specs: List[PathSpec] = Field( description="List of PathSpec. 
See [below](#path-spec) the details about PathSpec" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 455614c758bb93..4da30dfba93b64 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -202,7 +202,6 @@ def external_url_defaults_to_api_config_base_url( def stateful_ingestion_should_be_enabled( cls, v: Optional[bool], *, values: Dict[str, Any], **kwargs: Dict[str, Any] ) -> Optional[bool]: - stateful_ingestion: StatefulStaleMetadataRemovalConfig = cast( StatefulStaleMetadataRemovalConfig, values.get("stateful_ingestion") ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 8297a0aa8efa7e..9a6d091693a3f9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -1128,7 +1128,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def emit_independent_looks_mcp( self, dashboard_element: LookerDashboardElement ) -> Iterable[MetadataWorkUnit]: - yield from auto_workunit( stream=self._make_chart_metadata_events( dashboard_element=dashboard_element, diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index 27cd5aeb3c68d0..5f820b22dad515 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -142,7 +142,6 @@ def validate_auth_params(cla, values): @root_validator(pre=False) def validator_site_url_to_site_name(cls, values): - site_url_to_site_name = values.get("site_url_to_site_name") site_url = values.get("site_url") site_name = values.get("site_name") diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi.py b/metadata-ingestion/src/datahub/ingestion/source/openapi.py index ad9aec927832b1..8fe4df0d02740e 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi.py @@ -250,6 +250,12 @@ def get_workunits_internal(self) -> Iterable[ApiWorkUnit]: # noqa: C901 schema_metadata = set_metadata(dataset_name, endpoint_dets["data"]) dataset_snapshot.aspects.append(schema_metadata) yield self.build_wu(dataset_snapshot, dataset_name) + elif endpoint_dets["method"] != "get": + self.report.report_warning( + key=endpoint_k, + reason=f"No example provided for {endpoint_dets['method']}", + ) + continue # Only test endpoints if they're GETs elif ( "{" not in endpoint_k ): # if the API does not explicitly require parameters diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index 14597618a9d51b..39217e2b42d11c 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -89,10 +89,13 @@ def get_swag_json( def get_url_basepath(sw_dict: dict) -> str: - try: + if "basePath" in sw_dict: return sw_dict["basePath"] - except KeyError: # no base path defined - return "" + if "servers" in sw_dict: + # When the API path doesn't match the OAS path + return sw_dict["servers"][0]["url"] + + return "" def check_sw_version(sw_dict: 
dict) -> None: @@ -111,70 +114,69 @@ def check_sw_version(sw_dict: dict) -> None: def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 """ - Get all the URLs accepting the "GET" method, together with their description and the tags + Get all the URLs, together with their description and the tags """ url_details = {} check_sw_version(sw_dict) for p_k, p_o in sw_dict["paths"].items(): - # will track only the "get" methods, which are the ones that give us data - if "get" in p_o.keys(): - if "200" in p_o["get"]["responses"].keys(): - base_res = p_o["get"]["responses"]["200"] - elif 200 in p_o["get"]["responses"].keys(): - # if you read a plain yml file the 200 will be an integer - base_res = p_o["get"]["responses"][200] - else: - # the endpoint does not have a 200 response - continue - - if "description" in p_o["get"].keys(): - desc = p_o["get"]["description"] - elif "summary" in p_o["get"].keys(): - desc = p_o["get"]["summary"] - else: # still testing - desc = "" - - try: - tags = p_o["get"]["tags"] - except KeyError: - tags = [] - - url_details[p_k] = {"description": desc, "tags": tags} - - # trying if dataset is defined in swagger... - if "content" in base_res.keys(): - res_cont = base_res["content"] - if "application/json" in res_cont.keys(): - ex_field = None - if "example" in res_cont["application/json"]: - ex_field = "example" - elif "examples" in res_cont["application/json"]: - ex_field = "examples" - - if ex_field: - if isinstance(res_cont["application/json"][ex_field], dict): - url_details[p_k]["data"] = res_cont["application/json"][ - ex_field - ] - elif isinstance(res_cont["application/json"][ex_field], list): - # taking the first example - url_details[p_k]["data"] = res_cont["application/json"][ - ex_field - ][0] - else: - logger.warning( - f"Field in swagger file does not give consistent data --- {p_k}" - ) - elif "text/csv" in res_cont.keys(): - url_details[p_k]["data"] = res_cont["text/csv"]["schema"] - elif "examples" in base_res.keys(): - url_details[p_k]["data"] = base_res["examples"]["application/json"] - - # checking whether there are defined parameters to execute the call... - if "parameters" in p_o["get"].keys(): - url_details[p_k]["parameters"] = p_o["get"]["parameters"] + method = list(p_o)[0] + if "200" in p_o[method]["responses"].keys(): + base_res = p_o[method]["responses"]["200"] + elif 200 in p_o[method]["responses"].keys(): + # if you read a plain yml file the 200 will be an integer + base_res = p_o[method]["responses"][200] + else: + # the endpoint does not have a 200 response + continue + + if "description" in p_o[method].keys(): + desc = p_o[method]["description"] + elif "summary" in p_o[method].keys(): + desc = p_o[method]["summary"] + else: # still testing + desc = "" + + try: + tags = p_o[method]["tags"] + except KeyError: + tags = [] + + url_details[p_k] = {"description": desc, "tags": tags, "method": method} + + # trying if dataset is defined in swagger... 
+ if "content" in base_res.keys(): + res_cont = base_res["content"] + if "application/json" in res_cont.keys(): + ex_field = None + if "example" in res_cont["application/json"]: + ex_field = "example" + elif "examples" in res_cont["application/json"]: + ex_field = "examples" + + if ex_field: + if isinstance(res_cont["application/json"][ex_field], dict): + url_details[p_k]["data"] = res_cont["application/json"][ + ex_field + ] + elif isinstance(res_cont["application/json"][ex_field], list): + # taking the first example + url_details[p_k]["data"] = res_cont["application/json"][ + ex_field + ][0] + else: + logger.warning( + f"Field in swagger file does not give consistent data --- {p_k}" + ) + elif "text/csv" in res_cont.keys(): + url_details[p_k]["data"] = res_cont["text/csv"]["schema"] + elif "examples" in base_res.keys(): + url_details[p_k]["data"] = base_res["examples"]["application/json"] + + # checking whether there are defined parameters to execute the call... + if "parameters" in p_o[method].keys(): + url_details[p_k]["parameters"] = p_o[method]["parameters"] return dict(sorted(url_details.items())) @@ -314,12 +316,10 @@ def extract_fields( return ["contains_a_string"], {"contains_a_string": dict_data[0]} else: raise ValueError("unknown format") - if len(dict_data.keys()) > 1: + if len(dict_data) > 0: # the elements are directly inside the dict return flatten2list(dict_data), dict_data - dst_key = list(dict_data.keys())[ - 0 - ] # the first and unique key is the dataset's name + dst_key = list(dict_data)[0] # the first and unique key is the dataset's name try: return flatten2list(dict_data[dst_key]), dict_data[dst_key] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 33596091e420d0..afa1af906be091 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -256,7 +256,6 @@ def to_datahub_schema( self, table: powerbi_data_classes.Table, ) -> SchemaMetadataClass: - fields = [] table_fields = ( [self.to_datahub_schema_field(column) for column in table.columns] diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 40aaf2e4145a8c..076e6382f379c5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -158,7 +158,6 @@ def get_all_tags_on_object_with_propagation( @staticmethod def get_all_tags_in_database_without_propagation(db_name: str) -> str: - allowed_object_domains = ( "(" f"'{SnowflakeObjectDomain.DATABASE.upper()}'," diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py index 0f031177c403a3..0e141b32cd39d5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py @@ -135,7 +135,11 @@ def get_percent_entities_changed( def compute_percent_entities_changed( new_entities: List[str], old_entities: List[str] ) -> float: - (overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities( + ( + overlap_count, + old_count, + _, + ) = _get_entity_overlap_and_cardinalities( new_entities=new_entities, 
old_entities=old_entities ) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py index 4dc5f12005e499..b09e67d9168eac 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py @@ -38,7 +38,8 @@ def transform_generic_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[GenericAspectClass] ) -> Optional[GenericAspectClass]: """Implement this method to transform the single custom aspect for an entity. - The purpose of this abstract method is to reinforce the use of GenericAspectClass.""" + The purpose of this abstract method is to reinforce the use of GenericAspectClass. + """ pass def _transform_or_record_mcpc( diff --git a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py index 7005bc2e4411bf..024bb62bbe9ce9 100644 --- a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py +++ b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py @@ -68,7 +68,6 @@ def run_ingest( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, ) as mock_checkpoint: - mock_checkpoint.return_value = mock_datahub_graph mocked_functions_reference( diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index 9dc15fae3a23ba..a55c646d2cbc6a 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -914,7 +914,6 @@ def test_independent_soft_deleted_looks( mocked_client = mock.MagicMock() with mock.patch("looker_sdk.init40") as mock_sdk: - mock_sdk.return_value = mocked_client setup_mock_look(mocked_client) setup_mock_soft_deleted_look(mocked_client) diff --git a/metadata-ingestion/tests/integration/okta/test_okta.py b/metadata-ingestion/tests/integration/okta/test_okta.py index 63ef8793cadddc..10148273c93666 100644 --- a/metadata-ingestion/tests/integration/okta/test_okta.py +++ b/metadata-ingestion/tests/integration/okta/test_okta.py @@ -58,14 +58,12 @@ def run_ingest( mocked_functions_reference, recipe, ): - with patch( "datahub.ingestion.source.identity.okta.OktaClient" ) as MockClient, patch( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, ) as mock_checkpoint: - mock_checkpoint.return_value = mock_datahub_graph mocked_functions_reference(MockClient=MockClient) @@ -277,7 +275,6 @@ def overwrite_group_in_mocked_data(test_resources_dir, MockClient): def _init_mock_okta_client( test_resources_dir, MockClient, mock_users_json=None, mock_groups_json=None ): - okta_users_json_file = ( test_resources_dir / "okta_users.json" if mock_users_json is None diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 5036f758a7de9b..9a4faa19d13559 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1420,7 +1420,6 @@ def test_reports_with_failed_page_request( def test_independent_datasets_extraction( mock_msal, pytestconfig, tmp_path, mock_time, requests_mock ): - test_resources_dir = pytestconfig.rootpath / 
"tests/integration/powerbi" register_mock_api( diff --git a/metadata-ingestion/tests/integration/superset/test_superset.py b/metadata-ingestion/tests/integration/superset/test_superset.py index bc299e36515e18..89d16458c655e4 100644 --- a/metadata-ingestion/tests/integration/superset/test_superset.py +++ b/metadata-ingestion/tests/integration/superset/test_superset.py @@ -151,7 +151,6 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_superset_ingest(pytestconfig, tmp_path, mock_time, requests_mock): - test_resources_dir = pytestconfig.rootpath / "tests/integration/superset" register_mock_api(request_mock=requests_mock) @@ -193,7 +192,6 @@ def test_superset_ingest(pytestconfig, tmp_path, mock_time, requests_mock): def test_superset_stateful_ingest( pytestconfig, tmp_path, mock_time, requests_mock, mock_datahub_graph ): - test_resources_dir = pytestconfig.rootpath / "tests/integration/superset" register_mock_api(request_mock=requests_mock) diff --git a/metadata-ingestion/tests/unit/test_nifi_source.py b/metadata-ingestion/tests/unit/test_nifi_source.py index a90f03bea3e425..384c817ada5ad6 100644 --- a/metadata-ingestion/tests/unit/test_nifi_source.py +++ b/metadata-ingestion/tests/unit/test_nifi_source.py @@ -316,7 +316,6 @@ def test_client_cert_auth_without_client_cert_file(): def test_single_user_auth_failed_to_get_token(): - config = NifiSourceConfig( site_url="https://localhost:12345", # will never work username="username", @@ -338,7 +337,6 @@ def test_single_user_auth_failed_to_get_token(): def test_kerberos_auth_failed_to_get_token(): - config = NifiSourceConfig( site_url="https://localhost:12345", # will never work auth="KERBEROS", @@ -358,7 +356,6 @@ def test_kerberos_auth_failed_to_get_token(): def test_client_cert_auth_failed(): - config = NifiSourceConfig( site_url="https://localhost:12345", # will never work auth="CLIENT_CERT", @@ -379,7 +376,6 @@ def test_client_cert_auth_failed(): def test_failure_to_create_nifi_flow(): - with patch("datahub.ingestion.source.nifi.NifiSource.authenticate"): config = NifiSourceConfig( site_url="https://localhost:12345", # will never work diff --git a/metadata-ingestion/tests/unit/test_unity_catalog_config.py b/metadata-ingestion/tests/unit/test_unity_catalog_config.py index 4be6f60171844b..f7ba1483deb210 100644 --- a/metadata-ingestion/tests/unit/test_unity_catalog_config.py +++ b/metadata-ingestion/tests/unit/test_unity_catalog_config.py @@ -63,7 +63,6 @@ def test_profiling_requires_warehouses_id(): @freeze_time(FROZEN_TIME) def test_workspace_url_should_start_with_https(): - with pytest.raises(ValueError, match="Workspace URL must start with http scheme"): UnityCatalogSourceConfig.parse_obj( { From a734b143735d426e1481265adb5a7b3361d052d5 Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Mon, 26 Jun 2023 13:10:23 -0600 Subject: [PATCH 2/8] Fix readme for new function --- .../docs/sources/openapi/openapi.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/docs/sources/openapi/openapi.md b/metadata-ingestion/docs/sources/openapi/openapi.md index b3231b018bddeb..1fa52133c067a7 100644 --- a/metadata-ingestion/docs/sources/openapi/openapi.md +++ b/metadata-ingestion/docs/sources/openapi/openapi.md @@ -2,11 +2,11 @@ The dataset metadata should be defined directly in the Swagger file, section `[" ## Capabilities -The plugin read the swagger file where the endopints are defined and searches for the ones 
which accept -a `GET` call: those are the ones supposed to give back the datasets. +This plugin reads the swagger file where the endpoints are defined, reads example data if provided or searches for +data for the endpoints which accept a `GET` call. For every selected endpoint defined in the `paths` section, -the tool searches whether the medatada are already defined in there. +the tool searches whether the metadata are already defined. As example, if in your swagger file there is the `/api/users/` defined as follows: ```yaml @@ -27,7 +27,7 @@ paths: then this plugin has all the information needed to create the dataset in DataHub. -In case there is no example defined, the plugin will try to get the metadata directly from the endpoint. +In case there is no example defined, the plugin will try to get the metadata directly from the endpoint, if it is a `GET` method. So, if in your swagger file you have ```yaml @@ -42,7 +42,7 @@ paths: description: Return the list of colors ``` -the tool will make a `GET` call to `https:///test_endpoint.com/colors` +the tool will make a `GET` call to `https://test_endpoint.com/colors` and parse the response obtained. ### Automatically recorded examples @@ -53,7 +53,7 @@ Sometimes you can have an endpoint which wants a parameter to work, like Since in the OpenApi specifications the listing endpoints are specified just before the detailed ones, in the list of the paths, you will find - https:///test_endpoint.com/colors + https://test_endpoint.com/colors defined before @@ -80,7 +80,7 @@ and this last URL will be called to get back the needed metadata. If no useful example is found, a second procedure will try to guess a numerical ID. So if we have: - https:///test_endpoint.com/colors/{colorID} + https://test_endpoint.com/colors/{colorID} and there is no `colorID` example already found by the plugin, it will try to put a number one (1) at the parameter place @@ -120,8 +120,8 @@ paths: description: Return details about the group ``` -and the plugin did not found an example in its previous calls, -so the tool have no idea about what substitute to the `{name}` part. +and the plugin did not find an example in its previous calls, +the tool has no idea about what to substitute for the `{name}` part. By specifying in the configuration file From bf9e16732f98e472229f415bee4c341415eca366 Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Mon, 26 Jun 2023 13:11:19 -0600 Subject: [PATCH 3/8] Clarification --- metadata-ingestion/docs/sources/openapi/openapi.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/docs/sources/openapi/openapi.md b/metadata-ingestion/docs/sources/openapi/openapi.md index 1fa52133c067a7..b661d5e14568db 100644 --- a/metadata-ingestion/docs/sources/openapi/openapi.md +++ b/metadata-ingestion/docs/sources/openapi/openapi.md @@ -2,8 +2,8 @@ The dataset metadata should be defined directly in the Swagger file, section `[" ## Capabilities -This plugin reads the swagger file where the endpoints are defined, reads example data if provided or searches for -data for the endpoints which accept a `GET` call. +This plugin reads the swagger file where the endpoints are defined, reads example data if provided (for any method), or searches for +data for the endpoints which do not have example data and accept a `GET` call. For every selected endpoint defined in the `paths` section, the tool searches whether the metadata are already defined. 
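
To make the documented behaviour concrete: after the changes above, an endpoint whose first method is anything other than `get` is ingested only when the spec carries an inline example; otherwise the source records the warning "No example provided for <method>" and skips it instead of attempting a live call. A minimal, hypothetical OpenAPI 3.0 snippet — illustrative only, not part of this series — that the updated source can now pick up:

```yaml
paths:
  /colors:
    post:
      summary: Create a color
      responses:
        '200':
          description: The created color
          content:
            application/json:
              example:
                id: 1
                name: blue
```

Dropping the `example` block here would leave `/colors` without data; since the method is `post`, the source would warn and move on rather than guess at a request body.
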
From cad2babd7b33ad99b87a5f21e03e8fba48448d5d Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Tue, 27 Jun 2023 10:08:39 -0600 Subject: [PATCH 4/8] Undo black changes --- .../src/datahub/api/entities/dataproduct/dataproduct.py | 3 +++ .../src/datahub/cli/specific/dataproduct_cli.py | 1 + .../datahub/ingestion/source/bigquery_v2/bigquery_audit.py | 1 + .../src/datahub/ingestion/source/bigquery_v2/usage.py | 1 + .../src/datahub/ingestion/source/data_lake_common/config.py | 1 + .../src/datahub/ingestion/source/looker/looker_config.py | 1 + .../src/datahub/ingestion/source/looker/looker_source.py | 1 + metadata-ingestion/src/datahub/ingestion/source/nifi.py | 1 + .../src/datahub/ingestion/source/powerbi/powerbi.py | 1 + .../datahub/ingestion/source/snowflake/snowflake_query.py | 1 + .../datahub/ingestion/source/state/entity_removal_state.py | 6 +----- .../ingestion/transformer/generic_aspect_transformer.py | 3 +-- .../tests/integration/azure_ad/test_azure_ad.py | 1 + metadata-ingestion/tests/integration/looker/test_looker.py | 1 + metadata-ingestion/tests/integration/okta/test_okta.py | 3 +++ .../tests/integration/powerbi/test_powerbi.py | 1 + .../tests/integration/superset/test_superset.py | 2 ++ metadata-ingestion/tests/unit/test_nifi_source.py | 4 ++++ metadata-ingestion/tests/unit/test_unity_catalog_config.py | 1 + 19 files changed, 27 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index b48d4e259a7fa9..04f12b4f61d1e1 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -225,6 +225,7 @@ def _generate_properties_mcp( def generate_mcp( self, upsert: bool ) -> Iterable[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]: + if self._resolved_domain_urn is None: raise Exception( f"Unable to generate MCP-s because we were unable to resolve the domain {self.domain} to an urn." 
@@ -439,6 +440,7 @@ def patch_yaml( original_dataproduct: DataProduct, output_file: Path, ) -> bool: + update_needed = False if not original_dataproduct._original_yaml_dict: raise Exception("Original Data Product was not loaded from yaml") @@ -521,6 +523,7 @@ def to_yaml( self, file: Path, ) -> None: + with open(file, "w") as fp: yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) yaml.indent(mapping=2, sequence=4, offset=2) diff --git a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py index bdb08c353f8dd2..5d6c65512354a2 100644 --- a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py @@ -56,6 +56,7 @@ def _abort_if_non_existent_urn(graph: DataHubGraph, urn: str, operation: str) -> def _print_diff(orig_file, new_file): + with open(orig_file) as fp: orig_lines = fp.readlines() with open(new_file) as fp: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index 2c2512da9c0741..379a773e248faa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -599,6 +599,7 @@ def from_query_event( query_event: QueryEvent, debug_include_full_payloads: bool = False, ) -> "ReadEvent": + readEvent = ReadEvent( actor_email=query_event.actor_email, timestamp=query_event.timestamp, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 27888d48e48b4c..1081dd8eec1ec1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -664,6 +664,7 @@ def _get_exported_bigquery_audit_metadata( def _get_bigquery_log_entries_via_gcp_logging( self, client: GCPLoggingClient, limit: Optional[int] = None ) -> Iterable[AuditLogEntry]: + filter = self._generate_filter(BQ_AUDIT_V2) logger.debug(filter) diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py index ede7d3c3c56959..5f88cf0234947a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/config.py @@ -7,6 +7,7 @@ class PathSpecsConfigMixin(ConfigModel): + path_specs: List[PathSpec] = Field( description="List of PathSpec. 
See [below](#path-spec) the details about PathSpec" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 4da30dfba93b64..455614c758bb93 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -202,6 +202,7 @@ def external_url_defaults_to_api_config_base_url( def stateful_ingestion_should_be_enabled( cls, v: Optional[bool], *, values: Dict[str, Any], **kwargs: Dict[str, Any] ) -> Optional[bool]: + stateful_ingestion: StatefulStaleMetadataRemovalConfig = cast( StatefulStaleMetadataRemovalConfig, values.get("stateful_ingestion") ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 9a6d091693a3f9..8297a0aa8efa7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -1128,6 +1128,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def emit_independent_looks_mcp( self, dashboard_element: LookerDashboardElement ) -> Iterable[MetadataWorkUnit]: + yield from auto_workunit( stream=self._make_chart_metadata_events( dashboard_element=dashboard_element, diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index 5f820b22dad515..27cd5aeb3c68d0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -142,6 +142,7 @@ def validate_auth_params(cla, values): @root_validator(pre=False) def validator_site_url_to_site_name(cls, values): + site_url_to_site_name = values.get("site_url_to_site_name") site_url = values.get("site_url") site_name = values.get("site_name") diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index afa1af906be091..33596091e420d0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -256,6 +256,7 @@ def to_datahub_schema( self, table: powerbi_data_classes.Table, ) -> SchemaMetadataClass: + fields = [] table_fields = ( [self.to_datahub_schema_field(column) for column in table.columns] diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 076e6382f379c5..40aaf2e4145a8c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -158,6 +158,7 @@ def get_all_tags_on_object_with_propagation( @staticmethod def get_all_tags_in_database_without_propagation(db_name: str) -> str: + allowed_object_domains = ( "(" f"'{SnowflakeObjectDomain.DATABASE.upper()}'," diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py index 0e141b32cd39d5..0f031177c403a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py @@ -135,11 
+135,7 @@ def get_percent_entities_changed( def compute_percent_entities_changed( new_entities: List[str], old_entities: List[str] ) -> float: - ( - overlap_count, - old_count, - _, - ) = _get_entity_overlap_and_cardinalities( + (overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities( new_entities=new_entities, old_entities=old_entities ) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py index b09e67d9168eac..4dc5f12005e499 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py @@ -38,8 +38,7 @@ def transform_generic_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[GenericAspectClass] ) -> Optional[GenericAspectClass]: """Implement this method to transform the single custom aspect for an entity. - The purpose of this abstract method is to reinforce the use of GenericAspectClass. - """ + The purpose of this abstract method is to reinforce the use of GenericAspectClass.""" pass def _transform_or_record_mcpc( diff --git a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py index 024bb62bbe9ce9..7005bc2e4411bf 100644 --- a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py +++ b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py @@ -68,6 +68,7 @@ def run_ingest( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, ) as mock_checkpoint: + mock_checkpoint.return_value = mock_datahub_graph mocked_functions_reference( diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index a55c646d2cbc6a..9dc15fae3a23ba 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -914,6 +914,7 @@ def test_independent_soft_deleted_looks( mocked_client = mock.MagicMock() with mock.patch("looker_sdk.init40") as mock_sdk: + mock_sdk.return_value = mocked_client setup_mock_look(mocked_client) setup_mock_soft_deleted_look(mocked_client) diff --git a/metadata-ingestion/tests/integration/okta/test_okta.py b/metadata-ingestion/tests/integration/okta/test_okta.py index 10148273c93666..63ef8793cadddc 100644 --- a/metadata-ingestion/tests/integration/okta/test_okta.py +++ b/metadata-ingestion/tests/integration/okta/test_okta.py @@ -58,12 +58,14 @@ def run_ingest( mocked_functions_reference, recipe, ): + with patch( "datahub.ingestion.source.identity.okta.OktaClient" ) as MockClient, patch( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, ) as mock_checkpoint: + mock_checkpoint.return_value = mock_datahub_graph mocked_functions_reference(MockClient=MockClient) @@ -275,6 +277,7 @@ def overwrite_group_in_mocked_data(test_resources_dir, MockClient): def _init_mock_okta_client( test_resources_dir, MockClient, mock_users_json=None, mock_groups_json=None ): + okta_users_json_file = ( test_resources_dir / "okta_users.json" if mock_users_json is None diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 9a4faa19d13559..5036f758a7de9b 100644 --- 
a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1420,6 +1420,7 @@ def test_reports_with_failed_page_request( def test_independent_datasets_extraction( mock_msal, pytestconfig, tmp_path, mock_time, requests_mock ): + test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api( diff --git a/metadata-ingestion/tests/integration/superset/test_superset.py b/metadata-ingestion/tests/integration/superset/test_superset.py index 89d16458c655e4..bc299e36515e18 100644 --- a/metadata-ingestion/tests/integration/superset/test_superset.py +++ b/metadata-ingestion/tests/integration/superset/test_superset.py @@ -151,6 +151,7 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_superset_ingest(pytestconfig, tmp_path, mock_time, requests_mock): + test_resources_dir = pytestconfig.rootpath / "tests/integration/superset" register_mock_api(request_mock=requests_mock) @@ -192,6 +193,7 @@ def test_superset_ingest(pytestconfig, tmp_path, mock_time, requests_mock): def test_superset_stateful_ingest( pytestconfig, tmp_path, mock_time, requests_mock, mock_datahub_graph ): + test_resources_dir = pytestconfig.rootpath / "tests/integration/superset" register_mock_api(request_mock=requests_mock) diff --git a/metadata-ingestion/tests/unit/test_nifi_source.py b/metadata-ingestion/tests/unit/test_nifi_source.py index 384c817ada5ad6..a90f03bea3e425 100644 --- a/metadata-ingestion/tests/unit/test_nifi_source.py +++ b/metadata-ingestion/tests/unit/test_nifi_source.py @@ -316,6 +316,7 @@ def test_client_cert_auth_without_client_cert_file(): def test_single_user_auth_failed_to_get_token(): + config = NifiSourceConfig( site_url="https://localhost:12345", # will never work username="username", @@ -337,6 +338,7 @@ def test_single_user_auth_failed_to_get_token(): def test_kerberos_auth_failed_to_get_token(): + config = NifiSourceConfig( site_url="https://localhost:12345", # will never work auth="KERBEROS", @@ -356,6 +358,7 @@ def test_kerberos_auth_failed_to_get_token(): def test_client_cert_auth_failed(): + config = NifiSourceConfig( site_url="https://localhost:12345", # will never work auth="CLIENT_CERT", @@ -376,6 +379,7 @@ def test_client_cert_auth_failed(): def test_failure_to_create_nifi_flow(): + with patch("datahub.ingestion.source.nifi.NifiSource.authenticate"): config = NifiSourceConfig( site_url="https://localhost:12345", # will never work diff --git a/metadata-ingestion/tests/unit/test_unity_catalog_config.py b/metadata-ingestion/tests/unit/test_unity_catalog_config.py index f7ba1483deb210..4be6f60171844b 100644 --- a/metadata-ingestion/tests/unit/test_unity_catalog_config.py +++ b/metadata-ingestion/tests/unit/test_unity_catalog_config.py @@ -63,6 +63,7 @@ def test_profiling_requires_warehouses_id(): @freeze_time(FROZEN_TIME) def test_workspace_url_should_start_with_https(): + with pytest.raises(ValueError, match="Workspace URL must start with http scheme"): UnityCatalogSourceConfig.parse_obj( { From 540944a95547971d712a2bfe2eea8e8f90d77ee9 Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Tue, 27 Jun 2023 15:28:23 -0600 Subject: [PATCH 5/8] extract method for example data --- .../ingestion/source/openapi_parser.py | 65 ++++++++++--------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py 
b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index 39217e2b42d11c..e665b4082b1177 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -145,34 +145,9 @@ def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 url_details[p_k] = {"description": desc, "tags": tags, "method": method} - # trying if dataset is defined in swagger... - if "content" in base_res.keys(): - res_cont = base_res["content"] - if "application/json" in res_cont.keys(): - ex_field = None - if "example" in res_cont["application/json"]: - ex_field = "example" - elif "examples" in res_cont["application/json"]: - ex_field = "examples" - - if ex_field: - if isinstance(res_cont["application/json"][ex_field], dict): - url_details[p_k]["data"] = res_cont["application/json"][ - ex_field - ] - elif isinstance(res_cont["application/json"][ex_field], list): - # taking the first example - url_details[p_k]["data"] = res_cont["application/json"][ - ex_field - ][0] - else: - logger.warning( - f"Field in swagger file does not give consistent data --- {p_k}" - ) - elif "text/csv" in res_cont.keys(): - url_details[p_k]["data"] = res_cont["text/csv"]["schema"] - elif "examples" in base_res.keys(): - url_details[p_k]["data"] = base_res["examples"]["application/json"] + example_data = check_for_api_example_data(base_res, key) + if example_data: + url_details[p_k]["data"] = example_data # checking whether there are defined parameters to execute the call... if "parameters" in p_o[method].keys(): @@ -181,6 +156,38 @@ def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 return dict(sorted(url_details.items())) +def check_for_api_example_data(base_res: dict, key: str) -> dict: + """ + Try to determine if example data is defined for the endpoint, and return it + """ + data = {} + if "content" in base_res.keys(): + res_cont = base_res["content"] + if "application/json" in res_cont.keys(): + ex_field = None + if "example" in res_cont["application/json"]: + ex_field = "example" + elif "examples" in res_cont["application/json"]: + ex_field = "examples" + + if ex_field: + if isinstance(res_cont["application/json"][ex_field], dict): + data = res_cont["application/json"][ex_field] + elif isinstance(res_cont["application/json"][ex_field], list): + # taking the first example + data = res_cont["application/json"][ex_field][0] + else: + logger.warning( + f"Field in swagger file does not give consistent data --- {key}" + ) + elif "text/csv" in res_cont.keys(): + data = res_cont["text/csv"]["schema"] + elif "examples" in base_res.keys(): + data = base_res["examples"]["application/json"] + + return data + + def guessing_url_name(url: str, examples: dict) -> str: """ given a url and dict of extracted data, we try to guess a working URL. 
Example: @@ -316,7 +323,7 @@ def extract_fields( return ["contains_a_string"], {"contains_a_string": dict_data[0]} else: raise ValueError("unknown format") - if len(dict_data) > 0: + if len(dict_data) > 1: # the elements are directly inside the dict return flatten2list(dict_data), dict_data dst_key = list(dict_data)[0] # the first and unique key is the dataset's name From 4f2460003f3b59a0145952fefdba54f8df9998a4 Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Tue, 27 Jun 2023 15:28:48 -0600 Subject: [PATCH 6/8] black --- .../src/datahub/ingestion/source/openapi_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index e665b4082b1177..de199c32aedd8f 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -183,7 +183,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict: elif "text/csv" in res_cont.keys(): data = res_cont["text/csv"]["schema"] elif "examples" in base_res.keys(): - data = base_res["examples"]["application/json"] + data = base_res["examples"]["application/json"] return data From ad84aec00ee1342076a8fdefd3abbe3c3962e969 Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Fri, 7 Jul 2023 14:32:18 -0600 Subject: [PATCH 7/8] Add unittest for post methods with/without examples --- .../ingestion/source/openapi_parser.py | 2 +- metadata-ingestion/tests/unit/test_openapi.py | 182 +++++++++++++++++- 2 files changed, 179 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index de199c32aedd8f..1ab40bc8be73d4 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -145,7 +145,7 @@ def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 url_details[p_k] = {"description": desc, "tags": tags, "method": method} - example_data = check_for_api_example_data(base_res, key) + example_data = check_for_api_example_data(base_res, p_k) if example_data: url_details[p_k]["data"] = example_data diff --git a/metadata-ingestion/tests/unit/test_openapi.py b/metadata-ingestion/tests/unit/test_openapi.py index 64edd7fab21589..67316e6bdf08f3 100644 --- a/metadata-ingestion/tests/unit/test_openapi.py +++ b/metadata-ingestion/tests/unit/test_openapi.py @@ -176,6 +176,115 @@ class TestGetEndpoints(unittest.TestCase): ] } } + post: + operationId: updateVersionDetailsv2 + summary: Update API version details + produces: + - application/json + responses: + "200": + description: |- + 200 203 response + examples: + application/json: |- + { + "version": { + "status": "CURRENT", + "updated": "2011-01-21T11:33:21Z", + "media-types": [ + { + "base": "application/xml", + "type": "application/vnd.openstack.compute+xml;version=2" + }, + { + "base": "application/json", + "type": "application/vnd.openstack.compute+json;version=2" + } + ], + "id": "v2.0", + "links": [ + { + "href": "http://127.0.0.1:8774/v2/", + "rel": "self" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/os-compute-devguide-2.pdf", + "type": "application/pdf", + "rel": "describedby" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/wadl/os-compute-2.wadl", + "type": "application/vnd.sun.wadl+xml", + "rel": "describedby" + }, 
+ { + "href": "http://docs.openstack.org/api/openstack-compute/2/wadl/os-compute-2.wadl", + "type": "application/vnd.sun.wadl+xml", + "rel": "describedby" + } + ] + } + } + /v2/updateNoExample: + post: + operationId: updateVersionDetailsNoExample + summary: Show API version details no example output + produces: + - application/json + responses: + "200": + description: |- + 200 203 response + /v2/update: + post: + operationId: updateVersionDetailsv2 + summary: Show API version details + produces: + - application/json + responses: + "200": + description: |- + 200 203 response + examples: + application/json: |- + { + "version": { + "status": "CURRENT", + "updated": "2011-01-21T11:33:21Z", + "media-types": [ + { + "base": "application/xml", + "type": "application/vnd.openstack.compute+xml;version=2" + }, + { + "base": "application/json", + "type": "application/vnd.openstack.compute+json;version=2" + } + ], + "id": "v2.0", + "links": [ + { + "href": "http://127.0.0.1:8774/v2/", + "rel": "self" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/os-compute-devguide-2.pdf", + "type": "application/pdf", + "rel": "describedby" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/wadl/os-compute-2.wadl", + "type": "application/vnd.sun.wadl+xml", + "rel": "describedby" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/wadl/os-compute-2.wadl", + "type": "application/vnd.sun.wadl+xml", + "rel": "describedby" + } + ] + } + } consumes: - application/json """ @@ -359,6 +468,65 @@ class TestGetEndpoints(unittest.TestCase): ] } } + /v2/updateNoExample: + post: + operationId: updateVersionDetailsNoExample + summary: Update API version details + responses: + '200': + description: |- + 200 response + /v2/update: + post: + operationId: updateVersionDetailsv2 + summary: Update API version details + responses: + '200': + description: |- + 200 response + content: + application/json: + examples: + foo: + value: + { + "version": { + "status": "CURRENT", + "updated": "2011-01-21T11:33:21Z", + "media-types": [ + { + "base": "application/xml", + "type": "application/vnd.openstack.compute+xml;version=2" + }, + { + "base": "application/json", + "type": "application/vnd.openstack.compute+json;version=2" + } + ], + "id": "v2.0", + "links": [ + { + "href": "http://127.0.0.1:8774/v2/", + "rel": "self" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/os-compute-devguide-2.pdf", + "type": "application/pdf", + "rel": "describedby" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/wadl/os-compute-2.wadl", + "type": "application/vnd.sun.wadl+xml", + "rel": "describedby" + }, + { + "href": "http://docs.openstack.org/api/openstack-compute/2/wadl/os-compute-2.wadl", + "type": "application/vnd.sun.wadl+xml", + "rel": "describedby" + } + ] + } + } """ def test_get_endpoints_openapi30(self) -> None: @@ -366,19 +534,25 @@ def test_get_endpoints_openapi30(self) -> None: sw_file_raw = yaml.safe_load(self.openapi30) url_endpoints = get_endpoints(sw_file_raw) - self.assertEqual(len(url_endpoints), 2) - d4k = {"data": "", "tags": "", "description": ""} + self.assertEqual(len(url_endpoints), 4) + d4k = {"data": "", "tags": "", "description": "", "method": ""} self.assertEqual(url_endpoints["/"].keys(), d4k.keys()) + self.assertIn("data", url_endpoints["/v2/update"]) + self.assertNotIn("data", url_endpoints["/v2/updateNoExample"]) + def test_get_endpoints_openapi20(self) -> None: """extracting 'get' type endpoints from swagger 2.0 file""" 
sw_file_raw = yaml.safe_load(self.openapi20) url_endpoints = get_endpoints(sw_file_raw) - self.assertEqual(len(url_endpoints), 2) - d4k = {"data": "", "tags": "", "description": ""} + self.assertEqual(len(url_endpoints), 4) + d4k = {"data": "", "tags": "", "description": "", "method":""} self.assertEqual(url_endpoints["/"].keys(), d4k.keys()) + self.assertIn("data", url_endpoints["/v2/update"]) + self.assertNotIn("data", url_endpoints["/v2/updateNoExample"]) + class TestExplodeDict(unittest.TestCase): def test_d1(self): From fac0b516b2696a29a4d76552e7a8cc87d978e45c Mon Sep 17 00:00:00 2001 From: Jarod Smilkstein Date: Fri, 7 Jul 2023 14:34:05 -0600 Subject: [PATCH 8/8] format new changes --- metadata-ingestion/tests/unit/test_openapi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/tests/unit/test_openapi.py b/metadata-ingestion/tests/unit/test_openapi.py index 67316e6bdf08f3..42a8a13c1943d5 100644 --- a/metadata-ingestion/tests/unit/test_openapi.py +++ b/metadata-ingestion/tests/unit/test_openapi.py @@ -547,7 +547,7 @@ def test_get_endpoints_openapi20(self) -> None: url_endpoints = get_endpoints(sw_file_raw) self.assertEqual(len(url_endpoints), 4) - d4k = {"data": "", "tags": "", "description": "", "method":""} + d4k = {"data": "", "tags": "", "description": "", "method": ""} self.assertEqual(url_endpoints["/"].keys(), d4k.keys()) self.assertIn("data", url_endpoints["/v2/update"])
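
Taking the eight patches together, the selection logic the series converges on is easiest to read in one place. The sketch below is a condensed illustration, not the DataHub implementation: the helper name `first_method_details` and the embedded sample spec are invented for this summary, and the `text/csv` and Swagger-2.0 `examples` branches that `check_for_api_example_data` also covers are left out.

```python
# Condensed sketch of the endpoint-selection logic this series converges on.
# Not the DataHub code: `first_method_details` and the sample SPEC below are
# invented for illustration.
import yaml


def first_method_details(spec: dict) -> dict:
    """Record each path's first method, plus any inline JSON example."""
    details = {}
    for path, ops in spec["paths"].items():
        method = list(ops)[0]  # mirrors `method = list(p_o)[0]` in patch 1
        responses = ops[method].get("responses", {})
        # a plain-yaml 200 may load as an int, a quoted one as a string
        base_res = responses.get("200") or responses.get(200)
        if base_res is None:
            continue  # no 200 response: the endpoint is dropped entirely
        entry = {"method": method}
        json_content = base_res.get("content", {}).get("application/json", {})
        example = json_content.get("example") or json_content.get("examples")
        if isinstance(example, list):
            example = example[0]  # the parser takes the first example
        if isinstance(example, dict):
            entry["data"] = example
        details[path] = entry
    return dict(sorted(details.items()))


SPEC = yaml.safe_load(
    """
paths:
  /colors:
    get:
      responses:
        '200':
          description: list the colors
  /colors/create:
    post:
      responses:
        '200':
          description: the created color
          content:
            application/json:
              example: {"id": 1, "name": "blue"}
  /colors/delete:
    delete:
      responses:
        '200':
          description: deletion receipt
"""
)

for path, entry in first_method_details(SPEC).items():
    if "data" in entry:
        print(f"{path}: ingest the inline example ({entry['method']})")
    elif entry["method"] == "get":
        print(f"{path}: no example, fall back to a live GET call")
    else:
        # mirrors the report_warning(...) + continue added in openapi.py
        print(f"{path}: skip with 'No example provided for {entry['method']}'")
```

A design consequence worth noting: `method = list(p_o)[0]` considers only the first method declared under each path, so a path defining both `get` and `post` is ingested through whichever appears first in the file — one more reason the revised docs stress supplying inline examples for non-`GET` methods.
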