diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py
index e2dd515143992..e5792da32fb5d 100644
--- a/metadata-ingestion/scripts/avro_codegen.py
+++ b/metadata-ingestion/scripts/avro_codegen.py
@@ -769,7 +769,7 @@ def generate(
 import importlib
 from typing import TYPE_CHECKING
 
-from datahub._codegen.aspect import _Aspect
+from datahub._codegen.aspect import _Aspect as _Aspect
 from datahub.utilities.docs_build import IS_SPHINX_BUILD
 from datahub.utilities._custom_package_loader import get_custom_models_package
 
@@ -802,7 +802,7 @@ def generate(
 from datahub.utilities.docs_build import IS_SPHINX_BUILD
 from datahub.utilities._custom_package_loader import get_custom_urns_package
 
-from datahub.utilities.urns._urn_base import Urn  # noqa: F401
+from datahub.utilities.urns._urn_base import Urn as Urn  # noqa: F401
 
 _custom_package_path = get_custom_urns_package()
diff --git a/metadata-ingestion/setup.cfg b/metadata-ingestion/setup.cfg
index c095420e4e3f3..057779bc87c62 100644
--- a/metadata-ingestion/setup.cfg
+++ b/metadata-ingestion/setup.cfg
@@ -31,7 +31,7 @@ exclude =
     __pycache__
 per-file-ignores =
     # imported but unused
-    __init__.py: F401
+    __init__.py: F401, I250
 ban-relative-imports = true
 
 [mypy]
@@ -53,6 +53,14 @@ disallow_untyped_defs = no
 # try to be a bit more strict in certain areas of the codebase
 [mypy-datahub.*]
 ignore_missing_imports = no
+implicit_reexport = no
+[mypy-datahub.metadata.*]
+# TODO: Remove this once all the code has been updated.
+implicit_reexport = yes
+[mypy-datahub.ingestion.*]
+# TODO: Remove this once all the code has been updated.
+implicit_reexport = yes
+
 [mypy-datahub_provider.*]
 ignore_missing_imports = no
 [mypy-tests.*]
diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py b/metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py
index 27317826264b8..0b04bfa4025a1 100644
--- a/metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py
+++ b/metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py
@@ -12,3 +12,10 @@
 )
 
 requests_logger.setLevel(logging.WARNING)
+
+__all__ = [
+    "AssertionCircuitBreaker",
+    "AssertionCircuitBreakerConfig",
+    "OperationCircuitBreaker",
+    "OperationCircuitBreakerConfig",
+]
diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py b/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py
index a3c54046faf68..7c1180536a90f 100644
--- a/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py
+++ b/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py
@@ -6,7 +6,7 @@
 from gql.transport.requests import RequestsHTTPTransport
 from pydantic import Field
 
-from datahub.configuration import ConfigModel
+from datahub.configuration.common import ConfigModel
 
 logger = logging.getLogger(__name__)
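The mechanics behind the hunks above, for reviewers unfamiliar with mypy's re-export rules: under `implicit_reexport = no` (part of `--strict`), a name imported into a module is private to that module unless it is re-imported with a redundant `from X import Y as Y` alias or listed in `__all__`. The redundant alias in turn trips flake8-tidy-imports' I250 check ("unnecessary import alias"), which is why the `# noqa: I250` comments and the `__init__.py` per-file ignore appear alongside. A minimal sketch with a hypothetical package `pkg` (not DataHub code):

```python
# pkg/_impl.py
class Widget:
    pass

# pkg/__init__.py
# from pkg._impl import Widget           # implicit re-export: consumers get a
#                                        # mypy error under no_implicit_reexport
from pkg._impl import Widget as Widget   # explicit re-export (hence noqa: I250)
__all__ = ["Widget"]                     # declaring it in __all__ works too, as
                                         # in the circuit_breaker __init__ above

# consumer.py
from pkg import Widget                   # type-checks once either form is used
```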
+__all__ = ["DataFlow", "DataJob"] diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py index f2436d56d5aca..e169c07445e96 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py @@ -3,7 +3,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, cast import datahub.emitter.mce_builder as builder -from datahub.configuration.source_common import ALL_ENV_TYPES from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -114,7 +113,7 @@ def generate_tags_aspect(self) -> List[GlobalTagsClass]: def _get_env(self) -> Optional[str]: env: Optional[str] = None - if self.env and self.env.upper() in ALL_ENV_TYPES: + if self.env and self.env.upper() in builder.ALL_ENV_TYPES: env = self.env.upper() else: logger.debug( diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py index 0f5d18c20e055..4958a68caa95f 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py @@ -3,7 +3,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set import datahub.emitter.mce_builder as builder -from datahub.configuration.source_common import ALL_ENV_TYPES from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -109,7 +108,7 @@ def generate_mcp( self, materialize_iolets: bool = True ) -> Iterable[MetadataChangeProposalWrapper]: env: Optional[str] = None - if self.flow_urn.cluster.upper() in ALL_ENV_TYPES: + if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES: env = self.flow_urn.cluster.upper() else: logger.debug( diff --git a/metadata-ingestion/src/datahub/api/graphql/__init__.py b/metadata-ingestion/src/datahub/api/graphql/__init__.py index e8c8d22bbb93d..d818b19092fcb 100644 --- a/metadata-ingestion/src/datahub/api/graphql/__init__.py +++ b/metadata-ingestion/src/datahub/api/graphql/__init__.py @@ -1,2 +1,4 @@ from datahub.api.graphql.assertion import Assertion from datahub.api.graphql.operation import Operation + +__all__ = ["Assertion", "Operation"] diff --git a/metadata-ingestion/src/datahub/cli/put_cli.py b/metadata-ingestion/src/datahub/cli/put_cli.py index 989b1a6d02fd0..0a40a9f4ccf92 100644 --- a/metadata-ingestion/src/datahub/cli/put_cli.py +++ b/metadata-ingestion/src/datahub/cli/put_cli.py @@ -6,11 +6,12 @@ from datahub.cli.cli_utils import post_entity from datahub.configuration.config_loader import load_config_file -from datahub.emitter.mcp import MetadataChangeProposalWrapper, SystemMetadataClass +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import get_default_graph from datahub.metadata.schema_classes import ( DataPlatformInfoClass as DataPlatformInfo, PlatformTypeClass, + SystemMetadataClass, ) from datahub.telemetry import telemetry from datahub.upgrade import upgrade diff --git a/metadata-ingestion/src/datahub/configuration/__init__.py b/metadata-ingestion/src/datahub/configuration/__init__.py index 008d788072d0a..21979829a4453 100644 --- a/metadata-ingestion/src/datahub/configuration/__init__.py +++ b/metadata-ingestion/src/datahub/configuration/__init__.py @@ -1,5 
diff --git a/metadata-ingestion/src/datahub/configuration/__init__.py b/metadata-ingestion/src/datahub/configuration/__init__.py
index 008d788072d0a..21979829a4453 100644
--- a/metadata-ingestion/src/datahub/configuration/__init__.py
+++ b/metadata-ingestion/src/datahub/configuration/__init__.py
@@ -1,5 +1,4 @@
 from datahub.configuration.common import (
-    ConfigModel,
-    ConfigurationMechanism,
-    DynamicTypedConfig,
+    ConfigModel as ConfigModel,
+    DynamicTypedConfig as DynamicTypedConfig,
 )
diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py
index 0ce7127b44053..4fdf564162410 100644
--- a/metadata-ingestion/src/datahub/configuration/common.py
+++ b/metadata-ingestion/src/datahub/configuration/common.py
@@ -21,7 +21,7 @@
 from pydantic.fields import Field
 from typing_extensions import Protocol
 
-from datahub.configuration._config_enum import ConfigEnum
+from datahub.configuration._config_enum import ConfigEnum as ConfigEnum  # noqa: I250
 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
 from datahub.utilities.dedup_list import deduplicate_list
diff --git a/metadata-ingestion/src/datahub/configuration/json_loader.py b/metadata-ingestion/src/datahub/configuration/json_loader.py
index 35667eb5951fc..6ecb741be528d 100644
--- a/metadata-ingestion/src/datahub/configuration/json_loader.py
+++ b/metadata-ingestion/src/datahub/configuration/json_loader.py
@@ -1,7 +1,7 @@
 import json
 from typing import IO
 
-from datahub.configuration import ConfigurationMechanism
+from datahub.configuration.common import ConfigurationMechanism
 
 
 class JsonConfigurationMechanism(ConfigurationMechanism):
diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py
index ad12447532335..44c737f1bd13d 100644
--- a/metadata-ingestion/src/datahub/configuration/source_common.py
+++ b/metadata-ingestion/src/datahub/configuration/source_common.py
@@ -1,14 +1,10 @@
-from typing import Dict, Optional, Set
+from typing import Dict, Optional
 
 from pydantic import validator
 from pydantic.fields import Field
 
 from datahub.configuration.common import ConfigModel
-from datahub.emitter.enum_helpers import get_enum_options
-from datahub.metadata.schema_classes import FabricTypeClass
-
-DEFAULT_ENV = FabricTypeClass.PROD
-ALL_ENV_TYPES: Set[str] = set(get_enum_options(FabricTypeClass))
+from datahub.emitter.mce_builder import ALL_ENV_TYPES, DEFAULT_ENV
 
 
 class PlatformInstanceConfigMixin(ConfigModel):
diff --git a/metadata-ingestion/src/datahub/configuration/yaml.py b/metadata-ingestion/src/datahub/configuration/yaml.py
index 1f1172836f744..c069845e1de11 100644
--- a/metadata-ingestion/src/datahub/configuration/yaml.py
+++ b/metadata-ingestion/src/datahub/configuration/yaml.py
@@ -2,7 +2,7 @@
 
 import yaml
 
-from datahub.configuration import ConfigurationMechanism
+from datahub.configuration.common import ConfigurationMechanism
 
 
 class YamlConfigurationMechanism(ConfigurationMechanism):
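The loaders now import `ConfigurationMechanism` from its defining module instead of through the `datahub.configuration` package `__init__`, which both shrinks the package's public surface (the `__init__` now re-exports only `ConfigModel` and `DynamicTypedConfig`) and keeps sibling modules from depending on the package initializer. A sketch of a mechanism written against the defining module; `TomlConfigurationMechanism` is hypothetical, with its `load_config` signature assumed from the JSON loader's shape:

```python
import tomllib  # Python 3.11+
from typing import IO

from datahub.configuration.common import ConfigurationMechanism  # defining module


class TomlConfigurationMechanism(ConfigurationMechanism):
    # Hypothetical mechanism mirroring JsonConfigurationMechanism.
    def load_config(self, config_fp: IO) -> dict:
        return tomllib.load(config_fp)  # expects a binary file object
```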
diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py
index 63b03db7f5b60..69946c575908b 100644
--- a/metadata-ingestion/src/datahub/emitter/mce_builder.py
+++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py
@@ -13,6 +13,7 @@
     Any,
     List,
     Optional,
+    Set,
     Tuple,
     Type,
     TypeVar,
@@ -24,7 +25,6 @@
 import typing_inspect
 from avrogen.dict_wrapper import DictWrapper
 
-from datahub.configuration.source_common import DEFAULT_ENV
 from datahub.emitter.enum_helpers import get_enum_options
 from datahub.metadata.schema_classes import (
     AssertionKeyClass,
@@ -35,6 +35,7 @@
     DatasetKeyClass,
     DatasetLineageTypeClass,
     DatasetSnapshotClass,
+    FabricTypeClass,
     GlobalTagsClass,
     GlossaryTermAssociationClass,
     GlossaryTermsClass as GlossaryTerms,
@@ -56,6 +57,9 @@
 logger = logging.getLogger(__name__)
 
 Aspect = TypeVar("Aspect", bound=AspectAbstract)
+DEFAULT_ENV = FabricTypeClass.PROD
+ALL_ENV_TYPES: Set[str] = set(get_enum_options(FabricTypeClass))
+
 DEFAULT_FLOW_CLUSTER = "prod"
 UNKNOWN_USER = "urn:li:corpuser:unknown"
 DATASET_URN_TO_LOWER: bool = (
diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py
index d088380d5d38c..85968f050a371 100644
--- a/metadata-ingestion/src/datahub/entrypoints.py
+++ b/metadata-ingestion/src/datahub/entrypoints.py
@@ -13,13 +13,10 @@
     generate_access_token,
     make_shim_command,
 )
-from datahub.cli.config_utils import (
-    DATAHUB_CONFIG_PATH,
-    get_boolean_env_variable,
-    write_gms_config,
-)
+from datahub.cli.config_utils import DATAHUB_CONFIG_PATH, write_gms_config
 from datahub.cli.delete_cli import delete
 from datahub.cli.docker_cli import docker
+from datahub.cli.env_utils import get_boolean_env_variable
 from datahub.cli.exists_cli import exists
 from datahub.cli.get_cli import get
 from datahub.cli.ingest_cli import ingest
diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
index b390ffb9dd036..d32c0b85ceef4 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
@@ -3,7 +3,10 @@
 from typing import Callable, Dict, Optional, Type
 
 from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.api.source import Source, SourceCapability
+from datahub.ingestion.api.source import (  # noqa: I250
+    Source,
+    SourceCapability as SourceCapability,
+)
 
 
 def config_class(config_cls: Type) -> Callable[[Type], Type]:
diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
index bcf077154343c..88d1fcc52e219 100644
--- a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
+++ b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
@@ -23,7 +23,7 @@
     RecordTypeClass,
     SchemaFieldClass as SchemaField,
     SchemaFieldDataTypeClass,
-    SchemaMetadataClass as SchemaMetadata,
+    SchemaMetadataClass,
     StringTypeClass,
     UnionTypeClass,
 )
@@ -665,13 +665,13 @@ def get_schema_metadata(
     name: str,
     json_schema: Dict[Any, Any],
     raw_schema_string: Optional[str] = None,
-) -> SchemaMetadata:
+) -> SchemaMetadataClass:
     json_schema_as_string = raw_schema_string or json.dumps(json_schema)
     md5_hash: str = md5(json_schema_as_string.encode()).hexdigest()
 
     schema_fields = list(JsonSchemaTranslator.get_fields_from_schema(json_schema))
 
-    schema_metadata = SchemaMetadata(
+    schema_metadata = SchemaMetadataClass(
         schemaName=name,
         platform=f"urn:li:dataPlatform:{platform}",
         version=0,
diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py
index f62bb184252d9..e947aff384871 100644
--- a/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py
+++ b/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py
@@ -32,7 +32,7 @@
     OneofDescriptor,
 )
 
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
@@ -41,8 +41,8 @@
     MapTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     StringTypeClass,
     UnionTypeClass,
 )
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index c90ac93eee2cc..759aebcfd46b0 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -33,7 +33,9 @@
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.emitter.serialization_helper import post_json_transform
-from datahub.ingestion.graph.config import DatahubClientConfig
+from datahub.ingestion.graph.config import (  # noqa: I250; TODO: Remove this alias
+    DatahubClientConfig as DatahubClientConfig,
+)
 from datahub.ingestion.graph.connections import (
     connections_gql,
     get_id_from_connection_urn,
diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
index 33bfb63feb3fd..5961a553a1494 100644
--- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
+++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
@@ -11,9 +11,8 @@
     redact_raw_config,
 )
 from datahub.emitter.aspect import JSON_CONTENT_TYPE
-from datahub.emitter.mce_builder import datahub_guid
+from datahub.emitter.mce_builder import datahub_guid, make_data_platform_urn
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import make_data_platform_urn
 from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
 from datahub.ingestion.api.pipeline_run_listener import PipelineRunListener
 from datahub.ingestion.api.sink import NoopWriteCallback, Sink
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index b5d0ed42e651e..4598ae388b827 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -117,9 +117,8 @@
     ViewPropertiesClass,
 )
 from datahub.metadata.urns import DatasetUrn
-from datahub.sql_parsing.schema_resolver import SchemaResolver
+from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
 from datahub.sql_parsing.sqlglot_lineage import (
-    SchemaInfo,
     SqlParsingDebugInfo,
     SqlParsingResult,
     infer_output_schema,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py
index 81a54d1327d05..d2b4a576953da 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py
@@ -6,9 +6,8 @@
 from pydantic import Field
 from typing_extensions import Literal
 
-from datahub.configuration.common import AllowDenyPattern
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.configuration.source_common import (
-    ConfigModel,
     EnvConfigMixin,
     PlatformInstanceConfigMixin,
 )
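Importing the generated classes from `datahub.metadata.schema_classes` under their historical short names (`SchemaFieldClass as SchemaField`, and so on) keeps every call site unchanged while dropping the `com.linkedin.pegasus2avro` indirection. A sketch of what those call sites construct; the field values are illustrative, not from the patch:

```python
from datahub.metadata.schema_classes import (
    SchemaFieldClass as SchemaField,
    SchemaFieldDataTypeClass as SchemaFieldDataType,
    StringTypeClass,
)

# One inferred column, typed via the generated wrapper classes.
field = SchemaField(
    fieldPath="user_id",
    type=SchemaFieldDataType(type=StringTypeClass()),
    nativeDataType="VARCHAR",  # free-form source-system type string
)
```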
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py
index acda656526ef5..4f1de6fb06c69 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py
@@ -52,24 +52,22 @@
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
+    DataPlatformInstanceClass,
+    DatasetPropertiesClass,
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     SchemalessClass,
-    SchemaMetadata,
+    SchemaMetadataClass,
     StringTypeClass,
     UnionTypeClass,
 )
-from datahub.metadata.schema_classes import (
-    DataPlatformInstanceClass,
-    DatasetPropertiesClass,
-)
 from datahub.utilities.registries.domain_registry import DomainRegistry
 
 MAX_ITEMS_TO_RETRIEVE = 100
@@ -448,7 +446,7 @@ def construct_schema_metadata(
         dataset_properties: DatasetPropertiesClass,
         schema: Dict[Tuple[str, ...], SchemaDescription],
         primary_key_dict: Dict[str, str],
-    ) -> SchemaMetadata:
+    ) -> SchemaMetadataClass:
        """ "
        To construct the schema metadata, it will first sort the schema by the occurrence of attribute names
        in descending order and truncate the schema by MAX_SCHEMA_SIZE, and then start to construct the
@@ -502,7 +500,7 @@ def construct_schema_metadata(
             canonical_schema.append(field)
 
         # create schema metadata object for table
-        schema_metadata = SchemaMetadata(
+        schema_metadata = SchemaMetadataClass(
             schemaName=table_name,
             platform=f"urn:li:dataPlatform:{self.platform}",
             version=0,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
index e40e284d6e0a4..86826ae7bedc0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
@@ -12,8 +12,9 @@
     ConfigModel,
     ConfigurationWarning,
 )
-from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
+from datahub.configuration.source_common import DatasetSourceConfigMixin
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
+from datahub.emitter.mce_builder import DEFAULT_ENV
 from datahub.ingestion.api.report import Report
 from datahub.ingestion.source.bigquery_v2.bigquery_config import (
     BigQueryConnectionConfig,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
index 3d1683100474e..3e2872a4b5caa 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
@@ -48,7 +48,7 @@
 from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
 from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
 from datahub.ingestion.source.looker.lookml_config import (
-    _BASE_PROJECT_NAME,
+    BASE_PROJECT_NAME,
     LookMLSourceReport,
 )
 from datahub.ingestion.source.looker.str_functions import remove_suffix
@@ -370,7 +370,7 @@ def _form_field_name(
     assert view_name  # for lint false positive
 
     project_include: ProjectInclude = ProjectInclude(
-        project=view_project_map.get(view_name, _BASE_PROJECT_NAME),
+        project=view_project_map.get(view_name, BASE_PROJECT_NAME),
         include=view_name,
     )
 
@@ -385,7 +385,7 @@ def _form_field_name(
     view_urn = LookerViewId(
         project_name=(
             project_include.project
-            if project_include.project != _BASE_PROJECT_NAME
+            if project_include.project != BASE_PROJECT_NAME
             else explore_project_name
         ),
         model_name=model_name,
@@ -1113,7 +1113,7 @@ def from_api(  # noqa: C901
                     fields=view_fields,
                     upstream_views=list(
                         ProjectInclude(
-                            project=view_project_map.get(view_name, _BASE_PROJECT_NAME),
+                            project=view_project_map.get(view_name, BASE_PROJECT_NAME),
                             include=view_name,
                         )
                         for view_name in views
@@ -1239,7 +1239,7 @@ def _to_metadata_events(  # noqa: C901
             view_urn = LookerViewId(
                 project_name=(
                     view_ref.project
-                    if view_ref.project != _BASE_PROJECT_NAME
+                    if view_ref.project != BASE_PROJECT_NAME
                     else self.project_name
                 ),
                 model_name=self.model_name,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py
index 7e23079156b62..327c9ebf99bd2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py
@@ -9,8 +9,8 @@
     load_and_preprocess_file,
 )
 from datahub.ingestion.source.looker.lookml_config import (
-    _BASE_PROJECT_NAME,
-    _EXPLORE_FILE_EXTENSION,
+    BASE_PROJECT_NAME,
+    EXPLORE_FILE_EXTENSION,
     LookMLSourceConfig,
     LookMLSourceReport,
 )
@@ -69,7 +69,7 @@ def from_looker_dict(
         explore_files = [
             x.include
             for x in resolved_includes
-            if x.include.endswith(_EXPLORE_FILE_EXTENSION)
+            if x.include.endswith(EXPLORE_FILE_EXTENSION)
         ]
         for included_file in explore_files:
             try:
@@ -152,9 +152,9 @@ def resolve_includes(
             #   As such, we try to handle it but are as defensive as possible.
 
             non_base_project_name = project_name
-            if project_name == _BASE_PROJECT_NAME and root_project_name is not None:
+            if project_name == BASE_PROJECT_NAME and root_project_name is not None:
                 non_base_project_name = root_project_name
-            if non_base_project_name != _BASE_PROJECT_NAME and inc.startswith(
+            if non_base_project_name != BASE_PROJECT_NAME and inc.startswith(
                 f"/{non_base_project_name}/"
             ):
                 # This might be a local include. Let's make sure that '/{project_name}' doesn't
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py
index f894c96debc54..9fac0b52fde0d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py
@@ -9,8 +9,8 @@
     load_and_preprocess_file,
 )
 from datahub.ingestion.source.looker.lookml_config import (
-    _EXPLORE_FILE_EXTENSION,
-    _VIEW_FILE_EXTENSION,
+    EXPLORE_FILE_EXTENSION,
+    VIEW_FILE_EXTENSION,
     LookMLSourceConfig,
     LookMLSourceReport,
 )
@@ -42,7 +42,7 @@ def _load_viewfile(
     ) -> Optional[LookerViewFile]:
         # always fully resolve paths to simplify de-dup
         path = str(pathlib.Path(path).resolve())
-        allowed_extensions = [_VIEW_FILE_EXTENSION, _EXPLORE_FILE_EXTENSION]
+        allowed_extensions = [VIEW_FILE_EXTENSION, EXPLORE_FILE_EXTENSION]
         matched_any_extension = [
             match for match in [path.endswith(x) for x in allowed_extensions] if match
         ]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
index 6a623e1e97b5d..ef7d64e4f42d4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
@@ -14,7 +14,7 @@
 
 from looker_sdk.sdk.api40.models import Dashboard, LookWithQuery
 
-from datahub.emitter.mce_builder import Aspect, AspectAbstract
+from datahub.emitter.mce_builder import Aspect
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.source.looker import looker_common
 from datahub.ingestion.source.looker.looker_common import (
@@ -40,6 +40,7 @@
     DashboardUsageStatisticsClass,
     DashboardUserUsageCountsClass,
     TimeWindowSizeClass,
+    _Aspect as AspectAbstract,
 )
 
 logger = logging.getLogger(__name__)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_view_id_cache.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_view_id_cache.py
index aa45bb72d1f46..562c7863b3134 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_view_id_cache.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_view_id_cache.py
@@ -6,7 +6,7 @@
 from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
 from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
 from datahub.ingestion.source.looker.lookml_config import (
-    _BASE_PROJECT_NAME,
+    BASE_PROJECT_NAME,
     NAME,
     LookMLSourceReport,
 )
@@ -103,7 +103,7 @@ def get_looker_view_id(
 
             current_project_name: str = (
                 include.project
-                if include.project != _BASE_PROJECT_NAME
+                if include.project != BASE_PROJECT_NAME
                 else self.project_name
             )
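The `looker_usage` hunk takes the aspect base class straight from `schema_classes` (`_Aspect as AspectAbstract`) rather than re-importing it via `mce_builder`; `mce_builder`'s own `Aspect = TypeVar("Aspect", bound=AspectAbstract)` appears earlier in this diff. A sketch of the kind of aspect-generic helper this typing supports; `first_aspect_of` is hypothetical, not DataHub code:

```python
from typing import List, Optional, Type, TypeVar

from datahub.metadata.schema_classes import _Aspect as AspectAbstract

Aspect = TypeVar("Aspect", bound=AspectAbstract)


def first_aspect_of(aspects: List[AspectAbstract], cls: Type[Aspect]) -> Optional[Aspect]:
    # Return the first aspect of the requested type, preserving its static type.
    for aspect in aspects:
        if isinstance(aspect, cls):
            return aspect
    return None
```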
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py
index da837da161386..7ffb895349ed2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py
@@ -33,17 +33,11 @@
 
 NAME: str = "name"
 
-_BASE_PROJECT_NAME = "__BASE"
+BASE_PROJECT_NAME = "__BASE"
 
-_EXPLORE_FILE_EXTENSION = ".explore.lkml"
-
-_VIEW_FILE_EXTENSION = ".view.lkml"
-
-_MODEL_FILE_EXTENSION = ".model.lkml"
-
-VIEW_LANGUAGE_LOOKML: str = "lookml"
-
-VIEW_LANGUAGE_SQL: str = "sql"
+EXPLORE_FILE_EXTENSION = ".explore.lkml"
+VIEW_FILE_EXTENSION = ".view.lkml"
+MODEL_FILE_EXTENSION = ".model.lkml"
 
 DERIVED_VIEW_SUFFIX = r".sql_table_name"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_refinement.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_refinement.py
index 892ed79754a1c..6933d9d69394b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_refinement.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_refinement.py
@@ -5,7 +5,7 @@
 
 from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
 from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
-from datahub.ingestion.source.looker.looker_view_id_cache import LookerViewFileLoader
+from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
 from datahub.ingestion.source.looker.lookml_config import (
     NAME,
     LookMLSourceConfig,
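Dropping the leading underscores acknowledges that these constants stopped being module-private the moment sibling modules began importing them: under PEP 8 the underscore signals internal use, and wildcard imports skip such names. A two-file illustration with a hypothetical `constants` module, not DataHub code:

```python
# constants.py
_PRIVATE_EXT = ".model.lkml"  # private by convention
PUBLIC_EXT = ".view.lkml"

# consumer.py
# from constants import *        # binds PUBLIC_EXT only; _PRIVATE_EXT is skipped
from constants import _PRIVATE_EXT  # still possible, but flags a layering smell
```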
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
index d258570ec384f..3c83b8728aa6f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
@@ -57,10 +57,8 @@
     LookerViewContext,
 )
 from datahub.ingestion.source.looker.lookml_config import (
-    _BASE_PROJECT_NAME,
-    _MODEL_FILE_EXTENSION,
-    VIEW_LANGUAGE_LOOKML,
-    VIEW_LANGUAGE_SQL,
+    BASE_PROJECT_NAME,
+    MODEL_FILE_EXTENSION,
     LookerConnectionDefinition,
     LookMLSourceConfig,
     LookMLSourceReport,
@@ -98,6 +96,9 @@
 )
 from datahub.sql_parsing.sqlglot_lineage import ColumnRef
 
+VIEW_LANGUAGE_LOOKML: str = "lookml"
+VIEW_LANGUAGE_SQL: str = "sql"
+
 logger = logging.getLogger(__name__)
 
 
@@ -319,7 +320,7 @@ def _load_model(self, path: str) -> LookerModel:
 
         looker_model = LookerModel.from_looker_dict(
             parsed,
-            _BASE_PROJECT_NAME,
+            BASE_PROJECT_NAME,
             self.source_config.project_name,
             self.base_projects_folder,
             path,
@@ -544,7 +545,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
                 self.source_config.base_folder = checkout_dir.resolve()
 
             self.base_projects_folder[
-                _BASE_PROJECT_NAME
+                BASE_PROJECT_NAME
             ] = self.source_config.base_folder
 
             visited_projects: Set[str] = set()
@@ -576,7 +577,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
                     self.base_projects_folder[project] = p_ref
 
             self._recursively_check_manifests(
-                tmp_dir, _BASE_PROJECT_NAME, visited_projects
+                tmp_dir, BASE_PROJECT_NAME, visited_projects
             )
 
             yield from self.get_internal_workunits()
@@ -607,7 +608,7 @@ def _recursively_check_manifests(
             return
 
         # Special case handling if the root project has a name in the manifest file.
-        if project_name == _BASE_PROJECT_NAME and manifest.project_name:
+        if project_name == BASE_PROJECT_NAME and manifest.project_name:
             if (
                 self.source_config.project_name is not None
                 and manifest.project_name != self.source_config.project_name
@@ -696,7 +697,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
         # The ** means "this directory and all subdirectories", and hence should
         # include all the files we want.
         model_files = sorted(
-            self.source_config.base_folder.glob(f"**/*{_MODEL_FILE_EXTENSION}")
+            self.source_config.base_folder.glob(f"**/*{MODEL_FILE_EXTENSION}")
         )
         model_suffix_len = len(".model")
 
@@ -832,7 +833,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
                 current_project_name: str = (
                     include.project
-                    if include.project != _BASE_PROJECT_NAME
+                    if include.project != BASE_PROJECT_NAME
                     else project_name
                 )
 
@@ -841,7 +842,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
                 base_folder_path: str = str(
                     self.base_projects_folder.get(
                         current_project_name,
-                        self.base_projects_folder[_BASE_PROJECT_NAME],
+                        self.base_projects_folder[BASE_PROJECT_NAME],
                     )
                 )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
index 057dbca428184..632d0caf71232 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
@@ -12,6 +12,7 @@
     ViewField,
     ViewFieldType,
 )
+from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
 from datahub.ingestion.source.looker.looker_view_id_cache import LookerViewIdCache
 from datahub.ingestion.source.looker.lookml_concept_context import (
     LookerFieldContext,
@@ -20,7 +21,6 @@
 from datahub.ingestion.source.looker.lookml_config import (
     DERIVED_VIEW_SUFFIX,
     NAME,
-    LookerConnectionDefinition,
     LookMLSourceConfig,
     LookMLSourceReport,
 )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
index c87b025f13b55..bbc4897d227ba 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
@@ -50,25 +50,23 @@
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
+    DataPlatformInstanceClass,
+    DatasetPropertiesClass,
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     SchemalessClass,
-    SchemaMetadata,
+    SchemaMetadataClass as SchemaMetadata,
     StringTypeClass,
     TimeTypeClass,
     UnionTypeClass,
 )
-from datahub.metadata.schema_classes import (
-    DataPlatformInstanceClass,
-    DatasetPropertiesClass,
-)
 from datahub.metadata.urns import DatasetUrn
 
 logger = logging.getLogger(__name__)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
index 7c8487727c9ee..91fa2e96be2cc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
@@ -9,7 +9,7 @@
 
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern, ConfigModel
-from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
+from datahub.configuration.source_common import DatasetSourceConfigMixin
 from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
 from datahub.ingestion.source.common.subtypes import BIAssetSubTypes
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -240,7 +240,7 @@ class PlatformDetail(ConfigModel):
         "recipe of other datahub sources.",
     )
     env: str = pydantic.Field(
-        default=DEFAULT_ENV,
+        default=builder.DEFAULT_ENV,
         description="The environment that all assets produced by DataHub platform ingestion source belong to",
     )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py
index 5fd63e7f93f92..581e32d29dcea 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redash.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py
@@ -41,7 +41,7 @@
 )
 from datahub.utilities.lossy_collections import LossyDict, LossyList
 from datahub.utilities.perf_timer import PerfTimer
-from datahub.utilities.sql_parser import SQLParser
+from datahub.utilities.sql_parser_base import SQLParser
 from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor
 
 logger = logging.getLogger(__name__)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py
index de0904107b9bb..66962b5d96d38 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py
@@ -13,12 +13,9 @@
 from urllib3.util.retry import Retry
 
 from datahub.configuration.common import AllowDenyPattern
-from datahub.configuration.source_common import (
-    DEFAULT_ENV,
-    DatasetSourceConfigMixin,
-    EnvConfigMixin,
-)
+from datahub.configuration.source_common import DatasetSourceConfigMixin, EnvConfigMixin
 from datahub.emitter.mce_builder import (
+    DEFAULT_ENV,
     dataset_urn_to_key,
     make_dashboard_urn,
     make_data_platform_urn,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/csv_tsv.py b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/csv_tsv.py
index 54f7dfb5b903c..ab7b887cba1d8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/csv_tsv.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/csv_tsv.py
@@ -3,15 +3,15 @@
 from tableschema import Table
 
 from datahub.ingestion.source.schema_inference.base import SchemaInferenceBase
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     DateTypeClass,
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     StringTypeClass,
     TimeTypeClass,
     UnionTypeClass,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/json.py b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/json.py
index 1f2c73a2522d0..1659aaf6fa202 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/json.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/json.py
@@ -7,14 +7,14 @@
 
 from datahub.ingestion.source.schema_inference.base import SchemaInferenceBase
 from datahub.ingestion.source.schema_inference.object import construct_schema
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     StringTypeClass,
     UnionTypeClass,
 )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/parquet.py b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/parquet.py
index 1f3f2e0a1e8a8..efc605e0df8ca 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/parquet.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/parquet.py
@@ -4,7 +4,7 @@
 import pyarrow.parquet
 
 from datahub.ingestion.source.schema_inference.base import SchemaInferenceBase
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
@@ -12,8 +12,8 @@
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     StringTypeClass,
     TimeTypeClass,
     UnionTypeClass,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
index 6f9c9259b2784..ac47abf487449 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
@@ -31,14 +31,16 @@
 )
 from datahub.metadata.schema_classes import DatasetLineageTypeClass, UpstreamClass
 from datahub.sql_parsing.sql_parsing_aggregator import (
-    ColumnLineageInfo,
-    ColumnRef,
     KnownLineageMapping,
     KnownQueryLineageInfo,
     SqlParsingAggregator,
     UrnStr,
 )
-from datahub.sql_parsing.sqlglot_lineage import DownstreamColumnRef
+from datahub.sql_parsing.sqlglot_lineage import (
+    ColumnLineageInfo,
+    ColumnRef,
+    DownstreamColumnRef,
+)
 from datahub.utilities.perf_timer import PerfTimer
 from datahub.utilities.time import ts_millis_to_datetime
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
index dd7f73268fdc4..538841018067e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
@@ -48,11 +48,9 @@
     SnowflakeQueriesExtractor,
     SnowflakeQueriesExtractorConfig,
 )
+from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery
 from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report
-from datahub.ingestion.source.snowflake.snowflake_schema import (
-    SnowflakeDataDictionary,
-    SnowflakeQuery,
-)
+from datahub.ingestion.source.snowflake.snowflake_schema import SnowflakeDataDictionary
 from datahub.ingestion.source.snowflake.snowflake_schema_gen import (
     SnowflakeSchemaGenerator,
 )
diff --git a/metadata-ingestion/src/datahub/specific/dataset.py b/metadata-ingestion/src/datahub/specific/dataset.py
index 9dd2616078f08..b171dc4cc2939 100644
--- a/metadata-ingestion/src/datahub/specific/dataset.py
+++ b/metadata-ingestion/src/datahub/specific/dataset.py
@@ -13,7 +13,7 @@
     KafkaAuditHeaderClass,
     OwnerClass as Owner,
     OwnershipTypeClass,
-    SchemaMetadataClass as SchemaMetadata,
+    SchemaMetadataClass,
     SystemMetadataClass,
     TagAssociationClass as Tag,
     UpstreamClass as Upstream,
@@ -40,7 +40,7 @@ def __init__(
         self.aspect_name = (
             EditableSchemaMetadata.ASPECT_NAME
             if editable
-            else SchemaMetadata.ASPECT_NAME
+            else SchemaMetadataClass.ASPECT_NAME
        )
         self.aspect_field = "editableSchemaFieldInfo" if editable else "schemaFieldInfo"
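`specific/dataset.py` can drop the `SchemaMetadata` alias because every generated aspect class carries its aspect name as a class attribute, so nothing at the call site depends on the short spelling. A quick check (the value follows the metadata model's camelCase aspect naming; verify against your installed version):

```python
from datahub.metadata.schema_classes import SchemaMetadataClass

print(SchemaMetadataClass.ASPECT_NAME)  # expected: "schemaMetadata"
```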
diff --git a/metadata-ingestion/src/datahub/sql_parsing/_models.py b/metadata-ingestion/src/datahub/sql_parsing/_models.py
index d92d178b81cf4..d586e7d6d9045 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/_models.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/_models.py
@@ -42,6 +42,8 @@ def __lt__(self, other: "_FrozenModel") -> bool:
 
 
 class _TableName(_FrozenModel):
+    # TODO: Move this into the schema_resolver.py file.
+
     database: Optional[str] = None
     db_schema: Optional[str] = None
     table: str
diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
index e7b0527d30d97..e3f2fbc786b43 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
@@ -13,7 +13,7 @@
 from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
 from datahub.metadata.schema_classes import SchemaFieldClass, SchemaMetadataClass
 from datahub.metadata.urns import DataPlatformUrn
-from datahub.sql_parsing._models import _TableName
+from datahub.sql_parsing._models import _TableName as _TableName  # noqa: I250
 from datahub.sql_parsing.sql_parsing_common import PLATFORMS_WITH_CASE_SENSITIVE_TABLES
 from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
 from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path
diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py
index 72b5f6c5e26e4..13be45ec1be28 100644
--- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py
+++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py
@@ -6,12 +6,8 @@
 import deepdiff
 
 from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
-from datahub.sql_parsing.schema_resolver import SchemaResolver
-from datahub.sql_parsing.sqlglot_lineage import (
-    SchemaInfo,
-    SqlParsingResult,
-    sqlglot_lineage,
-)
+from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
+from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
 
 logger = logging.getLogger(__name__)
diff --git a/metadata-ingestion/src/datahub/utilities/urns/corp_group_urn.py b/metadata-ingestion/src/datahub/utilities/urns/corp_group_urn.py
index 37c1076925945..577f90215a635 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/corp_group_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/corp_group_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import CorpGroupUrn  # noqa: F401
+from datahub.metadata.urns import CorpGroupUrn
+
+__all__ = ["CorpGroupUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/corpuser_urn.py b/metadata-ingestion/src/datahub/utilities/urns/corpuser_urn.py
index 5f9ecf65951b9..8acb86be00f6c 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/corpuser_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/corpuser_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import CorpUserUrn as CorpuserUrn  # noqa: F401
+from datahub.metadata.urns import CorpUserUrn as CorpuserUrn
+
+__all__ = ["CorpuserUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/data_flow_urn.py b/metadata-ingestion/src/datahub/utilities/urns/data_flow_urn.py
index 5b2b45927c339..3508ae5c4a349 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/data_flow_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/data_flow_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import DataFlowUrn  # noqa: F401
+from datahub.metadata.urns import DataFlowUrn
+
+__all__ = ["DataFlowUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/data_job_urn.py b/metadata-ingestion/src/datahub/utilities/urns/data_job_urn.py
index 53e3419ee7ecb..d003b6c6ad7a8 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/data_job_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/data_job_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import DataJobUrn  # noqa: F401
+from datahub.metadata.urns import DataJobUrn
+
+__all__ = ["DataJobUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/data_platform_urn.py b/metadata-ingestion/src/datahub/utilities/urns/data_platform_urn.py
index 9d37e38f256e7..51e013e715d4f 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/data_platform_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/data_platform_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import DataPlatformUrn  # noqa: F401
+from datahub.metadata.urns import DataPlatformUrn
+
+__all__ = ["DataPlatformUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/data_process_instance_urn.py b/metadata-ingestion/src/datahub/utilities/urns/data_process_instance_urn.py
index df6ba797d069c..22e6b36c5f7ae 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/data_process_instance_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/data_process_instance_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import DataProcessInstanceUrn  # noqa: F401
+from datahub.metadata.urns import DataProcessInstanceUrn
+
+__all__ = ["DataProcessInstanceUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py b/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py
index 6078ffefc03d8..1652e17059995 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import DatasetUrn  # noqa: F401
+from datahub.metadata.urns import DatasetUrn
+
+__all__ = ["DatasetUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/domain_urn.py b/metadata-ingestion/src/datahub/utilities/urns/domain_urn.py
index 442a6b27729bb..242a3d8228320 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/domain_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/domain_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import DomainUrn  # noqa: F401
+from datahub.metadata.urns import DomainUrn
+
+__all__ = ["DomainUrn"]
diff --git a/metadata-ingestion/src/datahub/utilities/urns/notebook_urn.py b/metadata-ingestion/src/datahub/utilities/urns/notebook_urn.py
index 60a4f5396aa46..f9b861d7f0852 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/notebook_urn.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/notebook_urn.py
@@ -1 +1,3 @@
-from datahub.metadata.urns import NotebookUrn  # noqa: F401
+from datahub.metadata.urns import NotebookUrn
+
+__all__ = ["NotebookUrn"]
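The `datahub.utilities.urns.*` modules stay behind as shims: each re-exports its class from `datahub.metadata.urns` and pins it in `__all__`, so legacy import paths resolve to the very same class object. A quick check; the urn values are illustrative and the constructor keywords assumed from the current `DatasetUrn` API:

```python
from datahub.metadata.urns import DatasetUrn
from datahub.utilities.urns.dataset_urn import DatasetUrn as LegacyDatasetUrn

assert DatasetUrn is LegacyDatasetUrn  # one class, two import paths

urn = DatasetUrn(platform="hive", name="db.table", env="PROD")
print(urn.urn())  # urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)
```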
make_structured_property_urn(structured_property_id: str) -> str: diff --git a/metadata-ingestion/src/datahub/utilities/urns/tag_urn.py b/metadata-ingestion/src/datahub/utilities/urns/tag_urn.py index 0ac632ee40a01..f66d56a745a96 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/tag_urn.py +++ b/metadata-ingestion/src/datahub/utilities/urns/tag_urn.py @@ -1 +1,3 @@ -from datahub.metadata.urns import TagUrn # noqa: F401 +from datahub.metadata.urns import TagUrn + +__all__ = ["TagUrn"] diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn.py b/metadata-ingestion/src/datahub/utilities/urns/urn.py index 2e5cebfd0e8f5..2ded2d4d9b32c 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn.py @@ -1,4 +1,6 @@ -from datahub.metadata.urns import Urn # noqa: F401 +from datahub.metadata.urns import Urn + +__all__ = ["Urn", "guess_entity_type"] def guess_entity_type(urn: str) -> str: diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index 94b3b103d0548..a4cfbd5eadb7f 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -12,16 +12,14 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.file import read_metadata_file +from datahub.ingestion.source.looker.looker_dataclasses import LookerModel from datahub.ingestion.source.looker.looker_template_language import ( SpecialVariable, load_and_preprocess_file, resolve_liquid_variable, ) -from datahub.ingestion.source.looker.lookml_source import ( - LookerModel, - LookerRefinementResolver, - LookMLSourceConfig, -) +from datahub.ingestion.source.looker.lookml_config import LookMLSourceConfig +from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver from datahub.metadata.schema_classes import ( DatasetSnapshotClass, MetadataChangeEventClass, diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index edfc41616e44b..62f8f6a654b58 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -19,8 +19,7 @@ ) from tableauserverclient.models.reference_item import ResourceReference -from datahub.configuration.source_common import DEFAULT_ENV -from datahub.emitter.mce_builder import make_schema_field_urn +from datahub.emitter.mce_builder import DEFAULT_ENV, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.run.pipeline import Pipeline, PipelineContext from datahub.ingestion.source.tableau.tableau import ( diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py index 20aec975787e4..d0e943bbe63da 100644 --- a/metadata-ingestion/tests/test_helpers/docker_helpers.py +++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py @@ -4,10 +4,10 @@ import pytest -from datahub.testing.docker_utils import ( # noqa: F401 - docker_compose_runner, - is_responsive, - wait_for_port, +from datahub.testing.docker_utils import ( # noqa: F401,I250 + docker_compose_runner as docker_compose_runner, + is_responsive as is_responsive, + wait_for_port as wait_for_port, ) logger = logging.getLogger(__name__) diff --git 
a/metadata-ingestion/tests/test_helpers/mce_helpers.py b/metadata-ingestion/tests/test_helpers/mce_helpers.py index 3b59481d8cb02..f4c629df7dba4 100644 --- a/metadata-ingestion/tests/test_helpers/mce_helpers.py +++ b/metadata-ingestion/tests/test_helpers/mce_helpers.py @@ -17,15 +17,16 @@ Union, ) +import pytest + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.sink.file import write_metadata_file from datahub.metadata.schema_classes import MetadataChangeEventClass +from datahub.metadata.urns import Urn from datahub.testing.compare_metadata_json import ( assert_metadata_files_equal, load_json_file, ) -from datahub.utilities.urns.urn import Urn -from tests.test_helpers.type_helpers import PytestConfig logger = logging.getLogger(__name__) @@ -77,7 +78,7 @@ def clean_nones(value): def check_golden_file( - pytestconfig: PytestConfig, + pytestconfig: pytest.Config, output_path: Union[str, os.PathLike], golden_path: Union[str, os.PathLike], ignore_paths: Sequence[str] = (), @@ -98,7 +99,7 @@ def check_golden_file( def check_goldens_stream( - pytestconfig: PytestConfig, + pytestconfig: pytest.Config, outputs: List, golden_path: Union[str, os.PathLike], ignore_paths: Sequence[str] = (), diff --git a/metadata-ingestion/tests/test_helpers/type_helpers.py b/metadata-ingestion/tests/test_helpers/type_helpers.py index 154960bbf7fc4..3a2215ed81ca9 100644 --- a/metadata-ingestion/tests/test_helpers/type_helpers.py +++ b/metadata-ingestion/tests/test_helpers/type_helpers.py @@ -1,12 +1,5 @@ from typing import Optional, TypeVar -# The current PytestConfig solution is somewhat ugly and not ideal. -# However, it is currently the best solution available, as the type itself is not -# exported: https://docs.pytest.org/en/stable/reference.html#config. -# As pytest's type support improves, this will likely change. -# TODO: revisit pytestconfig as https://github.com/pytest-dev/pytest/issues/7469 progresses. 
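`pytest.Config` has been the public name for the config object since pytest 7.0, which is what lets the test helpers annotate the `pytestconfig` fixture without reaching into the private `_pytest` namespace (and lets `type_helpers.py` drop its workaround below). A minimal usage sketch:

```python
import pytest


def test_golden_dir_exists(pytestconfig: pytest.Config) -> None:
    # rootpath is the root directory pytest determined for this run
    assert pytestconfig.rootpath.is_dir()
```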
diff --git a/metadata-ingestion/tests/test_helpers/type_helpers.py b/metadata-ingestion/tests/test_helpers/type_helpers.py
index 154960bbf7fc4..3a2215ed81ca9 100644
--- a/metadata-ingestion/tests/test_helpers/type_helpers.py
+++ b/metadata-ingestion/tests/test_helpers/type_helpers.py
@@ -1,12 +1,5 @@
 from typing import Optional, TypeVar
 
-# The current PytestConfig solution is somewhat ugly and not ideal.
-# However, it is currently the best solution available, as the type itself is not
-# exported: https://docs.pytest.org/en/stable/reference.html#config.
-# As pytest's type support improves, this will likely change.
-# TODO: revisit pytestconfig as https://github.com/pytest-dev/pytest/issues/7469 progresses.
-from _pytest.config import Config as PytestConfig  # noqa: F401
-
 _T = TypeVar("_T")
diff --git a/metadata-ingestion/tests/unit/glue/test_glue_source.py b/metadata-ingestion/tests/unit/glue/test_glue_source.py
index 4df0c6d17b06c..693fd6bc336fd 100644
--- a/metadata-ingestion/tests/unit/glue/test_glue_source.py
+++ b/metadata-ingestion/tests/unit/glue/test_glue_source.py
@@ -34,7 +34,6 @@
     run_and_get_pipeline,
     validate_all_providers_have_committed_successfully,
 )
-from tests.test_helpers.type_helpers import PytestConfig
 from tests.unit.glue.test_glue_source_stubs import (
     databases_1,
     databases_2,
@@ -174,7 +173,7 @@ def test_column_type(hive_column_type: str, expected_type: Type) -> None:
 @freeze_time(FROZEN_TIME)
 def test_glue_ingest(
     tmp_path: Path,
-    pytestconfig: PytestConfig,
+    pytestconfig: pytest.Config,
     platform_instance: str,
     mce_file: str,
     mce_golden_file: str,
@@ -410,7 +409,7 @@ def test_glue_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
 
 def test_glue_with_delta_schema_ingest(
     tmp_path: Path,
-    pytestconfig: PytestConfig,
+    pytestconfig: pytest.Config,
 ) -> None:
     glue_source_instance = glue_source(
         platform_instance="delta_platform_instance",
@@ -446,7 +445,7 @@ def test_glue_with_delta_schema_ingest(
 
 def test_glue_with_malformed_delta_schema_ingest(
     tmp_path: Path,
-    pytestconfig: PytestConfig,
+    pytestconfig: pytest.Config,
 ) -> None:
     glue_source_instance = glue_source(
         platform_instance="delta_platform_instance",
@@ -489,7 +488,7 @@ def test_glue_with_malformed_delta_schema_ingest(
 @freeze_time(FROZEN_TIME)
 def test_glue_ingest_include_table_lineage(
     tmp_path: Path,
-    pytestconfig: PytestConfig,
+    pytestconfig: pytest.Config,
     mock_datahub_graph_instance: DataHubGraph,
     platform_instance: str,
     mce_file: str,
@@ -584,7 +583,7 @@ def test_glue_ingest_include_table_lineage(
 @freeze_time(FROZEN_TIME)
 def test_glue_ingest_include_column_lineage(
     tmp_path: Path,
-    pytestconfig: PytestConfig,
+    pytestconfig: pytest.Config,
     mock_datahub_graph_instance: DataHubGraph,
     platform_instance: str,
     mce_file: str,
@@ -684,7 +683,7 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass:
 @freeze_time(FROZEN_TIME)
 def test_glue_ingest_with_profiling(
     tmp_path: Path,
-    pytestconfig: PytestConfig,
+    pytestconfig: pytest.Config,
 ) -> None:
     glue_source_instance = glue_source_with_profiling()
     mce_file = "glue_mces.json"
diff --git a/metadata-ingestion/tests/unit/redshift/test_redshift_source.py b/metadata-ingestion/tests/unit/redshift/test_redshift_source.py
index 8198caf50df7f..f016312dfe47f 100644
--- a/metadata-ingestion/tests/unit/redshift/test_redshift_source.py
+++ b/metadata-ingestion/tests/unit/redshift/test_redshift_source.py
@@ -1,15 +1,15 @@
 from typing import Iterable
 
-from datahub.emitter.mcp import (
-    MetadataChangeProposalClass,
-    MetadataChangeProposalWrapper,
-)
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.redshift.config import RedshiftConfig
 from datahub.ingestion.source.redshift.redshift import RedshiftSource
 from datahub.ingestion.source.redshift.redshift_schema import RedshiftTable
-from datahub.metadata.schema_classes import MetadataChangeEventClass
+from datahub.metadata.schema_classes import (
+    MetadataChangeEventClass,
+    MetadataChangeProposalClass,
+)
 
 
 def redshift_source_setup(custom_props_flag: bool) -> Iterable[MetadataWorkUnit]:
diff --git a/metadata-ingestion/tests/unit/serde/test_serde.py b/metadata-ingestion/tests/unit/serde/test_serde.py
index 727f2b10511b5..a131ac9ce2a1b 100644
--- a/metadata-ingestion/tests/unit/serde/test_serde.py
+++ b/metadata-ingestion/tests/unit/serde/test_serde.py
@@ -19,7 +19,6 @@
 from datahub.metadata.schemas import getMetadataChangeEventSchema
 from tests.test_helpers import mce_helpers
 from tests.test_helpers.click_helpers import run_datahub_cmd
-from tests.test_helpers.type_helpers import PytestConfig
 
 FROZEN_TIME = "2021-07-22 18:54:06"
 
@@ -41,7 +40,7 @@
     ],
 )
 def test_serde_to_json(
-    pytestconfig: PytestConfig, tmp_path: pathlib.Path, json_filename: str
+    pytestconfig: pytest.Config, tmp_path: pathlib.Path, json_filename: str
 ) -> None:
     golden_file = pytestconfig.rootpath / json_filename
     output_file = tmp_path / "output.json"
@@ -73,7 +72,7 @@
 )
 @freeze_time(FROZEN_TIME)
 def test_serde_to_avro(
-    pytestconfig: PytestConfig,
+    pytestconfig: pytest.Config,
     json_filename: str,
 ) -> None:
     # In this test, we want to read in from JSON -> MCE object.
@@ -126,14 +125,14 @@
     ],
 )
 @freeze_time(FROZEN_TIME)
-def test_check_metadata_schema(pytestconfig: PytestConfig, json_filename: str) -> None:
+def test_check_metadata_schema(pytestconfig: pytest.Config, json_filename: str) -> None:
     json_file_path = pytestconfig.rootpath / json_filename
     run_datahub_cmd(["check", "metadata-file", f"{json_file_path}"])
 
 
 def test_check_metadata_rewrite(
-    pytestconfig: PytestConfig, tmp_path: pathlib.Path
+    pytestconfig: pytest.Config, tmp_path: pathlib.Path
 ) -> None:
     json_input = (
         pytestconfig.rootpath / "tests/unit/serde/test_canonicalization_input.json"
     )
@@ -161,7 +160,7 @@
     ],
 )
 def test_check_mce_schema_failure(
-    pytestconfig: PytestConfig, json_filename: str
+    pytestconfig: pytest.Config, json_filename: str
 ) -> None:
     json_file_path = pytestconfig.rootpath / json_filename
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py
index 744d43373a0a1..4e8ba8aa6b777 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py
@@ -4,11 +4,9 @@
 import pytest
 import sqlglot
 
+from datahub.sql_parsing.query_types import get_query_type_of_sql
 from datahub.sql_parsing.sql_parsing_common import QueryType
-from datahub.sql_parsing.sqlglot_lineage import (
-    _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT,
-    get_query_type_of_sql,
-)
+from datahub.sql_parsing.sqlglot_lineage import _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT
 from datahub.sql_parsing.sqlglot_utils import (
     generalize_query,
     generalize_query_fast,
diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py
index 66564dc856aba..96ab8f7a01a38 100644
--- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py
+++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py
@@ -10,7 +10,8 @@
 
 from datahub.api.entities.dataprocess.dataprocess_instance import DataProcessInstance
 from datahub.configuration.common import AllowDenyPattern
-from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
+from datahub.configuration.source_common import DatasetSourceConfigMixin
+from datahub.emitter.mce_builder import DEFAULT_ENV
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
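The redshift test hunk above illustrates the general split this patch enforces: `MetadataChangeProposalWrapper` is the emitter-side helper in `datahub.emitter.mcp`, while the generated `MetadataChangeProposalClass` lives in `datahub.metadata.schema_classes` and should be imported from there. A sketch of the two side by side, assuming the wrapper's current `make_mcp()` lowering method; the urn is illustrative:

```python
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    MetadataChangeProposalClass,
    StatusClass,
)

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)",
    aspect=StatusClass(removed=False),
)
# Lower the wrapper to the generated class when a raw MCP is needed.
low_level: MetadataChangeProposalClass = mcp.make_mcp()
```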