diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 0f26a649efe991..75d9634d123a9e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -410,18 +410,41 @@ def _get_columns_to_profile(self) -> List[str]: if not self.config.any_field_level_metrics_enabled(): return [] + # Get columns to ignore due to tags + ( + ignore_table_profiling, + columns_list_to_ignore_profiling, + ) = _get_columns_to_ignore_profiling( + self.dataset_name, + self.config.tags_to_ignore_profiling, + self.platform, + self.env, + ) + + # If the entire table is tagged to ignore profiling, return empty list + if ignore_table_profiling: + self.report.report_dropped( + f"The profile of table {self.dataset_name} (table is tagged with tags_to_ignore_profiling)" + ) + return [] + # Compute columns to profile columns_to_profile: List[str] = [] # Compute ignored columns ignored_columns_by_pattern: List[str] = [] ignored_columns_by_type: List[str] = [] + ignored_columns_by_tags: List[str] = [] for col_dict in self.dataset.columns: col = col_dict["name"] self.column_types[col] = str(col_dict["type"]) + + # Check if column is tagged to ignore profiling + if col in columns_list_to_ignore_profiling: + ignored_columns_by_tags.append(col) # We expect the allow/deny patterns to specify '.' - if ( + elif ( not self.config._allow_deny_patterns.allowed( f"{self.dataset_name}.{col}" ) @@ -442,6 +465,10 @@ def _get_columns_to_profile(self) -> List[str]: self.report.report_dropped( f"The profile of columns by type {self.dataset_name}({', '.join(sorted(ignored_columns_by_type))})" ) + if ignored_columns_by_tags: + self.report.report_dropped( + f"The profile of columns by tags {self.dataset_name}({', '.join(sorted(ignored_columns_by_tags))})" + ) if self.config.max_number_of_fields_to_profile is not None: if len(columns_to_profile) > self.config.max_number_of_fields_to_profile: @@ -1696,3 +1723,55 @@ def _get_columns_to_ignore_sampling( ) return ignore_table, columns_to_ignore + + +def _get_columns_to_ignore_profiling( + dataset_name: str, tags_to_ignore: Optional[List[str]], platform: str, env: str +) -> Tuple[bool, List[str]]: + logger.debug("Collecting columns to ignore for profiling") + + ignore_table: bool = False + columns_to_ignore: List[str] = [] + + if not tags_to_ignore: + return ignore_table, columns_to_ignore + + try: + dataset_urn = mce_builder.make_dataset_urn( + name=dataset_name, platform=platform, env=env + ) + + datahub_graph = get_default_graph(ClientMode.INGESTION) + + dataset_tags = datahub_graph.get_tags(dataset_urn) + if dataset_tags: + ignore_table = any( + tag_association.tag.split("urn:li:tag:")[1] in tags_to_ignore + for tag_association in dataset_tags.tags + if "urn:li:tag:" in tag_association.tag + and len(tag_association.tag.split("urn:li:tag:")) > 1 + ) + + if not ignore_table: + metadata = datahub_graph.get_aspect( + entity_urn=dataset_urn, aspect_type=EditableSchemaMetadata + ) + + if metadata: + for schemaField in metadata.editableSchemaFieldInfo: + if schemaField.globalTags: + columns_to_ignore.extend( + schemaField.fieldPath + for tag_association in schemaField.globalTags.tags + if "urn:li:tag:" in tag_association.tag + and len(tag_association.tag.split("urn:li:tag:")) > 1 + and tag_association.tag.split("urn:li:tag:")[1] + in tags_to_ignore + ) + + except Exception as e: + logger.warning(f"Error fetching tags for profiling ignore logic: {e}") + # Return default values on error - don't ignore anything + return False, [] + + return ignore_table, columns_to_ignore diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index 420da1201906cf..06ca2f4d6b37ee 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -207,6 +207,15 @@ class GEProfilingConfig(GEProfilingBaseConfig): ), ) + tags_to_ignore_profiling: Optional[List[str]] = pydantic.Field( + default=None, + description=( + "Fixed list of tags to completely ignore profiling for columns with these tags." + " Columns with these tags will be excluded from all profiling activities." + " If not specified, all columns will be profiled based on other configuration." + ), + ) + profile_nested_fields: bool = Field( default=False, description="Whether to profile complex types like structs, arrays and maps. ", diff --git a/metadata-ingestion/tests/unit/ge_profiling/__init__.py b/metadata-ingestion/tests/unit/ge_profiling/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/unit/ge_profiling/test_config.py b/metadata-ingestion/tests/unit/ge_profiling/test_config.py new file mode 100644 index 00000000000000..97b742b7582f41 --- /dev/null +++ b/metadata-ingestion/tests/unit/ge_profiling/test_config.py @@ -0,0 +1,144 @@ +import pytest + +from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig + + +def test_profile_table_level_only(): + config = GEProfilingConfig.model_validate( + {"enabled": True, "profile_table_level_only": True} + ) + assert config.any_field_level_metrics_enabled() is False + + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "profile_table_level_only": True, + "include_field_max_value": False, + } + ) + assert config.any_field_level_metrics_enabled() is False + + +def test_profile_table_level_only_fails_with_field_metric_enabled(): + with pytest.raises( + ValueError, + match="Cannot enable field-level metrics if profile_table_level_only is set", + ): + GEProfilingConfig.model_validate( + { + "enabled": True, + "profile_table_level_only": True, + "include_field_max_value": True, + } + ) + + +def test_tags_to_ignore_profiling_config(): + """Test that tags_to_ignore_profiling configuration is properly parsed.""" + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": ["PII", "Sensitive"], + } + ) + assert config.tags_to_ignore_profiling == ["PII", "Sensitive"] + + # Test with None (default) + config = GEProfilingConfig.model_validate({"enabled": True}) + assert config.tags_to_ignore_profiling is None + + +def test_tags_to_ignore_profiling_vs_sampling(): + """Test that both tags_to_ignore_profiling and tags_to_ignore_sampling can be configured.""" + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": ["PII", "Sensitive"], + "tags_to_ignore_sampling": ["HighCardinality"], + } + ) + assert config.tags_to_ignore_profiling == ["PII", "Sensitive"] + assert config.tags_to_ignore_sampling == ["HighCardinality"] + + +def test_tags_to_ignore_profiling_empty_list(): + """Test that tags_to_ignore_profiling can be an empty list.""" + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": [], + } + ) + assert config.tags_to_ignore_profiling == [] + + +def test_tags_to_ignore_profiling_single_tag(): + """Test that tags_to_ignore_profiling works with a single tag.""" + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": ["PII"], + } + ) + assert config.tags_to_ignore_profiling == ["PII"] + + +def test_tags_to_ignore_profiling_multiple_tags(): + """Test that tags_to_ignore_profiling works with multiple tags.""" + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": [ + "PII", + "Sensitive", + "Confidential", + "Internal", + ], + } + ) + assert config.tags_to_ignore_profiling == [ + "PII", + "Sensitive", + "Confidential", + "Internal", + ] + + +def test_tags_to_ignore_profiling_with_other_config(): + """Test that tags_to_ignore_profiling works alongside other profiling configuration.""" + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": ["PII"], + "include_field_null_count": True, + "include_field_distinct_count": False, + "max_number_of_fields_to_profile": 100, + "profile_table_size_limit": 5000000, + } + ) + assert config.tags_to_ignore_profiling == ["PII"] + assert config.include_field_null_count is True + assert config.include_field_distinct_count is False + assert config.max_number_of_fields_to_profile == 100 + assert config.profile_table_size_limit == 5000000 + + +def test_tags_to_ignore_profiling_validation(): + """Test validation of tags_to_ignore_profiling field.""" + # Test that non-list values are rejected + with pytest.raises(ValueError): + GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": "PII", # Should be a list, not string + } + ) + + # Test that non-string list items are rejected + with pytest.raises(ValueError): + GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": ["PII", 123], # 123 is not a string + } + ) diff --git a/metadata-ingestion/tests/unit/ge_profiling/test_profiler_execution.py b/metadata-ingestion/tests/unit/ge_profiling/test_profiler_execution.py new file mode 100644 index 00000000000000..b85eb1ced9ecc6 --- /dev/null +++ b/metadata-ingestion/tests/unit/ge_profiling/test_profiler_execution.py @@ -0,0 +1,447 @@ +""" +Tests for GE profiler execution functionality, specifically testing the tags_to_ignore_profiling feature. + +These tests focus on the actual execution logic of the profiler, including: +- Column filtering based on tags +- Integration with DataHub graph client +- Profiler workflow with tagged columns +""" + +from unittest.mock import Mock, patch + +from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig +from datahub.metadata.com.linkedin.pegasus2avro.schema import EditableSchemaMetadata +from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass + + +class TestGetColumnsToIgnoreProfiling: + """Test the _get_columns_to_ignore_profiling function.""" + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_no_tags_configured(self, mock_get_graph): + """Test _get_columns_to_ignore_profiling with no tags configured.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", None, "snowflake", "PROD" + ) + + assert ignore_table is False + assert columns_to_ignore == [] + mock_get_graph.assert_not_called() + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_table_level_tag_ignore(self, mock_get_graph): + """Test _get_columns_to_ignore_profiling with table-level tag that should ignore entire table.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + # Mock table-level tags + mock_dataset_tags = GlobalTagsClass( + tags=[TagAssociationClass(tag="urn:li:tag:PII")] + ) + mock_graph.get_tags.return_value = mock_dataset_tags + + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", ["PII"], "snowflake", "PROD" + ) + + assert ignore_table is True + assert columns_to_ignore == [] + mock_graph.get_tags.assert_called_once() + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_column_level_tags_ignore(self, mock_get_graph): + """Test _get_columns_to_ignore_profiling with column-level tags.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + # Mock no table-level tags + mock_graph.get_tags.return_value = None + + # Mock column-level tags + mock_schema_field1 = Mock() + mock_schema_field1.fieldPath = "email" + mock_schema_field1.globalTags = GlobalTagsClass( + tags=[TagAssociationClass(tag="urn:li:tag:PII")] + ) + + mock_schema_field2 = Mock() + mock_schema_field2.fieldPath = "phone" + mock_schema_field2.globalTags = GlobalTagsClass( + tags=[TagAssociationClass(tag="urn:li:tag:Sensitive")] + ) + + mock_schema_field3 = Mock() + mock_schema_field3.fieldPath = "name" + mock_schema_field3.globalTags = GlobalTagsClass( + tags=[TagAssociationClass(tag="urn:li:tag:Public")] + ) + + mock_metadata = EditableSchemaMetadata( + editableSchemaFieldInfo=[ + mock_schema_field1, + mock_schema_field2, + mock_schema_field3, + ] + ) + mock_graph.get_aspect.return_value = mock_metadata + + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", ["PII", "Sensitive"], "snowflake", "PROD" + ) + + assert ignore_table is False + assert set(columns_to_ignore) == {"email", "phone"} + mock_graph.get_tags.assert_called_once() + mock_graph.get_aspect.assert_called_once() + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_no_matching_tags(self, mock_get_graph): + """Test _get_columns_to_ignore_profiling with no matching tags.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + # Mock no table-level tags + mock_graph.get_tags.return_value = None + + # Mock column-level tags that don't match + mock_schema_field = Mock() + mock_schema_field.fieldPath = "email" + mock_schema_field.globalTags = GlobalTagsClass( + tags=[TagAssociationClass(tag="urn:li:tag:Public")] + ) + + mock_metadata = EditableSchemaMetadata( + editableSchemaFieldInfo=[mock_schema_field] + ) + mock_graph.get_aspect.return_value = mock_metadata + + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", ["PII", "Sensitive"], "snowflake", "PROD" + ) + + assert ignore_table is False + assert columns_to_ignore == [] + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_mixed_tagged_and_untagged_columns(self, mock_get_graph): + """Test with a mix of tagged and untagged columns.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + # Mock no table-level tags + mock_graph.get_tags.return_value = None + + # Mock mix of tagged and untagged columns + mock_schema_field1 = Mock() + mock_schema_field1.fieldPath = "id" + mock_schema_field1.globalTags = None # No tags + + mock_schema_field2 = Mock() + mock_schema_field2.fieldPath = "email" + mock_schema_field2.globalTags = GlobalTagsClass( + tags=[TagAssociationClass(tag="urn:li:tag:PII")] + ) + + mock_schema_field3 = Mock() + mock_schema_field3.fieldPath = "name" + mock_schema_field3.globalTags = None # No tags + + mock_metadata = EditableSchemaMetadata( + editableSchemaFieldInfo=[ + mock_schema_field1, + mock_schema_field2, + mock_schema_field3, + ] + ) + mock_graph.get_aspect.return_value = mock_metadata + + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", ["PII"], "snowflake", "PROD" + ) + + assert ignore_table is False + assert columns_to_ignore == ["email"] # Only the tagged column + + +class TestSingleDatasetProfilerIntegration: + """Test the integration of tags_to_ignore_profiling with _SingleDatasetProfiler.""" + + @patch("datahub.ingestion.source.ge_data_profiler._get_columns_to_ignore_profiling") + def test_get_columns_to_profile_excludes_tagged_columns( + self, mock_get_columns_to_ignore + ): + """Test that _get_columns_to_profile excludes tagged columns.""" + from datahub.ingestion.source.ge_data_profiler import _SingleDatasetProfiler + + # Mock the function to return tagged columns + mock_get_columns_to_ignore.return_value = (False, ["email", "ssn"]) + + # Create a mock dataset with columns + mock_dataset = Mock() + mock_dataset.columns = [ + {"name": "id", "type": "INTEGER"}, + {"name": "email", "type": "VARCHAR"}, + {"name": "name", "type": "VARCHAR"}, + {"name": "ssn", "type": "VARCHAR"}, + {"name": "age", "type": "INTEGER"}, + ] + + # Create a mock config + mock_config = Mock() + mock_config.tags_to_ignore_profiling = ["PII"] + mock_config.any_field_level_metrics_enabled.return_value = True + mock_config._allow_deny_patterns.allowed.return_value = True + mock_config.profile_nested_fields = True + mock_config.max_number_of_fields_to_profile = None + + # Create a mock report + mock_report = Mock() + + # Create profiler instance + profiler = _SingleDatasetProfiler( + dataset=mock_dataset, + dataset_name="test_dataset", + partition=None, + config=mock_config, + report=mock_report, + custom_sql=None, + query_combiner=Mock(), + platform="snowflake", + env="PROD", + ) + + # Call the method + columns_to_profile = profiler._get_columns_to_profile() + + # Verify tagged columns are excluded + assert set(columns_to_profile) == {"id", "name", "age"} + assert "email" not in columns_to_profile + assert "ssn" not in columns_to_profile + + # Verify the ignore function was called + mock_get_columns_to_ignore.assert_called_once_with( + "test_dataset", ["PII"], "snowflake", "PROD" + ) + + # Verify reporting + mock_report.report_dropped.assert_called_once() + call_args = mock_report.report_dropped.call_args[0][0] + assert "columns by tags" in call_args + assert "email" in call_args + assert "ssn" in call_args + + @patch("datahub.ingestion.source.ge_data_profiler._get_columns_to_ignore_profiling") + def test_get_columns_to_profile_ignores_entire_table( + self, mock_get_columns_to_ignore + ): + """Test that _get_columns_to_profile ignores entire table when tagged.""" + from datahub.ingestion.source.ge_data_profiler import _SingleDatasetProfiler + + # Mock the function to return table-level ignore + mock_get_columns_to_ignore.return_value = (True, []) + + # Create a mock dataset with columns + mock_dataset = Mock() + mock_dataset.columns = [ + {"name": "id", "type": "INTEGER"}, + {"name": "name", "type": "VARCHAR"}, + ] + + # Create a mock config + mock_config = Mock() + mock_config.tags_to_ignore_profiling = ["PII"] + mock_config.any_field_level_metrics_enabled.return_value = True + + # Create a mock report + mock_report = Mock() + + # Create profiler instance + profiler = _SingleDatasetProfiler( + dataset=mock_dataset, + dataset_name="test_dataset", + partition=None, + config=mock_config, + report=mock_report, + custom_sql=None, + query_combiner=Mock(), + platform="snowflake", + env="PROD", + ) + + # Call the method + columns_to_profile = profiler._get_columns_to_profile() + + # Verify entire table is ignored + assert columns_to_profile == [] + + # Verify the ignore function was called + mock_get_columns_to_ignore.assert_called_once_with( + "test_dataset", ["PII"], "snowflake", "PROD" + ) + + # Verify table-level reporting + mock_report.report_dropped.assert_called_once() + call_args = mock_report.report_dropped.call_args[0][0] + assert "table test_dataset" in call_args + assert "tagged with tags_to_ignore_profiling" in call_args + + @patch("datahub.ingestion.source.ge_data_profiler._get_columns_to_ignore_profiling") + def test_get_columns_to_profile_no_tags_configured( + self, mock_get_columns_to_ignore + ): + """Test that profiler works normally when no tags_to_ignore_profiling is configured.""" + from datahub.ingestion.source.ge_data_profiler import _SingleDatasetProfiler + + # Mock the function to return no ignored columns + mock_get_columns_to_ignore.return_value = (False, []) + + # Create a mock dataset with columns + mock_dataset = Mock() + mock_dataset.columns = [ + {"name": "id", "type": "INTEGER"}, + {"name": "name", "type": "VARCHAR"}, + ] + + # Create a mock config with no tags_to_ignore_profiling + mock_config = Mock() + mock_config.tags_to_ignore_profiling = None + mock_config.any_field_level_metrics_enabled.return_value = True + mock_config._allow_deny_patterns.allowed.return_value = True + mock_config.profile_nested_fields = True + mock_config.max_number_of_fields_to_profile = None + + # Create a mock report + mock_report = Mock() + + # Create profiler instance + profiler = _SingleDatasetProfiler( + dataset=mock_dataset, + dataset_name="test_dataset", + partition=None, + config=mock_config, + report=mock_report, + custom_sql=None, + query_combiner=Mock(), + platform="snowflake", + env="PROD", + ) + + # Call the method + columns_to_profile = profiler._get_columns_to_profile() + + # Verify all columns are included + assert set(columns_to_profile) == {"id", "name"} + + # Verify the ignore function was called with None + mock_get_columns_to_ignore.assert_called_once_with( + "test_dataset", None, "snowflake", "PROD" + ) + + def test_profiler_config_integration(self): + """Test that the profiler can access tags_to_ignore_profiling from config.""" + # Create a real config with tags_to_ignore_profiling + config = GEProfilingConfig.model_validate( + { + "enabled": True, + "tags_to_ignore_profiling": ["PII", "Sensitive"], + "include_field_null_count": True, + } + ) + + # Verify the config field is accessible + assert hasattr(config, "tags_to_ignore_profiling") + assert config.tags_to_ignore_profiling == ["PII", "Sensitive"] + assert config.include_field_null_count is True + + +class TestEdgeCases: + """Test edge cases and error scenarios.""" + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_datahub_graph_error_handling(self, mock_get_graph): + """Test error handling when DataHub graph client fails.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + # Mock DataHub graph to raise an exception + mock_graph.get_tags.side_effect = Exception("DataHub connection failed") + + # Function should handle the error gracefully and return no ignored columns + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", ["PII"], "snowflake", "PROD" + ) + + # Should default to not ignoring anything when there's an error + assert ignore_table is False + assert columns_to_ignore == [] + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_empty_tags_list(self, mock_get_graph): + """Test with empty tags list.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", [], "snowflake", "PROD" + ) + + # Empty tags list should be treated the same as None + assert ignore_table is False + assert columns_to_ignore == [] + mock_get_graph.assert_not_called() + + @patch("datahub.ingestion.source.ge_data_profiler.get_default_graph") + def test_malformed_tag_urns(self, mock_get_graph): + """Test handling of malformed tag URNs.""" + from datahub.ingestion.source.ge_data_profiler import ( + _get_columns_to_ignore_profiling, + ) + + mock_graph = Mock() + mock_get_graph.return_value = mock_graph + + # Mock table-level tags with malformed URN + mock_dataset_tags = GlobalTagsClass( + tags=[TagAssociationClass(tag="malformed-tag-urn")] + ) + mock_graph.get_tags.return_value = mock_dataset_tags + + # Function should handle malformed URNs gracefully + ignore_table, columns_to_ignore = _get_columns_to_ignore_profiling( + "test_dataset", ["PII"], "snowflake", "PROD" + ) + + # Should not crash and should not match malformed URNs + assert ignore_table is False + assert columns_to_ignore == [] diff --git a/metadata-ingestion/tests/unit/test_ge_profiling_config.py b/metadata-ingestion/tests/unit/test_ge_profiling_config.py deleted file mode 100644 index f4d73a6ffe1e4e..00000000000000 --- a/metadata-ingestion/tests/unit/test_ge_profiling_config.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest - -from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig - - -def test_profile_table_level_only(): - config = GEProfilingConfig.parse_obj( - {"enabled": True, "profile_table_level_only": True} - ) - assert config.any_field_level_metrics_enabled() is False - - config = GEProfilingConfig.parse_obj( - { - "enabled": True, - "profile_table_level_only": True, - "include_field_max_value": False, - } - ) - assert config.any_field_level_metrics_enabled() is False - - -def test_profile_table_level_only_fails_with_field_metric_enabled(): - with pytest.raises( - ValueError, - match="Cannot enable field-level metrics if profile_table_level_only is set", - ): - GEProfilingConfig.parse_obj( - { - "enabled": True, - "profile_table_level_only": True, - "include_field_max_value": True, - } - )