Skip to content

Commit 1a47a51

Browse files
authored
fix(ingest/build): Fix sagemaker mypy and flake8 issues (#8530)
1 parent de1f23d commit 1a47a51

File tree

10 files changed

+25
-21
lines changed

10 files changed

+25
-21
lines changed

metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from mypy_boto3_sagemaker import SageMakerClient
2525
from mypy_boto3_sagemaker.type_defs import (
2626
DescribeFeatureGroupResponseTypeDef,
27-
FeatureDefinitionOutputTypeDef,
27+
FeatureDefinitionTypeDef,
2828
FeatureGroupSummaryTypeDef,
2929
)
3030

@@ -147,7 +147,7 @@ def get_feature_type(self, aws_type: str, feature_name: str) -> str:
147147
def get_feature_wu(
148148
self,
149149
feature_group_details: "DescribeFeatureGroupResponseTypeDef",
150-
feature: "FeatureDefinitionOutputTypeDef",
150+
feature: "FeatureDefinitionTypeDef",
151151
) -> MetadataWorkUnit:
152152
"""
153153
Generate an MLFeature workunit for a SageMaker feature.

metadata-ingestion/src/datahub/utilities/mapping.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,11 @@ def sanitize_owner_ids(self, owner_id: str) -> str:
252252

253253
def get_match(self, match_clause: Any, raw_props_value: Any) -> Optional[Match]:
254254
# function to check if a match clause is satisfied to a value.
255-
if type(raw_props_value) not in Constants.OPERAND_DATATYPE_SUPPORTED or type(
256-
raw_props_value
257-
) != type(match_clause):
255+
if not any(
256+
isinstance(raw_props_value, t) for t in Constants.OPERAND_DATATYPE_SUPPORTED
257+
) or not isinstance(raw_props_value, type(match_clause)):
258258
return None
259-
elif type(raw_props_value) == str:
259+
elif isinstance(raw_props_value, str):
260260
return re.match(match_clause, raw_props_value)
261261
else:
262262
return re.match(str(match_clause), str(raw_props_value))

metadata-ingestion/src/datahub_provider/_plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def get_inlets_from_task(task: BaseOperator, context: Any) -> Iterable[Any]:
107107
]
108108

109109
for inlet in task_inlets:
110-
if type(inlet) != str:
110+
if not isinstance(inlet, str):
111111
inlets.append(inlet)
112112

113113
return inlets

metadata-ingestion/src/datahub_provider/operators/datahub_assertion_operator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ def execute(self, context: Any) -> bool:
6262
return True
6363

6464
self.log.info(f"Checking if dataset {self.urn} is ready to be consumed")
65-
if type(self.urn) == str:
65+
if isinstance(self.urn, str):
6666
urns = [self.urn]
67-
elif type(self.urn) == list:
67+
elif isinstance(self.urn, list):
6868
urns = self.urn
6969
else:
7070
raise Exception(f"urn parameter has invalid type {type(self.urn)}")

metadata-ingestion/src/datahub_provider/operators/datahub_assertion_sensor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ def poke(self, context: Any) -> bool:
6161
return True
6262

6363
self.log.info(f"Checking if dataset {self.urn} is ready to be consumed")
64-
if type(self.urn) == str:
64+
if isinstance(self.urn, str):
6565
urns = [self.urn]
66-
elif type(self.urn) == list:
66+
elif isinstance(self.urn, list):
6767
urns = self.urn
6868
else:
6969
raise Exception(f"urn parameter has invalid type {type(self.urn)}")

metadata-ingestion/src/datahub_provider/operators/datahub_operation_operator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ def execute(self, context: Any) -> bool:
7676
return True
7777

7878
self.log.info(f"Checking if dataset {self.urn} is ready to be consumed")
79-
if type(self.urn) == str:
79+
if isinstance(self.urn, str):
8080
urns = [self.urn]
81-
elif type(self.urn) == list:
81+
elif isinstance(self.urn, list):
8282
urns = self.urn
8383
else:
8484
raise Exception(f"urn parameter has invalid type {type(self.urn)}")

metadata-ingestion/src/datahub_provider/operators/datahub_operation_sensor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@ def poke(self, context: Any) -> bool:
7878
return True
7979

8080
self.log.info(f"Checking if dataset {self.urn} is ready to be consumed")
81-
if type(self.urn) == str:
81+
if isinstance(self.urn, str):
8282
urns = [self.urn]
83-
elif type(self.urn) == list:
83+
elif isinstance(self.urn, list):
8484
urns = self.urn
8585
else:
8686
raise Exception(f"urn parameter has invalid type {type(self.urn)}")

metadata-ingestion/tests/unit/test_glue_source.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def test_column_type(hive_column_type: str, expected_type: Type) -> None:
8989
)
9090
schema_fields = avro_schema_to_mce_fields(json.dumps(avro_schema))
9191
actual_schema_field_type = schema_fields[0].type
92-
assert type(actual_schema_field_type.type) == expected_type
92+
assert isinstance(actual_schema_field_type.type, expected_type)
9393

9494

9595
@pytest.mark.parametrize(

metadata-ingestion/tests/unit/test_kafka_source.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,9 @@ def test_kafka_source_workunits_with_platform_instance(mock_kafka, mock_admin_cl
132132

133133
# DataPlatform aspect should be present when platform_instance is configured
134134
data_platform_aspects = [
135-
asp for asp in proposed_snap.aspects if type(asp) == DataPlatformInstanceClass
135+
asp
136+
for asp in proposed_snap.aspects
137+
if isinstance(asp, DataPlatformInstanceClass)
136138
]
137139
assert len(data_platform_aspects) == 1
138140
assert data_platform_aspects[0].instance == make_dataplatform_instance_urn(
@@ -141,7 +143,7 @@ def test_kafka_source_workunits_with_platform_instance(mock_kafka, mock_admin_cl
141143

142144
# The default browse path should include the platform_instance value
143145
browse_path_aspects = [
144-
asp for asp in proposed_snap.aspects if type(asp) == BrowsePathsClass
146+
asp for asp in proposed_snap.aspects if isinstance(asp, BrowsePathsClass)
145147
]
146148
assert len(browse_path_aspects) == 1
147149
assert f"/prod/{PLATFORM}/{PLATFORM_INSTANCE}" in browse_path_aspects[0].paths
@@ -177,13 +179,15 @@ def test_kafka_source_workunits_no_platform_instance(mock_kafka, mock_admin_clie
177179

178180
# DataPlatform aspect should not be present when platform_instance is not configured
179181
data_platform_aspects = [
180-
asp for asp in proposed_snap.aspects if type(asp) == DataPlatformInstanceClass
182+
asp
183+
for asp in proposed_snap.aspects
184+
if isinstance(asp, DataPlatformInstanceClass)
181185
]
182186
assert len(data_platform_aspects) == 0
183187

184188
# The default browse path should include the platform_instance value
185189
browse_path_aspects = [
186-
asp for asp in proposed_snap.aspects if type(asp) == BrowsePathsClass
190+
asp for asp in proposed_snap.aspects if isinstance(asp, BrowsePathsClass)
187191
]
188192
assert len(browse_path_aspects) == 1
189193
assert f"/prod/{PLATFORM}" in browse_path_aspects[0].paths

metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def test_shared_connection() -> None:
256256
iterator = cache2.sql_query_iterator(
257257
f"SELECT y, sum(x) FROM {cache2.tablename} GROUP BY y ORDER BY y"
258258
)
259-
assert type(iterator) == sqlite3.Cursor
259+
assert isinstance(iterator, sqlite3.Cursor)
260260
assert [tuple(r) for r in iterator] == [("a", 15), ("b", 11)]
261261

262262
# Test joining between the two tables.

0 commit comments

Comments
 (0)