Skip to content

Commit 5497393

Browse files
authored
fix(ingest/powerbi): patch lineage for powerbi report (#10270)
1 parent 8ed87d6 commit 5497393

16 files changed

+342
-512
lines changed

metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
1-
from typing import Iterable, Optional
1+
from typing import Iterable, Optional, Union
22

33
from pydantic.fields import Field
44

55
from datahub.configuration.common import ConfigModel
66
from datahub.emitter.mce_builder import datahub_guid, set_aspect
77
from datahub.ingestion.api.workunit import MetadataWorkUnit
88
from datahub.metadata.schema_classes import (
9+
ChartInfoClass,
10+
DashboardInfoClass,
911
FineGrainedLineageClass,
1012
MetadataChangeEventClass,
1113
SystemMetadataClass,
1214
UpstreamLineageClass,
1315
)
16+
from datahub.specific.chart import ChartPatchBuilder
17+
from datahub.specific.dashboard import DashboardPatchBuilder
1418
from datahub.specific.dataset import DatasetPatchBuilder
1519

1620

@@ -28,6 +32,62 @@ def convert_upstream_lineage_to_patch(
2832
return MetadataWorkUnit(id=MetadataWorkUnit.generate_workunit_id(mcp), mcp_raw=mcp)
2933

3034

35+
def convert_chart_info_to_patch(
    urn: str, aspect: ChartInfoClass, system_metadata: Optional[SystemMetadataClass]
) -> Union[MetadataWorkUnit, None]:
    """Convert a full ``chartInfo`` aspect into a patch work unit.

    Only the aspect's custom properties and input edges are copied into the
    patch; no other field of ``aspect`` is read by this function.

    Args:
        urn: URN of the chart the patch applies to.
        aspect: Chart info aspect supplying the custom properties and input
            edges.
        system_metadata: Optional system metadata handed to the patch builder.

    Returns:
        A work unit wrapping the first change proposal produced by the patch
        builder, or ``None`` when the builder produced nothing.
    """
    patch_builder = ChartPatchBuilder(urn, system_metadata)

    if aspect.customProperties:
        for key, value in aspect.customProperties.items():
            patch_builder.add_custom_property(key, str(value))

    if aspect.inputEdges:
        for input_edge in aspect.inputEdges:
            patch_builder.add_input_edge(input_edge)

    # Fetch the first proposal with a default instead of testing the
    # truthiness of build()'s result: a lazy iterable is always truthy, so an
    # empty one would slip past such a check and raise StopIteration.
    mcp = next(iter(patch_builder.build()), None)
    if mcp is None:
        return None

    return MetadataWorkUnit(id=MetadataWorkUnit.generate_workunit_id(mcp), mcp_raw=mcp)
59+
60+
61+
def convert_dashboard_info_to_patch(
    urn: str, aspect: DashboardInfoClass, system_metadata: Optional[SystemMetadataClass]
) -> Union[MetadataWorkUnit, None]:
    """Convert a full ``dashboardInfo`` aspect into a patch work unit.

    Only the aspect's custom properties, dataset edges and chart edges are
    copied into the patch; no other field of ``aspect`` is read by this
    function.

    NOTE(review): title, description, lastModified and dashboardUrl are not
    carried into the patch, so they no longer appear in the emitted
    dashboardInfo -- confirm this is intended.

    Args:
        urn: URN of the dashboard the patch applies to.
        aspect: Dashboard info aspect supplying the custom properties and
            edges.
        system_metadata: Optional system metadata handed to the patch builder.

    Returns:
        A work unit wrapping the first change proposal produced by the patch
        builder, or ``None`` when the builder produced nothing.
    """
    patch_builder = DashboardPatchBuilder(urn, system_metadata)

    if aspect.customProperties:
        for key, value in aspect.customProperties.items():
            patch_builder.add_custom_property(key, str(value))

    if aspect.datasetEdges:
        for dataset_edge in aspect.datasetEdges:
            patch_builder.add_dataset_edge(dataset_edge)

    if aspect.chartEdges:
        for chart_edge in aspect.chartEdges:
            patch_builder.add_chart_edge(chart_edge)

    # Fetch the first proposal with a default instead of testing the
    # truthiness of build()'s result: a lazy iterable is always truthy, so an
    # empty one would slip past such a check and raise StopIteration.
    mcp = next(iter(patch_builder.build()), None)
    if mcp is None:
        return None

    return MetadataWorkUnit(id=MetadataWorkUnit.generate_workunit_id(mcp), mcp_raw=mcp)
89+
90+
3191
def get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str:
3292
return datahub_guid(
3393
{

metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@
8484
from datahub.metadata.urns import ChartUrn
8585
from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
8686
from datahub.utilities.dedup_list import deduplicate_list
87+
from datahub.ingestion.api.incremental_lineage_helper import (
    convert_dashboard_info_to_patch,
)
8790

8891
# Logger instance
8992
logger = logging.getLogger(__name__)
@@ -1300,17 +1303,36 @@ def get_workspace_workunit(
13001303
# Convert PowerBi Dashboard and child entities to Datahub work unit to ingest into Datahub
13011304
workunits = self.mapper.to_datahub_work_units(dashboard, workspace)
13021305
for workunit in workunits:
1303-
# Return workunit to Datahub Ingestion framework
1304-
yield workunit
1306+
wu = self._get_dashboard_patch_work_unit(workunit)
1307+
if wu is not None:
1308+
yield wu
13051309

13061310
for report in workspace.reports:
13071311
for work_unit in self.mapper.report_to_datahub_work_units(
13081312
report, workspace
13091313
):
1310-
yield work_unit
1314+
wu = self._get_dashboard_patch_work_unit(work_unit)
1315+
if wu is not None:
1316+
yield wu
13111317

13121318
yield from self.extract_independent_datasets(workspace)
13131319

1320+
def _get_dashboard_patch_work_unit(
    self, work_unit: MetadataWorkUnit
) -> Optional[MetadataWorkUnit]:
    """Swap a work unit carrying dashboard info for its patch equivalent.

    Work units that do not carry a ``DashboardInfoClass`` aspect are returned
    unchanged.

    Args:
        work_unit: Work unit about to be emitted.

    Returns:
        The original work unit, the patch work unit built from its dashboard
        info aspect, or ``None`` when the conversion yields no patch.
    """
    dashboard_info_aspect: Optional[
        DashboardInfoClass
    ] = work_unit.get_aspect_of_type(DashboardInfoClass)

    if not dashboard_info_aspect:
        return work_unit

    # The conversion may return None when there is nothing to patch, which is
    # why the return type is Optional and callers test the result against None.
    return convert_dashboard_info_to_patch(
        work_unit.get_urn(),
        dashboard_info_aspect,
        work_unit.metadata.systemMetadata,
    )
1335+
13141336
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
13151337
# modified_workspaces is not idempotent, so workunit processors are run later for each workspace_id
13161338
# This will result in creating a checkpoint for each workspace_id

metadata-ingestion/tests/integration/powerbi/golden_test_admin_access_not_allowed.json

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -294,34 +294,26 @@
294294
{
295295
"entityType": "dashboard",
296296
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
297-
"changeType": "UPSERT",
297+
"changeType": "PATCH",
298298
"aspectName": "dashboardInfo",
299299
"aspect": {
300-
"json": {
301-
"customProperties": {
302-
"chartCount": "2",
303-
"workspaceName": "demo-workspace",
304-
"workspaceId": "64ED5CAD-7C10-4684-8180-826122881108"
300+
"json": [
301+
{
302+
"op": "add",
303+
"path": "/customProperties/chartCount",
304+
"value": "2"
305305
},
306-
"title": "test_dashboard",
307-
"description": "Description of test dashboard",
308-
"charts": [
309-
"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
310-
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)"
311-
],
312-
"datasets": [],
313-
"lastModified": {
314-
"created": {
315-
"time": 0,
316-
"actor": "urn:li:corpuser:unknown"
317-
},
318-
"lastModified": {
319-
"time": 0,
320-
"actor": "urn:li:corpuser:unknown"
321-
}
306+
{
307+
"op": "add",
308+
"path": "/customProperties/workspaceName",
309+
"value": "demo-workspace"
322310
},
323-
"dashboardUrl": "https://localhost/dashboards/web/1"
324-
}
311+
{
312+
"op": "add",
313+
"path": "/customProperties/workspaceId",
314+
"value": "64ED5CAD-7C10-4684-8180-826122881108"
315+
}
316+
]
325317
},
326318
"systemMetadata": {
327319
"lastObserved": 1643871600000,

metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json

Lines changed: 16 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,34 +1208,26 @@
12081208
{
12091209
"entityType": "dashboard",
12101210
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
1211-
"changeType": "UPSERT",
1211+
"changeType": "PATCH",
12121212
"aspectName": "dashboardInfo",
12131213
"aspect": {
1214-
"json": {
1215-
"customProperties": {
1216-
"chartCount": "2",
1217-
"workspaceName": "demo-workspace",
1218-
"workspaceId": "64ED5CAD-7C10-4684-8180-826122881108"
1214+
"json": [
1215+
{
1216+
"op": "add",
1217+
"path": "/customProperties/chartCount",
1218+
"value": "2"
12191219
},
1220-
"title": "test_dashboard",
1221-
"description": "",
1222-
"charts": [
1223-
"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
1224-
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)"
1225-
],
1226-
"datasets": [],
1227-
"lastModified": {
1228-
"created": {
1229-
"time": 0,
1230-
"actor": "urn:li:corpuser:unknown"
1231-
},
1232-
"lastModified": {
1233-
"time": 0,
1234-
"actor": "urn:li:corpuser:unknown"
1235-
}
1220+
{
1221+
"op": "add",
1222+
"path": "/customProperties/workspaceName",
1223+
"value": "demo-workspace"
12361224
},
1237-
"dashboardUrl": "https://localhost/dashboards/web/1"
1238-
}
1225+
{
1226+
"op": "add",
1227+
"path": "/customProperties/workspaceId",
1228+
"value": "64ED5CAD-7C10-4684-8180-826122881108"
1229+
}
1230+
]
12391231
},
12401232
"systemMetadata": {
12411233
"lastObserved": 1643871600000,
@@ -1959,37 +1951,6 @@
19591951
"lastRunId": "no-run-id-provided"
19601952
}
19611953
},
1962-
{
1963-
"entityType": "dashboard",
1964-
"entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)",
1965-
"changeType": "UPSERT",
1966-
"aspectName": "dashboardInfo",
1967-
"aspect": {
1968-
"json": {
1969-
"customProperties": {},
1970-
"title": "SalesMarketing",
1971-
"description": "Acryl sales marketing report",
1972-
"charts": [],
1973-
"datasets": [],
1974-
"lastModified": {
1975-
"created": {
1976-
"time": 0,
1977-
"actor": "urn:li:corpuser:unknown"
1978-
},
1979-
"lastModified": {
1980-
"time": 0,
1981-
"actor": "urn:li:corpuser:unknown"
1982-
}
1983-
},
1984-
"dashboardUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715"
1985-
}
1986-
},
1987-
"systemMetadata": {
1988-
"lastObserved": 1643871600000,
1989-
"runId": "powerbi-test",
1990-
"lastRunId": "no-run-id-provided"
1991-
}
1992-
},
19931954
{
19941955
"entityType": "dashboard",
19951956
"entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)",

metadata-ingestion/tests/integration/powerbi/golden_test_cll.json

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,34 +1233,26 @@
12331233
{
12341234
"entityType": "dashboard",
12351235
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
1236-
"changeType": "UPSERT",
1236+
"changeType": "PATCH",
12371237
"aspectName": "dashboardInfo",
12381238
"aspect": {
1239-
"json": {
1240-
"customProperties": {
1241-
"chartCount": "2",
1242-
"workspaceName": "demo-workspace",
1243-
"workspaceId": "64ED5CAD-7C10-4684-8180-826122881108"
1239+
"json": [
1240+
{
1241+
"op": "add",
1242+
"path": "/customProperties/chartCount",
1243+
"value": "2"
12441244
},
1245-
"title": "test_dashboard",
1246-
"description": "Description of test dashboard",
1247-
"charts": [
1248-
"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
1249-
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)"
1250-
],
1251-
"datasets": [],
1252-
"lastModified": {
1253-
"created": {
1254-
"time": 0,
1255-
"actor": "urn:li:corpuser:unknown"
1256-
},
1257-
"lastModified": {
1258-
"time": 0,
1259-
"actor": "urn:li:corpuser:unknown"
1260-
}
1245+
{
1246+
"op": "add",
1247+
"path": "/customProperties/workspaceName",
1248+
"value": "demo-workspace"
12611249
},
1262-
"dashboardUrl": "https://localhost/dashboards/web/1"
1263-
}
1250+
{
1251+
"op": "add",
1252+
"path": "/customProperties/workspaceId",
1253+
"value": "64ED5CAD-7C10-4684-8180-826122881108"
1254+
}
1255+
]
12641256
},
12651257
"systemMetadata": {
12661258
"lastObserved": 1643871600000,

0 commit comments

Comments
 (0)