Skip to content

Commit bf5f6f5

Browse files
zheyu001 authored and yoonhyejin committed
feat(ingest/presto-on-hive): enable partition key for presto-on-hive (#8380)
1 parent 8498991 commit bf5f6f5

File tree

12 files changed

+51
-0
lines changed

12 files changed

+51
-0
lines changed

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ public SchemaField apply(@Nonnull final com.linkedin.schema.SchemaField input, @
3434
result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms(), entityUrn));
3535
}
3636
result.setIsPartOfKey(input.isIsPartOfKey());
37+
result.setIsPartitioningKey(input.isIsPartitioningKey());
3738
return result;
3839
}
3940

datahub-graphql-core/src/main/resources/entity.graphql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2872,6 +2872,11 @@ type SchemaField {
28722872
Whether the field is part of a key schema
28732873
"""
28742874
isPartOfKey: Boolean
2875+
2876+
"""
2877+
Whether the field is part of a partitioning key schema
2878+
"""
2879+
isPartitioningKey: Boolean
28752880
}
28762881

28772882
"""

datahub-web-react/src/Mocks.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ export const dataset3WithSchema = {
549549
type: SchemaFieldDataType.String,
550550
nativeDataType: 'varchar(100)',
551551
isPartOfKey: false,
552+
isPartitioningKey: false,
552553
jsonPath: null,
553554
globalTags: null,
554555
glossaryTerms: null,
@@ -563,6 +564,7 @@ export const dataset3WithSchema = {
563564
type: SchemaFieldDataType.String,
564565
nativeDataType: 'boolean',
565566
isPartOfKey: false,
567+
isPartitioningKey: false,
566568
jsonPath: null,
567569
globalTags: null,
568570
glossaryTerms: null,

datahub-web-react/src/app/entity/dataset/profile/schema/utils/schemaTitleRenderer.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { ExtendedSchemaFields } from './types';
77
import TypeLabel from '../../../../shared/tabs/Dataset/Schema/components/TypeLabel';
88
import { ForeignKeyConstraint, SchemaMetadata } from '../../../../../../types.generated';
99
import PrimaryKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PrimaryKeyLabel';
10+
import PartitioningKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PartitioningKeyLabel';
1011
import NullableLabel from '../../../../shared/tabs/Dataset/Schema/components/NullableLabel';
1112
import ForeignKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/ForeignKeyLabel';
1213

@@ -62,6 +63,7 @@ export default function useSchemaTitleRenderer(
6263
</FieldPathText>
6364
<TypeLabel type={record.type} nativeDataType={record.nativeDataType} />
6465
{(schemaMetadata?.primaryKeys?.includes(fieldPath) || record.isPartOfKey) && <PrimaryKeyLabel />}
66+
{record.isPartitioningKey && <PartitioningKeyLabel />}
6567
{record.nullable && <NullableLabel />}
6668
{schemaMetadata?.foreignKeys
6769
?.filter(
datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PartitioningKeyLabel.tsx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import React from 'react';
2+
import { Badge } from 'antd';
3+
import styled from 'styled-components';
4+
import { blue } from '@ant-design/colors';
5+
import { ANTD_GRAY } from '../../../../constants';
6+
7+
/**
 * antd `Badge` restyled as a small outlined pill: gray background, blue text
 * and a blue border, applied to the badge's `.ant-badge-count` element.
 * The `&&&` selector repeats the generated class name so these rules take
 * precedence over antd's default badge styles.
 */
const PartitioningKeyBadge = styled(Badge)`
    margin-left: 4px;
    &&& .ant-badge-count {
        background-color: ${ANTD_GRAY[1]};
        color: ${blue[5]};
        border: 1px solid ${blue[2]};
        font-size: 12px;
        font-weight: 400;
        height: 22px;
    }
`;

/**
 * Label shown next to a schema field that is a partitioning key.
 *
 * Takes no props and always renders the fixed text "Partition Key".
 */
export default function PartitioningKeyLabel() {
    const badgeText = 'Partition Key';
    return <PartitioningKeyBadge count={badgeText} />;
}

datahub-web-react/src/graphql/fragments.graphql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,7 @@ fragment schemaFieldFields on SchemaField {
678678
nativeDataType
679679
recursive
680680
isPartOfKey
681+
isPartitioningKey
681682
globalTags {
682683
...globalTagsFields
683684
}

datahub-web-react/src/graphql/versionedDataset.graphql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ query getVersionedDataset($urn: String!, $versionStamp: String) {
1010
nativeDataType
1111
recursive
1212
isPartOfKey
13+
isPartitioningKey
1314
}
1415
lastObserved
1516
}

metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,8 @@ def loop_tables(
524524
# add table schema fields
525525
schema_fields = self.get_schema_fields(dataset_name, columns)
526526

527+
self._set_partition_key(columns, schema_fields)
528+
527529
schema_metadata = get_schema_metadata(
528530
self.report,
529531
dataset_name,
@@ -888,6 +890,18 @@ def get_schema_fields_for_column(
888890
default_nullable=True,
889891
)
890892

893+
def _set_partition_key(self, columns, schema_fields):
894+
if len(columns) > 0:
895+
partition_key_names = set()
896+
for column in columns:
897+
if column["is_partition_col"]:
898+
partition_key_names.add(column["col_name"])
899+
900+
for schema_field in schema_fields:
901+
name = schema_field.fieldPath.split(".")[-1]
902+
if name in partition_key_names:
903+
schema_field.isPartitioningKey = True
904+
891905

892906
class SQLAlchemyClient:
893907
def __init__(self, config: SQLAlchemyConfig):

metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,6 +1277,7 @@
12771277
"nativeDataType": "string",
12781278
"recursive": false,
12791279
"isPartOfKey": false,
1280+
"isPartitioningKey": true,
12801281
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
12811282
},
12821283
{

metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,6 +1277,7 @@
12771277
"nativeDataType": "string",
12781278
"recursive": false,
12791279
"isPartOfKey": false,
1280+
"isPartitioningKey": true,
12801281
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
12811282
},
12821283
{

0 commit comments

Comments
 (0)