3939 SchemaFieldDataTypeClass ,
4040 SchemaMetadataClass ,
4141 StringTypeClass ,
42+ StructuredPropertiesClass ,
43+ StructuredPropertyDefinitionClass ,
44+ StructuredPropertySettingsClass ,
45+ StructuredPropertyValueAssignmentClass ,
4246 SubTypesClass ,
4347 TagAssociationClass ,
4448)
49+ from datahub .metadata .urns import StructuredPropertyUrn , Urn
4550
4651logger = logging .getLogger (__name__ )
4752
@@ -888,6 +893,54 @@ def make_entity_docs(entity_display_name: str, graph: RelationshipGraph) -> str:
888893 raise Exception (f"Failed to find information for entity: { entity_name } " )
889894
890895
def create_search_field_name_property() -> List[MetadataChangeProposalWrapper]:
    """
    Build the MCPs that register the "Search Field Name" structured property.

    The property records the name a field has in the search index for cases
    where that name is not the same as the schema field name (for example, the
    'instance' field being indexed as 'platformInstance').

    Two aspects are emitted against the same structured-property URN, in order:
      1. the definition: a single-valued, mutable, string-typed property that
         applies to schemaField entities;
      2. the display settings: visible, shown in the asset summary and in the
         schema columns table, but not offered as a search filter or badge.

    Returns:
        A two-element list: the definition MCP followed by the settings MCP.
    """
    qualified_name = "com.datahub.metadata.searchFieldName"

    # The URN text is parsed by StructuredPropertyUrn and rendered back to str.
    urn_text = str(
        StructuredPropertyUrn.from_string(
            f"urn:li:structuredProperty:{qualified_name}"
        )
    )

    # What the property is and which entities it can be attached to.
    definition_aspect = StructuredPropertyDefinitionClass(
        qualifiedName=qualified_name,
        displayName="Search Field Name",
        valueType=Urn.make_data_type_urn("string"),
        description=(
            "The field name used in the search index when it differs from the schema field name. "
            "Use this field name when constructing search queries for this field."
        ),
        entityTypes=[Urn.make_entity_type_urn("schemaField")],
        cardinality="SINGLE",
        immutable=False,
    )

    # How the property is surfaced in the UI.
    settings_aspect = StructuredPropertySettingsClass(
        isHidden=False,
        showInSearchFilters=False,
        showInAssetSummary=True,
        showAsAssetBadge=False,
        showInColumnsTable=True,  # appears as a column in schema tables
    )

    # Definition first, settings second; both target the same property URN.
    return [
        MetadataChangeProposalWrapper(entityUrn=urn_text, aspect=aspect)
        for aspect in (definition_aspect, settings_aspect)
    ]
942+
943+
891944def generate_stitched_record (
892945 relnships_graph : RelationshipGraph ,
893946) -> Iterable [MetadataChangeProposalWrapper ]:
@@ -897,6 +950,11 @@ def strip_types(field_path: str) -> str:
897950 final_path = re .sub (r"^\[version=2.0\]\." , "" , final_path )
898951 return final_path
899952
953+ # Track schema fields that need structured properties
954+ schema_field_properties : Dict [
955+ str , str
956+ ] = {} # schema_field_urn -> search_field_name
957+
900958 for entity_name , entity_def in entity_registry .items ():
901959 entity_display_name = entity_def .display_name
902960 entity_fields = []
@@ -981,6 +1039,28 @@ def strip_types(field_path: str) -> str:
9811039 f_field .globalTags .tags .append (
9821040 TagAssociationClass (tag = "urn:li:tag:Searchable" )
9831041 )
1042+
1043+ # Check if search field name differs from actual field name
1044+ searchable_config = json_dict ["Searchable" ]
1045+ if (
1046+ isinstance (searchable_config , dict )
1047+ and "fieldName" in searchable_config
1048+ ):
1049+ search_field_name = searchable_config ["fieldName" ]
1050+ # Extract the actual field name from the field path
1051+ # Field path format: "[version=2.0].[type=...].<fieldName>"
1052+ actual_field_name = strip_types (f_field .fieldPath ).split (
1053+ "."
1054+ )[- 1 ]
1055+
1056+ if search_field_name != actual_field_name :
1057+ # Track this for later - we'll emit a separate MCP for the schema field entity
1058+ schema_field_urn = make_schema_field_urn (
1059+ source_dataset_urn , f_field .fieldPath
1060+ )
1061+ schema_field_properties [schema_field_urn ] = (
1062+ search_field_name
1063+ )
9841064 if "Relationship" in json_dict :
9851065 relationship_info = json_dict ["Relationship" ]
9861066 # detect if we have relationship specified at leaf level or thru path specs
@@ -1064,6 +1144,21 @@ def strip_types(field_path: str) -> str:
10641144 ],
10651145 )
10661146
1147+ # Emit structured properties for schema fields
1148+ property_urn = "urn:li:structuredProperty:com.datahub.metadata.searchFieldName"
1149+ for schema_field_urn , search_field_name in schema_field_properties .items ():
1150+ yield MetadataChangeProposalWrapper (
1151+ entityUrn = schema_field_urn ,
1152+ aspect = StructuredPropertiesClass (
1153+ properties = [
1154+ StructuredPropertyValueAssignmentClass (
1155+ propertyUrn = property_urn ,
1156+ values = [search_field_name ],
1157+ )
1158+ ]
1159+ ),
1160+ )
1161+
10671162
10681163@dataclass
10691164class EntityAspectName :
@@ -1256,8 +1351,15 @@ def generate( # noqa: C901
12561351 logger .error (f"Failed to generate lineage JSON: { e } " )
12571352 raise
12581353
1354+ # Create structured property for search field names first
1355+ logger .info ("Creating structured property for search field names" )
1356+ structured_property_mcps = create_search_field_name_property ()
1357+
12591358 relationship_graph = RelationshipGraph ()
1260- mcps = list (generate_stitched_record (relationship_graph ))
1359+ entity_mcps = list (generate_stitched_record (relationship_graph ))
1360+
1361+ # Combine MCPs with structured property first
1362+ mcps = structured_property_mcps + entity_mcps
12611363
12621364 shutil .rmtree (f"{ generated_docs_dir } /entities" , ignore_errors = True )
12631365 entity_names = [(x , entity_registry [x ]) for x in generated_documentation ]
0 commit comments