Skip to content

Commit bbecbe4

Browse files
authored
feat: apply_knowledge supports non-dataset entities (#180) (#187)
Upgrade mcp-datahub v1.1.1 → v1.2.0 for entity-type-aware UpdateDescription. Add pre-flight validation so column descriptions and curated queries are rejected for non-dataset URNs before any writes occur. Error messages list the supported operations for the target entity type.
1 parent ac43a42 commit bbecbe4

File tree

7 files changed

+621
-7
lines changed

7 files changed

+621
-7
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ require (
1616
github.com/swaggo/swag v1.16.6
1717
github.com/testcontainers/testcontainers-go v0.40.0
1818
github.com/testcontainers/testcontainers-go/modules/postgres v0.40.0
19-
github.com/txn2/mcp-datahub v1.1.1
19+
github.com/txn2/mcp-datahub v1.2.0
2020
github.com/txn2/mcp-s3 v1.0.0
2121
github.com/txn2/mcp-trino v1.1.0
2222
github.com/yosida95/uritemplate/v3 v3.0.2

go.sum

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -274,8 +274,8 @@ github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+F
274274
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
275275
github.com/trinodb/trino-go-client v0.333.0 h1:+bsW8/uLFNF00MEL9JZJym94LlUnle25VgDlWGPEZos=
276276
github.com/trinodb/trino-go-client v0.333.0/go.mod h1:91okdYtRUZoj3XJu/tqdzu11sNliQuN4A+vMFEB8GVE=
277-
github.com/txn2/mcp-datahub v1.1.1 h1:dZcBC9buSV7XKCCHmIamf0SLl7pIilVZLo0HsgUIfIU=
278-
github.com/txn2/mcp-datahub v1.1.1/go.mod h1:4RMSmUYrcoGwlmmJBLuiGPbz9kqT5dqZ65eKaMhqDX0=
277+
github.com/txn2/mcp-datahub v1.2.0 h1:7ua4DUvCzt4EIY9fDOrIMvufqMeGtsjbvb9/xIbRHwc=
278+
github.com/txn2/mcp-datahub v1.2.0/go.mod h1:uktl1c12qQwInw0XIS03jxYusLOFj8h5UO3+YBThzTI=
279279
github.com/txn2/mcp-s3 v1.0.0 h1:0772X3H7bAJPqDtuvDNlZTGEK2m1egInfuqQL/Jlq8Y=
280280
github.com/txn2/mcp-s3 v1.0.0/go.mod h1:hQc0xBl0t/afEgFmrOSKH3OW9uyKdeliFknQwfAzqG0=
281281
github.com/txn2/mcp-trino v1.1.0 h1:5/cSIIzciTT/cV1p8enM+4k4SI+0OcK5uFwcQhOBWhk=
Lines changed: 129 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,129 @@
1+
package knowledge
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"strings"
7+
8+
dhclient "github.com/txn2/mcp-datahub/pkg/client"
9+
)
10+
11+
// entityTypeDataset is the DataHub entity type string for datasets.
12+
// It is compared against the entity type extracted from a URN (for example
// "urn:li:dataset:(...)" yields "dataset") to decide whether dataset-only
// operations such as add_curated_query are offered.
const entityTypeDataset = "dataset"
13+
14+
// entityTypeFromURN extracts the entity type from a DataHub URN.
15+
// For example, "urn:li:dataset:(...)" returns "dataset".
16+
func entityTypeFromURN(urn string) (string, error) {
17+
parsed, err := dhclient.ParseURN(urn)
18+
if err != nil {
19+
return "", fmt.Errorf("invalid URN %q: %w", urn, err)
20+
}
21+
return parsed.EntityType, nil
22+
}
23+
24+
// datasetOnlyOperations are change types that only work on dataset entities.
25+
var datasetOnlyOperations = map[actionType]bool{
26+
actionAddCuratedQuery: true,
27+
}
28+
29+
// supportedOpsForType returns the list of supported operations for a given entity type.
30+
// All entity types support tag, glossary term, documentation, and quality issue operations.
31+
// Only datasets support column descriptions and curated queries.
32+
// update_description is supported for the 10 entity types handled by mcp-datahub.
33+
func supportedOpsForType(entityType string) []string {
34+
ops := []string{
35+
"add_tag", "remove_tag", "add_glossary_term",
36+
"add_documentation", "flag_quality_issue",
37+
}
38+
39+
if descriptionSupportedTypes[entityType] {
40+
ops = append([]string{"update_description"}, ops...)
41+
}
42+
43+
if entityType == entityTypeDataset {
44+
ops = append(ops, "add_curated_query")
45+
}
46+
47+
return ops
48+
}
49+
50+
// descriptionSupportedTypes is the set of entity types for which
// update_description is advertised as a supported operation.
//
// It is intended to mirror the descriptionAspectMap of the upstream
// mcp-datahub library and must be kept in sync with it by hand. Keys use
// DataHub's camelCase entity type spelling, as it appears in URNs.
var descriptionSupportedTypes = map[string]bool{
	"dataset":      true,
	"dashboard":    true,
	"chart":        true,
	"dataFlow":     true,
	"dataJob":      true,
	"container":    true,
	"dataProduct":  true,
	"domain":       true,
	"glossaryTerm": true,
	"glossaryNode": true,
}
64+
65+
// validateEntityTypeForChange checks whether a change type is supported for the
66+
// given entity URN. Returns a user-friendly error message when incompatible.
67+
func validateEntityTypeForChange(urn string, c ApplyChange) error {
68+
entityType, err := entityTypeFromURN(urn)
69+
if err != nil {
70+
return err
71+
}
72+
73+
// Column-level descriptions are dataset-only (schema metadata is a dataset concept).
74+
if c.ChangeType == string(actionUpdateDescription) {
75+
if _, isColumn := parseColumnTarget(c.Target); isColumn {
76+
if entityType != "dataset" {
77+
return fmt.Errorf(
78+
"column-level update_description is only supported for datasets, not %s entities. "+
79+
"Supported operations for %s: %s",
80+
entityType, entityType, strings.Join(supportedOpsForType(entityType), ", "),
81+
)
82+
}
83+
return nil
84+
}
85+
}
86+
87+
// Dataset-only operations.
88+
if datasetOnlyOperations[actionType(c.ChangeType)] && entityType != "dataset" {
89+
return fmt.Errorf(
90+
"%s is only supported for datasets, not %s entities. "+
91+
"Supported operations for %s: %s",
92+
c.ChangeType, entityType, entityType, strings.Join(supportedOpsForType(entityType), ", "),
93+
)
94+
}
95+
96+
return nil
97+
}
98+
99+
// wrapUnsupportedEntityTypeError checks if an error is an ErrUnsupportedEntityType
100+
// from the upstream mcp-datahub library and wraps it with a user-friendly message.
101+
func wrapUnsupportedEntityTypeError(err error, urn string) error {
102+
if err == nil {
103+
return nil
104+
}
105+
106+
if !errors.Is(err, dhclient.ErrUnsupportedEntityType) {
107+
return err
108+
}
109+
110+
entityType, parseErr := entityTypeFromURN(urn)
111+
if parseErr != nil {
112+
return err // Fall back to original error if URN parsing fails
113+
}
114+
115+
return fmt.Errorf(
116+
"update_description is not supported for %s entities. "+
117+
"Supported operations for %s: %s",
118+
entityType, entityType, strings.Join(supportedOpsForType(entityType), ", "),
119+
)
120+
}
121+
122+
// wrapDescriptionError converts ErrUnsupportedEntityType into a user-friendly message,
123+
// and falls back to a generic "description update" wrapper for all other errors.
124+
func wrapDescriptionError(err error, urn string) error {
125+
if errors.Is(err, dhclient.ErrUnsupportedEntityType) {
126+
return wrapUnsupportedEntityTypeError(err, urn)
127+
}
128+
return fmt.Errorf("description update: %w", err)
129+
}

0 commit comments

Comments
 (0)