Skip to content

Commit 44d1e63

Browse files
authored
Normalize YAML export formatting for repo-backed namespaces (#1947)
* Make sure that the generated YAML has consistent ordering to prevent spurious changes * Add tests for consistency of node YAML export * Fix
1 parent cf60a2c commit 44d1e63

File tree

2 files changed

+142
-1
lines changed

2 files changed

+142
-1
lines changed

datajunction-server/datajunction_server/internal/namespaces.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from ruamel.yaml.comments import CommentedSeq
1919
from ruamel.yaml.comments import CommentedMap
2020
from ruamel.yaml.comments import Comment
21+
from yamlfix import fix_code
22+
from yamlfix.model import YamlfixConfig
2123

2224
from datajunction_server.database.deployment import Deployment
2325
from datajunction_server.models.deployment import (
@@ -1450,6 +1452,16 @@ def _node_spec_to_yaml_dict(node_spec, include_all_columns=False) -> dict:
14501452
# If columns is explicitly None, remove it
14511453
del data["columns"]
14521454

1455+
# Sort list fields for deterministic output
1456+
if "owners" in data and isinstance(data["owners"], list): # pragma: no branch
1457+
data["owners"] = sorted(data["owners"])
1458+
if "tags" in data and isinstance(data["tags"], list): # pragma: no branch
1459+
data["tags"] = sorted(data["tags"])
1460+
if "columns" in data and isinstance(data["columns"], list):
1461+
for col in data["columns"]:
1462+
if "attributes" in col and isinstance(col["attributes"], list):
1463+
col["attributes"] = sorted(col["attributes"])
1464+
14531465
# Remove empty lists/dicts for cleaner YAML
14541466
data = {k: v for k, v in data.items() if v or v == 0 or v is False}
14551467

@@ -1529,7 +1541,13 @@ def node_spec_to_yaml(node_spec, existing_yaml: str | None = None) -> str:
15291541
# Export to YAML
15301542
stream = StringIO()
15311543
yaml_handler.dump(yaml_dict, stream)
1532-
return stream.getvalue()
1544+
raw = stream.getvalue()
1545+
1546+
# Apply yamlfix for consistent formatting (matches repo pre-commit hooks)
1547+
yamlfix_config = YamlfixConfig()
1548+
yamlfix_config.explicit_start = False
1549+
yamlfix_config.line_length = 120
1550+
return fix_code(raw, yamlfix_config)
15331551

15341552

15351553
# Git configuration validation helpers

datajunction-server/tests/internal/namespaces_test.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
from datajunction_server.internal.namespaces import (
99
_merge_list_with_key,
1010
_merge_yaml_preserving_comments,
11+
node_spec_to_yaml,
1112
)
13+
from datajunction_server.models.deployment import MetricSpec, TransformSpec
14+
from datajunction_server.models.node_type import NodeType
1215

1316

1417
class TestMergeListWithKey:
@@ -301,3 +304,123 @@ def test_merge_yaml_cube_removes_deleted_metrics(self):
301304
# Should only include items that are in new data, in original order
302305
assert result["metrics"] == ["metric_a", "metric_c"]
303306
assert result["dimensions"] == ["dim_x"]
307+
308+
309+
class TestNodeSpecToYaml:
310+
"""Tests for node_spec_to_yaml formatting and determinism"""
311+
312+
def test_owners_are_sorted(self):
313+
"""owners list is sorted alphabetically regardless of input order"""
314+
spec = MetricSpec(
315+
name="ns.metrics.revenue",
316+
node_type=NodeType.METRIC,
317+
owners=["zara@netflix.com", "alice@netflix.com", "bob@netflix.com"],
318+
query="SELECT SUM(rev) FROM ns.transforms.t",
319+
)
320+
assert node_spec_to_yaml(spec).splitlines() == [
321+
"name: ns.metrics.revenue",
322+
"node_type: metric",
323+
"owners: [alice@netflix.com, bob@netflix.com, zara@netflix.com]",
324+
"mode: published",
325+
"query: SELECT SUM(rev) FROM ns.transforms.t",
326+
]
327+
328+
def test_tags_are_sorted(self):
329+
"""tags list is sorted alphabetically regardless of input order"""
330+
spec = MetricSpec(
331+
name="ns.metrics.revenue",
332+
node_type=NodeType.METRIC,
333+
tags=["ratio_metric", "core", "finance"],
334+
query="SELECT SUM(rev) FROM ns.transforms.t",
335+
)
336+
assert node_spec_to_yaml(spec).splitlines() == [
337+
"name: ns.metrics.revenue",
338+
"node_type: metric",
339+
"tags: [core, finance, ratio_metric]",
340+
"mode: published",
341+
"query: SELECT SUM(rev) FROM ns.transforms.t",
342+
]
343+
344+
def test_column_attributes_are_sorted(self):
345+
"""column attributes are sorted alphabetically regardless of input order"""
346+
spec = TransformSpec(
347+
name="ns.transforms.t",
348+
node_type=NodeType.TRANSFORM,
349+
query="SELECT id FROM ns.source.s",
350+
columns=[
351+
{
352+
"name": "id",
353+
"display_name": "ID",
354+
"attributes": ["dimension", "primary_key"],
355+
},
356+
],
357+
)
358+
assert node_spec_to_yaml(spec).splitlines() == [
359+
"name: ns.transforms.t",
360+
"node_type: transform",
361+
"mode: published",
362+
"columns:",
363+
" - name: id",
364+
" display_name: ID",
365+
" attributes: [dimension, primary_key]",
366+
"query: SELECT id FROM ns.source.s",
367+
]
368+
369+
def test_output_is_deterministic(self):
370+
"""calling node_spec_to_yaml twice with the same spec gives identical output"""
371+
spec = MetricSpec(
372+
name="ns.metrics.revenue",
373+
node_type=NodeType.METRIC,
374+
owners=["zara@netflix.com", "alice@netflix.com"],
375+
tags=["ratio_metric", "core"],
376+
query="SELECT SUM(rev) FROM ns.transforms.t",
377+
)
378+
assert node_spec_to_yaml(spec) == node_spec_to_yaml(spec)
379+
380+
def test_no_yaml_document_start_marker(self):
381+
"""output does not start with --- (explicit_start=False)"""
382+
spec = MetricSpec(
383+
name="ns.metrics.revenue",
384+
node_type=NodeType.METRIC,
385+
query="SELECT SUM(rev) FROM ns.transforms.t",
386+
)
387+
assert node_spec_to_yaml(spec).splitlines() == [
388+
"name: ns.metrics.revenue",
389+
"node_type: metric",
390+
"mode: published",
391+
"query: SELECT SUM(rev) FROM ns.transforms.t",
392+
]
393+
394+
def test_multiline_query_uses_literal_block_style(self):
395+
"""multiline queries are serialized with |- literal block style"""
396+
spec = MetricSpec(
397+
name="ns.metrics.revenue",
398+
node_type=NodeType.METRIC,
399+
query="SELECT SUM(rev)\nFROM ns.transforms.t",
400+
)
401+
assert node_spec_to_yaml(spec).splitlines() == [
402+
"name: ns.metrics.revenue",
403+
"node_type: metric",
404+
"mode: published",
405+
"query: |-",
406+
" SELECT SUM(rev)",
407+
" FROM ns.transforms.t",
408+
]
409+
410+
def test_short_lists_use_inline_style(self):
411+
"""short lists (owners, tags) use inline [a, b] style after yamlfix"""
412+
spec = MetricSpec(
413+
name="ns.metrics.revenue",
414+
node_type=NodeType.METRIC,
415+
owners=["alice@netflix.com"],
416+
tags=["core"],
417+
query="SELECT SUM(rev) FROM ns.transforms.t",
418+
)
419+
assert node_spec_to_yaml(spec).splitlines() == [
420+
"name: ns.metrics.revenue",
421+
"node_type: metric",
422+
"owners: [alice@netflix.com]",
423+
"tags: [core]",
424+
"mode: published",
425+
"query: SELECT SUM(rev) FROM ns.transforms.t",
426+
]

0 commit comments

Comments
 (0)