-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathdiffer.py
More file actions
119 lines (99 loc) · 4.79 KB
/
differ.py
File metadata and controls
119 lines (99 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import streamlit as st
from streamlit.runtime.uploaded_file_manager import UploadedFile
import json
import jsondiff
import pandas as pd
from functions.flatten import flatten_keys
from functions import tidy
# Minimal viable imports from dbt-core
from dbt.contracts.graph.manifest import WritableManifest
from dbt.graph.selector_methods import StateSelectorMethod
# To support logging events, mock the 'list' command with default CLI options
from dbt.cli.flags import Flags
from dbt.flags import set_flags
from dbt.cli.types import Command as CliCommand
flags = Flags.from_dict(CliCommand.LIST, {})
set_flags(flags)
class MockPreviousState:
def __init__(self, manifest: WritableManifest) -> None:
self.manifest: Manifest = manifest
st.title("dbt Manifest Differ")
left_col, right_col = st.columns(2)
left_manifest: WritableManifest = None
right_manifest: WritableManifest = None
# Copy-paste from https://github.com/dbt-labs/dbt-core/blob/0ab954e1af9bb2be01fa4ebad2df7626249a1fab/core/dbt/graph/selector_methods.py#L676
state_options = [
"modified",
"new",
"modified.body",
"modified.configs",
"modified.persisted_descriptions",
"modified.relation",
"modified.macros",
"modified.contract"
]
state_method = st.selectbox(label="State comparison method:", options=state_options)
properties_to_ignore = st.multiselect("Properties to ignore when showing node-level diffs:", ['created_at', 'root_path', 'build_path', 'compiled_path', 'deferred', 'schema', 'checksum', 'compiled_code'], default=['created_at', 'checksum'])
skipped_large_seeds = set()
def load_manifest(file: UploadedFile) -> WritableManifest:
data = json.load(file)
data, large_seeds = tidy.remove_large_seeds(data)
skipped_large_seeds.update(large_seeds)
return WritableManifest.upgrade_schema_version(data)
left_file = left_col.file_uploader("First manifest", type='json', help="Pick your left json file")
if left_file is not None:
left_manifest = load_manifest(left_file)
right_file = right_col.file_uploader("Second manifest", type='json', help="Pick your right json file")
if right_file is not None:
right_manifest = load_manifest(right_file)
if left_file and right_file:
# TODO: also calculate diffs for sources, exposures, semantic_models, metrics
included_nodes = set(left_manifest.nodes.keys())
previous_state = MockPreviousState(right_manifest)
state_comparator = StateSelectorMethod(left_manifest, previous_state, "")
if len(skipped_large_seeds) > 0:
st.warning(f"Some large seeds couldn't be compared from the manifest alone: {skipped_large_seeds}" )
state_inclusion_counts = {}
state_inclusion_reasons_by_node = {}
for state_option in state_options:
results = list(state_comparator.search(included_nodes, state_option))
for node in results:
if node in state_inclusion_reasons_by_node:
state_inclusion_reasons_by_node[node].append(state_option)
else:
state_inclusion_reasons_by_node[node] = [state_option]
state_inclusion_counts[state_option] = len((results))
st.bar_chart(state_inclusion_counts)
selected_nodes = list(state_comparator.search(included_nodes, state_method))
if state_comparator.modified_macros:
st.header("Modified macros")
st.write(state_comparator.modified_macros)
if len(selected_nodes) == 0:
st.write("No nodes selected!")
st.header(f"{len(selected_nodes)} Selected node{'s' if len(selected_nodes) != 1 else ''}")
for unique_id in selected_nodes:
left_node = left_manifest.nodes.get(unique_id)
right_node = right_manifest.nodes.get(unique_id)
st.subheader(unique_id)
if left_node and right_node:
left_dict = left_node.to_dict()
right_dict = right_node.to_dict()
diffs = {
k: jsondiff.diff(left_dict[k], right_dict[k], syntax='symmetric', marshal=True)
for k in left_dict if left_dict[k] != right_dict[k] and k not in properties_to_ignore
}
st.write("State methods that pick this node up:")
st.code(state_inclusion_reasons_by_node[unique_id])
if left_node.depends_on.macros and state_comparator.modified_macros:
st.write(f"Depends on macros: {left_node.depends_on.macros}")
st.json(diffs, expanded=False)
flattened_diff = flatten_keys(diffs)
df = pd.DataFrame.from_dict(flattened_diff, orient='index')
st.dataframe(df)
elif not left_node:
st.write(f"Missing from left manifest (brand new node)")
elif not right_node:
st.write(f"Missing from right manifest (deleted node)")
st.divider()
else:
st.warning("Upload two manifests to begin comparison", icon="👯")