Skip to content

Commit 2fc9e32

Browse files
authored
Clean up (#399)
* feat: construct trees with nested_dict_key_to_tree * refactor: test clean up * feat: export trees with tree_to_nested_dict_key * feat: add docs * docs: update CHANGELOG * docs: enhance docstring * refactor: abstract out export tree logic * docs: update CHANGELOG * refactor: shift to common file * refactor: shift to common file * refactor: shift to common file * docs: misc fix * refactor: clean up * refactor: clean up + error check for dataframe_to_dag * refactor: clean up * refactor: enhance assemble_attributes * refactor: enhance assemble_attributes * test: fix test coverage
1 parent 7e27412 commit 2fc9e32

File tree

10 files changed

+129
-125
lines changed

10 files changed

+129
-125
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased]
88
### Changed
9-
- Misc: Some code refactoring
9+
- Misc: Some code refactoring; enhance `assemble_attributes`.
10+
### Fixed
11+
- Test: Test for `tree_to_nested_dict_key` for BinaryNode.
12+
- Error: Check and throw error for `dataframe_to_dag`; previously this error would have gone undetected.
1013

1114
## [0.30.0] - 2025-09-05
1215
### Added:

bigtree/dag/construct.py

Lines changed: 25 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,14 @@ def list_to_dag(
4040
assertions.assert_length_not_empty(relations, "Input list", "relations")
4141

4242
node_dict: Dict[str, T] = dict()
43-
parent_node: T = dagnode.DAGNode() # type: ignore[assignment]
43+
child_name: str = ""
4444

4545
for parent_name, child_name in relations:
46-
if parent_name not in node_dict:
47-
parent_node = node_type(parent_name)
48-
node_dict[parent_name] = parent_node
49-
else:
50-
parent_node = node_dict[parent_name]
51-
if child_name not in node_dict:
52-
child_node = node_type(child_name)
53-
node_dict[child_name] = child_node
54-
else:
55-
child_node = node_dict[child_name]
46+
node_dict[parent_name] = node_dict.get(parent_name, node_type(parent_name))
47+
node_dict[child_name] = node_dict.get(child_name, node_type(child_name))
48+
node_dict[child_name].parents = [node_dict[parent_name]]
5649

57-
child_node.parents = [parent_node]
58-
59-
return parent_node
50+
return node_dict[child_name]
6051

6152

6253
def dict_to_dag(
@@ -92,35 +83,27 @@ def dict_to_dag(
9283
assertions.assert_length_not_empty(relation_attrs, "Dictionary", "relation_attrs")
9384

9485
node_dict: Dict[str, T] = dict()
95-
parent_node: T | None = None
86+
_parent_name: Optional[str] = None
9687

9788
for child_name, node_attrs in relation_attrs.items():
9889
node_attrs = node_attrs.copy()
99-
parent_names: List[str] = []
100-
if parent_key in node_attrs:
101-
parent_names = node_attrs.pop(parent_key)
90+
parent_names = node_attrs.pop(parent_key, [])
10291
assertions.assert_not_reserved_keywords(
10392
node_attrs, ["parent", "parents", "children"]
10493
)
10594

106-
if child_name in node_dict:
107-
child_node = node_dict[child_name]
108-
child_node.set_attrs(node_attrs)
109-
else:
110-
child_node = node_type(child_name, **node_attrs)
111-
node_dict[child_name] = child_node
95+
node_dict[child_name] = node_dict.get(child_name, node_type(child_name))
96+
node_dict[child_name].set_attrs(node_attrs)
11297

11398
for parent_name in parent_names:
114-
parent_node = node_dict.get(parent_name, node_type(parent_name))
115-
node_dict[parent_name] = parent_node
116-
child_node.parents = [parent_node]
99+
node_dict[parent_name] = node_dict.get(parent_name, node_type(parent_name))
100+
node_dict[child_name].parents = [node_dict[parent_name]]
101+
_parent_name = parent_name
117102

118-
if parent_node is None:
119-
raise ValueError(
120-
f"Parent key {parent_key} not in dictionary, check `relation_attrs` and `parent_key`"
121-
)
103+
if _parent_name is None:
104+
raise ValueError("No parent specified, check `relation_attrs` and `parent_key`")
122105

123-
return parent_node
106+
return node_dict[_parent_name]
124107

125108

126109
@exceptions.optional_dependencies_pandas
@@ -196,21 +179,23 @@ def dataframe_to_dag(
196179
raise ValueError(f"Child name cannot be empty, check column: {child_col}")
197180

198181
node_dict: Dict[str, T] = dict()
199-
parent_node: T = dagnode.DAGNode() # type: ignore[assignment]
182+
_parent_name: Optional[str] = None
200183

201184
for row in data.reset_index(drop=True).to_dict(orient="index").values():
202185
child_name = row[child_col]
203186
parent_name = row[parent_col]
204187
node_attrs = common.filter_attributes(
205188
row, omit_keys=["name", child_col, parent_col], omit_null_values=True
206189
)
207-
child_node = node_dict.get(child_name, node_type(child_name, **node_attrs))
208-
child_node.set_attrs(node_attrs)
209-
node_dict[child_name] = child_node
190+
node_dict[child_name] = node_dict.get(child_name, node_type(child_name))
191+
node_dict[child_name].set_attrs(node_attrs)
210192

211193
if not common.isnull(parent_name):
212-
parent_node = node_dict.get(parent_name, node_type(parent_name))
213-
node_dict[parent_name] = parent_node
214-
child_node.parents = [parent_node]
194+
node_dict[parent_name] = node_dict.get(parent_name, node_type(parent_name))
195+
node_dict[child_name].parents = [node_dict[parent_name]]
196+
_parent_name = parent_name
197+
198+
if _parent_name is None:
199+
raise ValueError("No parent specified, check `data` and `parent_col`")
215200

216-
return parent_node
201+
return node_dict[_parent_name]

bigtree/dag/export.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,17 @@ def dag_to_dict(
8585

8686
for parent_node, child_node in iterators.dag_iterator(dag):
8787
if parent_node.is_root:
88-
data_parent: Dict[str, Any] = {}
89-
data_parent = common.assemble_attributes(
90-
parent_node, attr_dict, all_attrs, data_parent
91-
)
88+
data_parent = common.assemble_attributes(parent_node, attr_dict, all_attrs)
9289
data_dict[parent_node.node_name] = data_parent
9390

9491
if data_dict.get(child_node.node_name):
9592
data_dict[child_node.node_name][parent_key].append(parent_node.node_name)
9693
else:
97-
data_child = {parent_key: [parent_node.node_name]}
9894
data_child = common.assemble_attributes(
99-
child_node, attr_dict, all_attrs, data_child
95+
child_node,
96+
attr_dict,
97+
all_attrs,
98+
parent_col=(parent_key, [parent_node.node_name]),
10099
)
101100
data_dict[child_node.node_name] = data_child
102101
return data_dict
@@ -144,15 +143,21 @@ def dag_to_dataframe(
144143

145144
for parent_node, child_node in iterators.dag_iterator(dag):
146145
if parent_node.is_root:
147-
data_parent = {name_col: parent_node.node_name, parent_col: None}
148146
data_parent = common.assemble_attributes(
149-
parent_node, attr_dict, all_attrs, data_parent
147+
parent_node,
148+
attr_dict,
149+
all_attrs,
150+
name_col=name_col,
151+
parent_col=(parent_col, None),
150152
)
151153
data_list.append(data_parent)
152154

153-
data_child = {name_col: child_node.node_name, parent_col: parent_node.node_name}
154155
data_child = common.assemble_attributes(
155-
child_node, attr_dict, all_attrs, data_child
156+
child_node,
157+
attr_dict,
158+
all_attrs,
159+
name_col=name_col,
160+
parent_col=(parent_col, parent_node.node_name),
156161
)
157162
data_list.append(data_child)
158163
return pd.DataFrame(data_list).drop_duplicates().reset_index(drop=True)

bigtree/tree/construct/dataframes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,8 @@ def _recursive_add_child(parent_node: T) -> None:
576576
"""
577577
child_rows = data[data[parent_col] == parent_node.node_name]
578578

579-
for row in child_rows.to_dict(orient="index").values():
580-
child_node = node_type(**_retrieve_attr(row))
579+
for _row in child_rows.to_dict(orient="index").values():
580+
child_node = node_type(**_retrieve_attr(_row))
581581
child_node.parent = parent_node
582582
_recursive_add_child(child_node)
583583

bigtree/tree/export/dataframes.py

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import Any, Dict, Optional, TypeVar
3+
from typing import Dict, Optional, TypeVar
44

55
from bigtree.node import node
66
from bigtree.utils import common, exceptions
@@ -95,19 +95,13 @@ def _recursive_append(_node: T) -> None:
9595
and (not skip_depth or _node.depth > skip_depth)
9696
and (not leaf_only or _node.is_leaf)
9797
):
98-
data_child: Dict[str, Any] = {}
99-
if path_col:
100-
data_child[path_col] = _node.path_name
101-
if name_col:
102-
data_child[name_col] = _node.node_name
103-
if parent_col:
104-
parent_name = None
105-
if _node.parent:
106-
parent_name = _node.parent.node_name
107-
data_child[parent_col] = parent_name
108-
10998
data_child = common.assemble_attributes(
110-
_node, attr_dict, all_attrs, data_child
99+
_node,
100+
attr_dict,
101+
all_attrs,
102+
path_col=path_col,
103+
name_col=name_col,
104+
parent_col=parent_col,
111105
)
112106
data_list.append(data_child)
113107
for _child in _node.children:
@@ -196,19 +190,13 @@ def _recursive_append(_node: T) -> None:
196190
and (not skip_depth or _node.depth > skip_depth)
197191
and (not leaf_only or _node.is_leaf)
198192
):
199-
data_child: Dict[str, Any] = {}
200-
if path_col:
201-
data_child[path_col] = _node.path_name
202-
if name_col:
203-
data_child[name_col] = _node.node_name
204-
if parent_col:
205-
parent_name = None
206-
if _node.parent:
207-
parent_name = _node.parent.node_name
208-
data_child[parent_col] = parent_name
209-
210193
data_child = common.assemble_attributes(
211-
_node, attr_dict, all_attrs, data_child
194+
_node,
195+
attr_dict,
196+
all_attrs,
197+
path_col=path_col,
198+
name_col=name_col,
199+
parent_col=parent_col,
212200
)
213201
data_list.append(data_child)
214202
for _child in _node.children:

bigtree/tree/export/dictionaries.py

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,6 @@
1414
T = TypeVar("T", bound=node.Node)
1515

1616

17-
def _assemble_attributes(
18-
_node: T,
19-
attr_dict: Optional[Dict[str, str]],
20-
all_attrs: bool,
21-
data_child: Dict[str, Any] = None,
22-
) -> Dict[str, Any]:
23-
"""Assemble attributes of node into a dictionary.
24-
25-
Args:
26-
_node: node
27-
attr_dict: node attributes mapped to dictionary key, key: node attributes, value: corresponding dictionary key
28-
all_attrs: indicator whether to retrieve all ``Node`` attributes, overrides `attr_dict`
29-
data_child: existing attributes, if any
30-
31-
Returns:
32-
node attributes
33-
"""
34-
data_child = data_child or {}
35-
if all_attrs:
36-
data_child.update(
37-
dict(_node.describe(exclude_attributes=["name"], exclude_prefix="_"))
38-
)
39-
elif attr_dict:
40-
for k, v in attr_dict.items():
41-
data_child[v] = _node.get_attr(k)
42-
return data_child
43-
44-
4517
def tree_to_dict(
4618
tree: T,
4719
name_key: Optional[str] = "name",
@@ -100,16 +72,12 @@ def _recursive_append(_node: T) -> None:
10072
and (not skip_depth or _node.depth > skip_depth)
10173
and (not leaf_only or _node.is_leaf)
10274
):
103-
data_child: Dict[str, Any] = {}
104-
if name_key:
105-
data_child[name_key] = _node.node_name
106-
if parent_key:
107-
parent_name = None
108-
if _node.parent:
109-
parent_name = _node.parent.node_name
110-
data_child[parent_key] = parent_name
11175
data_child = common.assemble_attributes(
112-
_node, attr_dict, all_attrs, data_child
76+
_node,
77+
attr_dict,
78+
all_attrs,
79+
name_col=name_key,
80+
parent_col=parent_key,
11381
)
11482
data_dict[_node.path_name] = data_child
11583
for _child in _node.children:
@@ -165,9 +133,8 @@ def _recursive_append(_node: T, parent_dict: Dict[str, Any]) -> None:
165133
"""
166134
if _node:
167135
if not max_depth or _node.depth <= max_depth:
168-
data_child = {name_key: _node.node_name}
169136
data_child = common.assemble_attributes(
170-
_node, attr_dict, all_attrs, data_child
137+
_node, attr_dict, all_attrs, name_col=name_key
171138
)
172139
if child_key in parent_dict:
173140
parent_dict[child_key].append(data_child)

bigtree/utils/common.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import Any, Collection, Dict, Mapping, Optional, TypeVar, Union
3+
from typing import Any, Collection, Dict, Mapping, Optional, Tuple, TypeVar, Union
44

55
from bigtree.node import dagnode, node
66

@@ -57,25 +57,50 @@ def assemble_attributes(
5757
_node: T,
5858
attr_dict: Optional[Mapping[str, str]],
5959
all_attrs: bool,
60-
existing_data: Dict[str, Any] = None,
60+
path_col: Optional[str] = None,
61+
name_col: Optional[str] = None,
62+
parent_col: Optional[Union[str, Tuple[str, Any]]] = None,
6163
) -> Dict[str, Any]:
6264
"""Assemble attributes of node into a dictionary.
6365
6466
Args:
6567
_node: node
6668
attr_dict: node attributes mapped to dictionary key, key: node attributes, value: corresponding dictionary key
6769
all_attrs: indicator whether to retrieve all ``Node`` attributes, overrides `attr_dict`
68-
existing_data: existing attributes, if any
70+
path_col: column name for `_node.path_name`, if present
71+
name_col: column name for `_node.node_name`, if present
72+
parent_col: if Node, column name for `_node.parent.node_name`. If DAGNode, tuple of column name and value for
73+
`_node.parent.node_name`.
6974
7075
Returns:
7176
node attributes
7277
"""
73-
data_attrs = existing_data or {}
78+
data_attrs = {}
79+
80+
# Main attributes
81+
if path_col:
82+
assert isinstance(_node, node.Node)
83+
data_attrs[path_col] = _node.path_name
84+
if name_col:
85+
data_attrs[name_col] = _node.node_name
86+
if parent_col:
87+
if isinstance(_node, node.Node):
88+
assert isinstance(parent_col, str)
89+
parent_name = None
90+
if _node.parent:
91+
parent_name = _node.parent.node_name
92+
data_attrs[parent_col] = parent_name
93+
else:
94+
assert isinstance(parent_col, tuple)
95+
data_attrs[parent_col[0]] = parent_col[1]
96+
97+
# Other attributes
7498
if all_attrs:
7599
data_attrs.update(
76100
dict(_node.describe(exclude_attributes=["name"], exclude_prefix="_"))
77101
)
78102
elif attr_dict:
79103
for k, v in attr_dict.items():
80104
data_attrs[v] = _node.get_attr(k)
105+
81106
return data_attrs

tests/binarytree/test_export.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,26 @@ def test_tree_to_nested_dict(binarytree_node):
9292
assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"
9393

9494

95+
class TestTreeToNestedDictKey:
96+
@staticmethod
97+
def test_tree_to_nested_dict_key(binarytree_node):
98+
expected = {
99+
"1": {
100+
"children": {
101+
"2": {
102+
"children": {
103+
"4": {"children": {"8": {}}},
104+
"5": {},
105+
},
106+
},
107+
"3": {"children": {"6": {}, "7": {}}},
108+
},
109+
}
110+
}
111+
actual = export.tree_to_nested_dict_key(binarytree_node)
112+
assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"
113+
114+
95115
class TestTreeToDot:
96116
@staticmethod
97117
def test_tree_to_dot(binarytree_node):

0 commit comments

Comments
 (0)