Skip to content

Commit 20964b7

Browse files
committed
enh(jsonp) mass-concats when update_paths() ...
- BUG: sorting messes up column-order!
1 parent 3284405 commit 20964b7

File tree

1 file changed

+54
-19
lines changed

1 file changed

+54
-19
lines changed

graphtik/jsonpointer.py

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,29 @@ def set_path_value(
548548
)
549549

550550

551+
def _index_or_delay_concat(
552+
doc: Doc, key: str, value, delayed_concats: Optional[list]
553+
) -> None:
554+
"""
555+
Set Indexed value, or delay :term:`pandas concatenation`, for the recurse parent to do it.
556+
557+
:param delayed_concats:
558+
if given (not ``None``), pandas-concats are enabled and
559+
should add further values into it
560+
(may contain past values to be mass-concatenated by the caller)
561+
"""
562+
if delayed_concats is None or not isinstance(doc, NDFrame):
563+
doc[key] = value
564+
else:
565+
## Delay further concats or Index non-pandas value.
566+
#
567+
if isinstance(value, NDFrame):
568+
delayed_concats.append(value)
569+
else:
570+
assert not delayed_concats, f"Parent left delayed_concats? {locals()}"
571+
doc[key] = value
572+
573+
551574
def _update_paths(
552575
doc: Doc,
553576
paths_vals: Collection[Tuple[List[str], Any]],
@@ -557,34 +580,45 @@ def _update_paths(
557580
concat_axis,
558581
) -> Optional[Doc]:
559582
"""
560-
(recursive) mass-update `path_vals` (jsonp, value) pairs into doc.
583+
(recursive) mass-update `path_vals` (jsonp, value) pairs into doc, with ..
561584
562-
Special treatment (i.e. concat) if must insert a DataFrame into a DataFrame
563-
with steps ``.``(vertical) and ``-``(horizontal) denoting concatanation axis.
585+
special treatment for :term:`pandas concatenation`.
586+
587+
:return:
588+
`doc` which might have changed, if it as a pandas concatenated.
564589
565590
FIXME: ROOT in mass-update_paths NOT IMPLEMENTED
591+
FIXME: SET_OBJECT_ATTR in mass-update_paths NOT IMPLEMENTED
566592
"""
567-
ret_doc = None # Collect here any changed dataframe, to return.
568-
# A `group` is a list of paths with common prefix (root)
569-
# currently being built.
570-
# The `last_prefix` & `next_prefix` detect when group's 1st step
571-
# has changed (proceeded to the next `group)`.
593+
#: A `group` is a subset of the paths iterated below
594+
#: that have a common "prefix", i.e. their 1st step,
595+
#: seen as "root" for each recursed call.
572596
group: List[Tuple[str, Any]] = () # Begin with a blocker value.
597+
#: The `last_prefix` & `next_prefix` detect when group's 1st step
598+
#: has changed while iterating (path, value) pairs
599+
#: (meaning we have proceeded to the next `group)`.
573600
last_prefix = None
601+
#: Consecutive Pandas values to mass-concat.
602+
delayed_concats: list = None if concat_axis is None else []
574603
for i, (path, value) in enumerate((*paths_vals, ((UNSET,), UNSET))):
575604
assert len(path) >= 1 or value is UNSET, locals()
576605

606+
## Concate any delayed values.
607+
#
608+
if delayed_concats and (len(path) > 1 or not isinstance(value, NDFrame)):
609+
assert concat_axis is not None and isinstance(
610+
doc, NDFrame
611+
), f"Delayed without permission? {locals}"
612+
doc = pd.concat((doc, *delayed_concats), axis=concat_axis)
613+
delayed_concats = None
614+
577615
next_prefix = path[0]
578616
if next_prefix != last_prefix:
579617
if len(path) == 1 and value is not UNSET:
580-
# Assign "tip" value before proceeding to the next group,
618+
# Assign "tip" value of the before proceeding to the next group,
581619
# THOUGH if a deeper path with this same prefix follows,
582620
# it will overwrite the value just written.
583-
new_doc = set_or_concatenate_dataframe(
584-
doc, next_prefix, value, concat_axis
585-
)
586-
if new_doc is not None:
587-
doc = ret_doc = new_doc
621+
_index_or_delay_concat(doc, next_prefix, value, delayed_concats)
588622
else:
589623
if last_prefix: # Is it past the 1st loop?
590624
child = None
@@ -597,11 +631,12 @@ def _update_paths(
597631
if child is None:
598632
child = doc[last_prefix] = container_factory()
599633

600-
## Recurse into sub-group.
634+
## Recurse into collected sub-group.
601635
#
636+
sub_group = [(path[1:], value) for path, value in group]
602637
new_child = _update_paths(
603638
child,
604-
[(path[1:], value) for path, value in group],
639+
sub_group,
605640
container_factory,
606641
root,
607642
descend_objects,
@@ -616,7 +651,7 @@ def _update_paths(
616651
assert len(path) > 1, locals() # shortest path switches group.
617652
group.append((path, value)) # pylint: disable=no-member
618653

619-
return ret_doc
654+
return doc
620655

621656

622657
def update_paths(
@@ -643,10 +678,10 @@ def update_paths(
643678
new_doc = _update_paths(
644679
doc, pvs, container_factory, root, descend_objects, concat_axis
645680
)
646-
if new_doc is not None:
681+
if new_doc is not doc:
647682
# Changed-doc would be lost in vain...
648683
raise ValueError(
649-
f"Cannot mass-update given doc:"
684+
f"Cannot mass-update Pandas @ ROOT:"
650685
f"\n +--(path, values): {pvs}"
651686
f"\n +--doc: {doc}"
652687
f"\n +--new_doc: {new_doc}"

0 commit comments

Comments
 (0)