Skip to content

Commit f31d471

Browse files
committed
REFACT(NET): autocreated SOLUTION with overwrites & executed ...
+ ENH(NETOP): `compute()` return Solutions. + refact(net): use special-purpose methods to update Solution delegating to ChainMap. + drop(base): collect_overwrites consolidated into Solution. + DROP(jetsam): drop `executed` arg; now embeded in `solution`. + fix(doc): mention forgotten `solution` in jetsams. + doc(api): mention new class; add special meths.
1 parent c3b2157 commit f31d471

File tree

8 files changed

+141
-176
lines changed

8 files changed

+141
-176
lines changed

docs/source/plotting.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@ with the folllowing properties, as a debug aid:
8787
... except ValueError as ex:
8888
... pprint(ex.jetsam)
8989
{'args': {'args': [None], 'kwargs': {}},
90-
'executed': set(),
9190
'network': Network(
9291
+--a
9392
+--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'], fn='scream')
@@ -99,7 +98,7 @@ with the folllowing properties, as a debug aid:
9998
'provides': None,
10099
'results_fn': None,
101100
'results_op': None,
102-
'solution': ChainMap({'a': None})}
101+
'solution': {'a': None}}
103102

104103

105104
In interactive *REPL* console you may use this to get the last raised exception::
@@ -137,8 +136,9 @@ The following annotated attributes *might* have meaningfull value on an exceptio
137136
``op_results``
138137
the results, always a dictionary, as matched with operation's `provides`
139138

140-
``executed```
141-
a set with the operation nodes & instructions executed till the error happened.
139+
``solution``
140+
an instance of :class:`.Solution`, contains `inputs` & `outputs` till the error happened;
141+
note that :attr:`.Solution.executed` contains the list of executed `operations` so far.
142142

143143
Of course you may use many of the above "jetsam" values when plotting.
144144

docs/source/reference.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,16 @@ Module: `network`
4040
.. autoclass:: graphtik.network.Network
4141
:members:
4242
:private-members:
43+
:special-members:
4344
:undoc-members:
4445
.. autoclass:: graphtik.network.ExecutionPlan
4546
:members:
4647
:private-members:
48+
:special-members:
4749
:undoc-members:
50+
.. autoclass:: graphtik.network.Solution
51+
:members:
52+
:special-members:
4853

4954
Module: `plot`
5055
===============

graphtik/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,3 @@
2020
)
2121
from .netop import compose
2222
from .op import operation
23-
from .base import collect_overwrites

graphtik/base.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,22 +45,6 @@ def astuple(i, argname, allowed_types=tuple):
4545
return i
4646

4747

48-
def collect_overwrites(maps) -> Mapping[Any, List]:
49-
"""
50-
Collect items in the maps that exist more than once.
51-
52-
:return:
53-
a dictionary with keys only those items that existed in more than one map,
54-
an values, all those values, in the order of given `maps`
55-
"""
56-
dd = defaultdict(list)
57-
for d in maps:
58-
for k, v in d.items():
59-
dd[k].append(v)
60-
61-
return {k: v for k, v in dd.items() if len(v) > 1}
62-
63-
6448
def jetsam(ex, locs, *salvage_vars: str, annotation="jetsam", **salvage_mappings):
6549
"""
6650
Annotate exception with salvaged values from locals() and raise!

graphtik/netop.py

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44

55
import logging
66
import re
7-
from collections import ChainMap, abc
7+
from collections import abc
88
from typing import Any, Callable, Mapping
99

1010
import networkx as nx
1111
from boltons.setutils import IndexedSet as iset
1212

1313
from .base import Items, Plotter, aslist, astuple, jetsam
1414
from .modifiers import optional, sideffect
15-
from .network import Network, yield_ops
15+
from .network import Network, Solution, yield_ops
1616
from .op import FunctionalOperation, Operation, reparse_operation_data
1717

1818
log = logging.getLogger(__name__)
@@ -161,7 +161,7 @@ def _build_pydot(self, **kws):
161161
plotter = self.last_plan or self.net
162162
return plotter._build_pydot(**kws)
163163

164-
def compute(self, named_inputs, outputs=None, solution: ChainMap = None) -> dict:
164+
def compute(self, named_inputs, outputs=None) -> Solution:
165165
"""
166166
Solve & execute the graph, sequentially or parallel.
167167
@@ -176,18 +176,10 @@ def compute(self, named_inputs, outputs=None, solution: ChainMap = None) -> dict
176176
a string or a list of strings with all data asked to compute.
177177
If you set this variable to ``None``, all data nodes will be kept
178178
and returned at runtime.
179-
:param solution:
180-
If not None, it must be a :class:`collections.ChainMap`, which will
181-
collect all results in a separate dictionary for each operation execution.
182-
The 1st dictionary in its maplist will collect the inputs, but will endup
183-
to the be last one when execution finishes.
184-
185-
See :term:`solution`
186179
187180
:return:
188-
the chained-map `solution` "compressed" as a plain dictionary;
189-
if you you want to acccess all intermediate values provide your own
190-
``ChainMap`` instance in this method.
181+
The :term:`solution` which contains the results of each operation executed
182+
+1 for inputs in separate dictionaries.
191183
192184
:raises ValueError:
193185
- If `outputs` asked do not exist in network, with msg:
@@ -209,23 +201,16 @@ def compute(self, named_inputs, outputs=None, solution: ChainMap = None) -> dict
209201
try:
210202
net = self.net # jetsam
211203

212-
if solution is not None and not isinstance(solution, ChainMap):
213-
raise ValueError(
214-
f"Solution was not ChainMap, but {type(solution)}!\n solution; {solution}"
215-
)
216-
217204
# Build the execution plan.
218205
self.last_plan = plan = net.compile(named_inputs.keys(), outputs)
219206

220-
solution = plan.execute(
221-
named_inputs, outputs, solution=solution, method=self.execution_method
222-
)
207+
solution = plan.execute(named_inputs, outputs, method=self.execution_method)
223208

224-
return dict(solution) # Convert ChainMap --> plain dict.
209+
return solution
225210
except Exception as ex:
226211
jetsam(ex, locals(), "plan", "solution", "outputs", network="net")
227212

228-
def __call__(self, **input_kwargs) -> dict:
213+
def __call__(self, **input_kwargs) -> Solution:
229214
"""
230215
Delegates to :meth:`compute()`, respecting any narrowed `outputs`.
231216
"""

graphtik/network.py

Lines changed: 72 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,52 @@ def is_skip_evictions():
6363
return _execution_configs.get()["skip_evictions"]
6464

6565

66-
def _del_chain_key(amap: ChainMap, key):
67-
log.debug("removing data '%s' from solution.", key)
68-
for d in amap.maps:
69-
d.pop(key, None)
66+
class Solution(ChainMap):
    """
    Collects outputs from operations, preserving :term:`overwrites`.

    The mappings given at construction become the initial layers of
    :attr:`maps`; then every executed operation appends its results
    as one more layer (see :meth:`operation_executed()`).
    When execution ends, :meth:`finish()` inverts the layers,
    so that the values produced last win on lookups.
    """

    def __init__(self, plan, *args, **kw):
        """
        :param plan:
            stored as-is in :attr:`plan`
        :param args:
            the initial mappings, passed verbatim to :class:`ChainMap`
        """
        super().__init__(*args, **kw)
        # The operations reported through `operation_executed()` so far.
        self.executed = iset()
        # Becomes True once `finish()` has inverted the layers.
        self.finished = False
        self.plan = plan

    def __repr__(self):
        # Render like a plain dict, hiding the layering of the chained maps.
        items = ", ".join(f"{k!r}: {v!r}" for k, v in self.items())
        return f"{{{items}}}"

    def operation_executed(self, op, outputs):
        """
        invoked once per operation, with its results

        :param op:
            the operation just executed, recorded in :attr:`executed`
        :param outputs:
            the results of the operation, appended as a new layer in :attr:`maps`
        """
        assert not self.finished, f"Cannot reuse solution: {self}"
        self.maps.append(outputs)
        self.executed.add(op)

    def finish(self):
        """
        invoked only once, after all ops have been executed

        Any further invocation is a no-op, so the layers are inverted just once.
        """
        if not self.finished:
            # Invert solution so that last value wins
            self.maps = self.maps[::-1]
            # The flag must be set, or a 2nd call would re-invert the layers
            # and the reuse-check in `operation_executed()` would never fire.
            self.finished = True

    def __delitem__(self, key):
        """Remove `key` from ALL layers, without complaining if it is missing."""
        log.debug("removing data '%s' from solution.", key)
        for d in self.maps:
            d.pop(key, None)

    def overwrites(self) -> Mapping[Any, List]:
        """
        Collect items in the maps that exist more than once.

        :return:
            a dictionary with keys only those items that existed in more than one map,
            and values, all those values, in the current order of :attr:`maps`
        """
        dd = defaultdict(list)
        for d in self.maps:
            for k, v in d.items():
                dd[k].append(v)

        return {k: v for k, v in dd.items() if len(v) > 1}
70112

71113

72114
class _DataNode(str):
@@ -241,14 +283,14 @@ def _call_operation(self, op, solution):
241283
try:
242284
return op.compute(solution)
243285
except Exception as ex:
244-
jetsam(ex, locals(), plan="self")
286+
jetsam(ex, locals(), "solution", plan="self")
245287
finally:
246288
# record execution time
247289
t_complete = round(time.time() - t0, 5)
248290
self.times[op.name] = t_complete
249291
log.debug("...step completion time: %s", t_complete)
250292

251-
def _execute_thread_pool_barrier_method(self, solution: ChainMap, executed):
293+
def _execute_thread_pool_barrier_method(self, solution: Solution):
252294
"""
253295
This method runs the graph using a parallel pool of thread executors.
254296
You may achieve lower total latency if your graph is sufficiently
@@ -263,7 +305,7 @@ def _execute_thread_pool_barrier_method(self, solution: ChainMap, executed):
263305
# scheduled, then schedule them onto a thread pool, then collect their
264306
# results onto a memory solution for use upon the next iteration.
265307
while True:
266-
self._check_if_aborted(executed)
308+
self._check_if_aborted(solution.executed)
267309

268310
# the upnext list contains a list of operations for scheduling
269311
# in the current round of scheduling
@@ -273,14 +315,14 @@ def _execute_thread_pool_barrier_method(self, solution: ChainMap, executed):
273315
# based on what has already been executed.
274316
if (
275317
isinstance(node, Operation)
276-
and node not in executed
318+
and node not in solution.executed
277319
# Use `broken_dag` to allow executing operations from given inputs
278320
# regardless of whether their producers have yet to re-calc them.
279321
and set(
280322
n
281323
for n in nx.ancestors(self.broken_dag, node)
282324
if isinstance(n, Operation)
283-
).issubset(executed)
325+
).issubset(solution.executed)
284326
):
285327
upnext.append(node)
286328
elif isinstance(node, _EvictInstruction):
@@ -294,10 +336,12 @@ def _execute_thread_pool_barrier_method(self, solution: ChainMap, executed):
294336
node not in self.dag.nodes
295337
# Scan node's successors in `broken_dag`, not to block
296338
# an op waiting for calced data already given as input.
297-
or set(self.broken_dag.successors(node)).issubset(executed)
339+
or set(self.broken_dag.successors(node)).issubset(
340+
solution.executed
341+
)
298342
)
299343
):
300-
_del_chain_key(solution, node)
344+
del solution[node]
301345

302346
# stop if no nodes left to schedule, exit out of the loop
303347
if not upnext:
@@ -308,36 +352,33 @@ def _execute_thread_pool_barrier_method(self, solution: ChainMap, executed):
308352
)
309353

310354
for op, outputs in done_iterator:
311-
solution.maps.append(outputs)
312-
executed.add(op)
355+
solution.operation_executed(op, outputs)
313356

314-
def _execute_sequential_method(self, solution: ChainMap, executed):
357+
def _execute_sequential_method(self, solution: Solution):
315358
"""
316359
This method runs the graph one operation at a time in a single thread
317360
318361
:param solution:
319362
must contain the input values only, gets modified
320363
"""
321364
for step in self.steps:
322-
self._check_if_aborted(executed)
365+
self._check_if_aborted(solution.executed)
323366

324367
if isinstance(step, Operation):
325368
log.debug("%sexecuting step: %s", "-" * 32, step.name)
326369

327370
outputs = self._call_operation(step, solution)
328-
solution.maps.append(outputs)
329-
executed.add(step)
371+
solution.operation_executed(step, outputs)
372+
330373
elif isinstance(step, _EvictInstruction):
331374
# Cache value may be missing if it is optional.
332375
if step in solution:
333-
_del_chain_key(solution, step)
376+
del solution[step]
334377

335378
else:
336379
raise AssertionError(f"Unrecognized instruction.{step}")
337380

338-
def execute(
339-
self, named_inputs, outputs=None, *, solution: ChainMap = None, method=None
340-
):
381+
def execute(self, named_inputs, outputs=None, *, method=None) -> Solution:
341382
"""
342383
:param named_inputs:
343384
A maping of names --> values that must contain at least
@@ -347,16 +388,10 @@ def execute(
347388
:param outputs:
348389
If not None, they are just checked if possible, based on :attr:`provides`,
349390
and scream if not.
350-
:param solution:
351-
If not None, it must be a :class:`collections.ChainMap`, which will
352-
collect all results in a separate dictionary for each operation execution.
353-
The 1st dictionary in its maplist will collect the inputs, but will endup
354-
to the be last one when execution finishes.
355-
356-
See :term:`solution`
357391
358392
:return:
359-
the populates `solution`
393+
The :term:`solution` which contains the results of each operation executed
394+
+1 for inputs in separate dictionaries.
360395
361396
:raises ValueError:
362397
- If plan does not contain any operations, with msg:
@@ -382,25 +417,20 @@ def execute(
382417
else self._execute_sequential_method
383418
)
384419

385-
if solution is None:
386-
solution = ChainMap()
387-
preload_layers = len(solution.maps) # TODO: move to solution
388-
389420
# If certain outputs asked, put relevant-only inputs in solution,
390421
# otherwise, keep'em all.
391422
#
392423
# Note: clone and keep original `inputs` in the 1st chained-map.
393-
solution.update(
424+
solution = Solution(
425+
self,
394426
{k: v for k, v in named_inputs.items() if k in self.dag.nodes}
395427
if self.evict
396-
else named_inputs
428+
else named_inputs,
397429
)
398-
executed = set()
399-
executor(solution, executed)
400-
401-
# Invert solution so that last value wins
402-
# TODO: move to solution
403-
solution.maps = solution.maps[::-1]
430+
try:
431+
executor(solution)
432+
finally:
433+
solution.finish()
404434

405435
# Validate eviction was perfect
406436
#
@@ -411,20 +441,9 @@ def execute(
411441
or set(solution).issubset(self.provides)
412442
), f"Evictions left more data{list(iset(solution) - set(self.provides))} than {self}!"
413443

414-
# Validate solution layers match operations executed + 1(inputs)
415-
# TODO: move to solution
416-
#
417-
assert len(solution.maps) - preload_layers == sum(
418-
1 for i in yield_ops(self.steps)
419-
), (
420-
f"Solution layers({len(solution.maps)}, preloaded: {preload_layers}) mismatched "
421-
f"operations executed({sum(1 for i in yield_ops(self.dag))})!"
422-
f"\n {self}\n solution: {solution}"
423-
)
424-
425444
return solution
426445
except Exception as ex:
427-
jetsam(ex, locals(), "solution", "executed")
446+
jetsam(ex, locals(), "solution")
428447

429448

430449
class Network(Plotter):

0 commit comments

Comments
 (0)