Skip to content

Commit 10d8680

Browse files
authored
Merge pull request #71 from BAMresearch/tracing_trouble
Tracing trouble
2 parents 30dc0b8 + cbe599e commit 10d8680

19 files changed

+1129
-53
lines changed

src/modacor/dataclasses/process_step.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@
2525
from .messagehandler import MessageHandler
2626
from .process_step_describer import ProcessStepDescriber
2727
from .processing_data import ProcessingData
28-
from .validators import is_list_of_ints
2928

29+
# from .validators import is_list_of_ints
3030

31-
@define
31+
32+
@define(eq=False)
3233
class ProcessStep:
3334
"""A base class defining a processing step"""
3435

@@ -54,7 +55,7 @@ class ProcessStep:
5455
# class attribute for a machine-readable description of the process step
5556
documentation = ProcessStepDescriber(
5657
calling_name="Generic Process step",
57-
calling_id=None,
58+
calling_id="", # to be filled in by the process
5859
calling_module_path=Path(__file__),
5960
calling_version=__version__,
6061
)
@@ -66,7 +67,7 @@ class ProcessStep:
6667
)
6768

6869
# flags and attributes for running the pipeline
69-
requires_steps: list[int] = field(factory=list, validator=is_list_of_ints)
70+
requires_steps: list[str] = field(factory=list)
7071
step_id: int | str = field(default=-1, validator=v.instance_of((Integral, str)))
7172
executed: bool = field(default=False, validator=v.instance_of(bool))
7273

@@ -77,7 +78,7 @@ class ProcessStep:
7778

7879
# a message handler, supporting logging, warnings, errors, etc. emitted by the process
7980
# during execution
80-
message_handler: MessageHandler = field(default=MessageHandler(), validator=v.instance_of(MessageHandler))
81+
message_handler: MessageHandler = field(factory=MessageHandler, validator=v.instance_of(MessageHandler))
8182

8283
# internal variables:
8384
__prepared: bool = field(default=False, validator=v.instance_of(bool))
@@ -88,15 +89,17 @@ def __attrs_post_init__(self):
8889
Post-initialization method to set up the process step.
8990
"""
9091
self.configuration = self.default_config()
91-
self.configuration.update(self.documentation.calling_arguments)
92+
self.configuration.update(self.documentation.default_configuration)
9293

9394
def __call__(self, processing_data: ProcessingData) -> None:
9495
"""Allow the process step to be called like a function"""
9596
self.execute(processing_data)
9697

9798
# add hash function. equality can be checked
98-
def __hash__(self):
99-
return hash((self.documentation.__repr__(), self.configuration.__repr__(), self.step_id))
99+
# def __hash__(self):
100+
# return hash((self.documentation.__repr__(), self.configuration.__repr__(), self.step_id))
101+
def __hash__(self) -> int:
102+
return object.__hash__(self)
100103

101104
def prepare_execution(self):
102105
"""

src/modacor/dataclasses/process_step_describer.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pathlib import Path
1515
from typing import Any
1616

17-
from attrs import define, field
17+
from attrs import define, evolve, field
1818
from attrs import validators as v
1919

2020
__all__ = ["ProcessStepDescriber"]
@@ -26,17 +26,9 @@
2626
def validate_required_keys(instance, attribute, value):
2727
# keys = [key.strip() for key in value.keys()]
2828
keys = [key.strip() for key in instance.required_arguments]
29-
missing = [key for key in keys if key not in instance.calling_arguments]
29+
missing = [key for key in keys if key not in instance.default_configuration]
3030
if missing:
31-
raise ValueError(f"Missing required argument keys in calling_arguments: {missing}")
32-
33-
34-
def validate_required_data_keys(instance, attribute, value):
35-
# keys = [key.strip() for key in value.keys()]
36-
keys = [key.strip() for key in instance.documentation.required_data_keys]
37-
missing = [key for key in keys if key not in instance.data.data]
38-
if missing:
39-
raise ValueError(f"Missing required data keys in instance.data: {missing}")
31+
raise ValueError(f"Missing required argument keys in default_configuration: {missing}")
4032

4133

4234
@define
@@ -49,7 +41,7 @@ class ProcessStepDescriber:
4941
calling_version: str = field() # module version being executed
5042
required_data_keys: list[str] = field(factory=list) # list of data keys required by the process
5143
required_arguments: list[str] = field(factory=list) # list of argument key-val combos required by the process
52-
calling_arguments: dict[str, Any] = field(factory=dict, validator=validate_required_keys)
44+
default_configuration: dict[str, Any] = field(factory=dict, validator=validate_required_keys)
5345
modifies: dict[str, list] = field(
5446
factory=dict, validator=v.instance_of(dict)
5547
) # which aspects of BaseData are modified by this
@@ -68,5 +60,5 @@ class ProcessStepDescriber:
6860
# # on first run, and reused on subsequent runs. Maybe two caches, one for per-file and
6961
# # one for per-execution.
7062

71-
def copy(self) -> ProcessStepDescriber:
72-
raise NotImplementedError()
63+
def copy(self) -> "ProcessStepDescriber":
64+
return evolve(self)
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# /usr/bin/env python3
3+
# -*- coding: utf-8 -*-
4+
5+
from __future__ import annotations
6+
7+
__coding__ = "utf-8"
8+
__authors__ = ["Brian R. Pauw"] # add names to the list as appropriate
9+
__copyright__ = "Copyright 2025, The MoDaCor team"
10+
__date__ = "13/12/2025"
11+
__status__ = "Development" # "Development", "Production"
12+
__version__ = "20251213.1"
13+
14+
__all__ = ["TraceEvent"]
15+
16+
import json
17+
from hashlib import sha256
18+
from typing import Any
19+
20+
from attrs import define, field, validators
21+
22+
23+
def _to_jsonable(value: Any) -> Any:
24+
"""
25+
Convert arbitrary objects into a JSON-serializable structure.
26+
27+
Rules:
28+
- dict keys become strings
29+
- tuples/sets become lists
30+
- unknown objects become str(value)
31+
"""
32+
if value is None or isinstance(value, (str, int, float, bool)):
33+
return value
34+
35+
if isinstance(value, dict):
36+
return {str(k): _to_jsonable(v) for k, v in value.items()}
37+
38+
if isinstance(value, (list, tuple, set)):
39+
return [_to_jsonable(v) for v in value]
40+
41+
# Common numpy-like scalars without importing numpy
42+
if hasattr(value, "item") and callable(getattr(value, "item")):
43+
try:
44+
return _to_jsonable(value.item())
45+
except Exception:
46+
pass
47+
48+
return str(value)
49+
50+
51+
def _stable_hash_dict(d: dict[str, Any]) -> str:
    """
    Stable content hash of a dict (order-independent).

    The dict is first reduced to JSON-friendly values with ``_to_jsonable``,
    then dumped as compact JSON with sorted keys, and the UTF-8 bytes of that
    text are hashed with SHA-256.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest of the canonical JSON text.
    """
    canonical = json.dumps(_to_jsonable(d), sort_keys=True, separators=(",", ":"), ensure_ascii=False)
    # ensure_ascii=False leaves non-ASCII characters unescaped, so a string
    # containing a lone surrogate would make a strict UTF-8 encode raise.
    # "surrogatepass" encodes such code points instead of failing and yields
    # exactly the same bytes as strict encoding for every valid string.
    return sha256(canonical.encode("utf-8", errors="surrogatepass")).hexdigest()
57+
58+
59+
@define(frozen=True, slots=True)
class TraceEvent:
    """
    A small, UI-friendly trace record for a single executed step.

    Intended to be embedded into Pipeline.to_spec() so graph viewers can show:
    - configuration used by the step
    - what changed (units/dimensionality/shape/NaNs/etc.)
    - optional human messages (later)

    Notes
    -----
    Keep this JSON-friendly and lightweight: no arrays, no heavy objects.

    ``config_hash`` is computed once, at construction, from ``config``. The
    ``config`` dict itself is stored as passed in (not copied), so mutating it
    afterwards is not reflected in ``config_hash``.
    """

    step_id: str
    module: str
    label: str = ""

    module_path: str = ""
    version: str = ""

    # Any iterable of step ids is normalized to a tuple, so the frozen record
    # never holds a reference to a caller's mutable list.
    requires_steps: tuple[str, ...] = field(factory=tuple, converter=tuple)

    # configuration as used for execution (JSON-friendly)
    config: dict[str, Any] = field(factory=dict)

    # computed stable hash of config (set in __attrs_post_init__, not an init argument)
    config_hash: str = field(init=False)

    # dataset key -> { "diff": [...], "prev": {...} | None, "now": {...} }
    # Use a simple key like "sample.signal" or "sample_background.signal"
    datasets: dict[str, Any] = field(factory=dict)

    # reserved for later (MessageHandler, timing, etc.)
    messages: list[dict[str, Any]] = field(factory=list)

    # wall-clock runtime for this step execution (seconds); None when not measured
    duration_s: float | None = field(default=None, validator=validators.optional(validators.instance_of(float)))

    def __attrs_post_init__(self) -> None:
        """Derive ``config_hash`` from ``config`` once all fields are set."""
        # The class is frozen, so normal attribute assignment is blocked;
        # object.__setattr__ bypasses that guard during initialization.
        object.__setattr__(self, "config_hash", _stable_hash_dict(self.config))

    def to_dict(self) -> dict[str, Any]:
        """
        JSON-serializable representation suitable for Pipeline.to_spec().

        ``config``, ``datasets`` and ``messages`` are passed through
        ``_to_jsonable``; ``requires_steps`` is returned as a list.
        """
        return {
            "step_id": self.step_id,
            "module": self.module,
            "label": self.label,
            "module_path": self.module_path,
            "version": self.version,
            "requires_steps": list(self.requires_steps),
            "config": _to_jsonable(self.config),
            "config_hash": self.config_hash,
            "duration_s": self.duration_s,
            "datasets": _to_jsonable(self.datasets),
            "messages": _to_jsonable(self.messages),
        }

0 commit comments

Comments
 (0)