aiidateam · superstar54 · Aug 15, 2025 · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025
diff --git a/docs/gallery/autogen/pyfunction.py b/docs/gallery/autogen/pyfunction.py
@@ -6,14 +6,14 @@
 
 ######################################################################
 # Default outputs
-# --------------
+# -----------------
 #
 # The default output of the function is `result`. The `pyfunction` task
 # will store the result as one node in the database with the key `result`.
 #
 from aiida import load_profile
 from aiida.engine import run_get_node
-from aiida_pythonjob import pyfunction
+from aiida_pythonjob import pyfunction, spec
 
 load_profile()
 
@@ -35,7 +35,7 @@ def add(x, y):
 #
 
 
-@pyfunction(outputs=[{"name": "sum"}, {"name": "diff"}])
+@pyfunction(outputs=spec.namespace(sum=any, diff=any))
 def add(x, y):
     return {"sum": x + y, "diff": x - y}
 
@@ -48,7 +48,7 @@ def add(x, y):
 
 ######################################################################
 # Namespace Output
-# --------------
+# -----------------
 #
 # The `pyfunction` allows users to define namespace outputs. A namespace output
 # is a dictionary with keys and values returned by a function. Each value in
@@ -70,20 +70,20 @@ def add(x, y):
 from ase.build import bulk  # noqa: E402
 
 
-@pyfunction(outputs=[{"name": "scaled_structures", "identifier": "namespace"}])
+@pyfunction(outputs=spec.dynamic(Atoms))
 def generate_structures(structure: Atoms, factor_lst: list) -> dict:
     """Scale the structure by the given factor_lst."""
     scaled_structures = {}
     for i in range(len(factor_lst)):
         atoms = structure.copy()
         atoms.set_cell(atoms.cell * factor_lst[i], scale_atoms=True)
         scaled_structures[f"s_{i}"] = atoms
-    return {"scaled_structures": scaled_structures}
+    return scaled_structures
 
 
 result, node = run_get_node(generate_structures, structure=bulk("Al"), factor_lst=[0.95, 1.0, 1.05])
 print("scaled_structures: ")
-for key, value in result["scaled_structures"].items():
+for key, value in result.items():
     print(key, value)
 
 
@@ -115,7 +115,7 @@ def add(x, y):
 
 ######################################################################
 # Define your data serializer and deserializer
-# --------------
+# ----------------------------------------------
 #
 # PythonJob looks up the data serializer in the `aiida.data` entry point by the
 # module name and class name (e.g., `ase.atoms.Atoms`).

diff --git a/docs/gallery/autogen/pythonjob.py b/docs/gallery/autogen/pythonjob.py
@@ -63,14 +63,14 @@
 
 ######################################################################
 # Default outputs
-# --------------
+# ----------------
 #
 # The default output of the function is `result`. The `PythonJob` task
 # will store the result as one node in the database with the key `result`.
 #
 from aiida import load_profile
 from aiida.engine import run_get_node
-from aiida_pythonjob import PythonJob, prepare_pythonjob_inputs
+from aiida_pythonjob import PythonJob, prepare_pythonjob_inputs, spec
 
 load_profile()
 
@@ -91,7 +91,7 @@ def add(x, y):
 # Custom outputs
 # --------------
 # If the function returns a dictionary with a fixed number of keys, and you
-# want to store the values as separate outputs, you can specify the `output_ports` parameter.
+# want to store the values as separate outputs, you can specify the `outputs_spec` parameter.
 # For a dynamic number of outputs, you can use the namespace output, which is explained later.
 #
 
@@ -103,10 +103,7 @@ def add(x, y):
 inputs = prepare_pythonjob_inputs(
     add,
     function_inputs={"x": 1, "y": 2},
-    output_ports=[
-        {"name": "sum"},
-        {"name": "diff"},
-    ],
+    outputs_spec=spec.namespace(sum=any, diff=any),
 )
 result, node = run_get_node(PythonJob, **inputs)
 
@@ -117,7 +114,7 @@ def add(x, y):
 
 ######################################################################
 # Using parent folder
-# --------------
+# -----------------------
 # The parent_folder parameter allows a task to access the output files of
 # a parent task. This feature is particularly useful when you want to reuse
 # data generated by a previous computation in subsequent computations. In
@@ -142,15 +139,13 @@ def multiply(x, y):
 inputs1 = prepare_pythonjob_inputs(
     add,
     function_inputs={"x": 1, "y": 2},
-    output_ports=[{"name": "sum"}],
 )
 
 result1, node1 = run_get_node(PythonJob, inputs=inputs1)
 
 inputs2 = prepare_pythonjob_inputs(
     multiply,
     function_inputs={"x": 1, "y": 2},
-    output_ports=[{"name": "product"}],
     parent_folder=result1["remote_folder"],
 )
 
@@ -160,7 +155,7 @@ def multiply(x, y):
 
 ######################################################################
 # Upload files or folders to the remote computer
-# --------------
+# -------------------------------------------------
 # The `upload_files` parameter allows users to upload files or folders to
 # the remote computer. The files will be uploaded to the working directory of the remote computer.
 #
@@ -202,7 +197,7 @@ def add():
 
 ######################################################################
 # Retrieve additional files from the remote computer
-# --------------
+# ----------------------------------------------------
 # Sometimes, one may want to retrieve additional files from the remote
 # computer after the job has finished. For example, one may want to retrieve
 # the output files generated by the `pw.x` calculation in Quantum ESPRESSO.
@@ -235,7 +230,7 @@ def add(x, y):
 
 ######################################################################
 # Namespace Output
-# --------------
+# ------------------
 #
 # The `PythonJob` allows users to define namespace outputs. A namespace output
 # is a dictionary with keys and values returned by a function. Each value in
@@ -264,18 +259,18 @@ def generate_structures(structure: Atoms, factor_lst: list) -> dict:
         atoms = structure.copy()
         atoms.set_cell(atoms.cell * factor_lst[i], scale_atoms=True)
         scaled_structures[f"s_{i}"] = atoms
-    return {"scaled_structures": scaled_structures}
+    return scaled_structures
 
 
 inputs = prepare_pythonjob_inputs(
     generate_structures,
     function_inputs={"structure": bulk("Al"), "factor_lst": [0.95, 1.0, 1.05]},
-    output_ports=[{"name": "scaled_structures", "identifier": "namespace"}],
+    outputs_spec=spec.dynamic(Atoms),
 )
 
 result, node = run_get_node(PythonJob, inputs=inputs)
 print("scaled_structures: ")
-for key, value in result["scaled_structures"].items():
+for key, value in result.items():
     print(key, value)
 
 
@@ -297,31 +292,20 @@ def generate_structures(structure: Atoms, factor_lst: list) -> dict:
         scaled_structures[f"s_{i}"] = atoms
         volumes[f"v_{i}"] = atoms.get_volume()
     return {
-        "outputs": {
-            "scaled_structures": scaled_structures,
-            "volume": volumes,
-        }
+        "scaled_structures": scaled_structures,
+        "volume": volumes,
     }
 
 
 inputs = prepare_pythonjob_inputs(
     generate_structures,
     function_inputs={"structure": bulk("Al"), "factor_lst": [0.95, 1.0, 1.05]},
-    output_ports=[
-        {
-            "name": "outputs",
-            "identifier": "namespace",
-            "ports": [
-                {"name": "scaled_structures", "identifier": "namespace"},
-                {"name": "volume", "identifier": "namespace"},
-            ],
-        }
-    ],
+    outputs_spec=spec.namespace(scaled_structures=spec.dynamic(Atoms), volume=spec.dynamic(float)),
 )
 
 result, node = run_get_node(PythonJob, inputs=inputs)
-print("result: ", result["outputs"]["scaled_structures"])
-print("volumes: ", result["outputs"]["volume"])
+print("result: ", result["scaled_structures"])
+print("volumes: ", result["volume"])
 
 
 ######################################################################
@@ -420,7 +404,7 @@ def add(x, y):
 
 ######################################################################
 # Define your data serializer and deserializer
-# --------------
+# ----------------------------------------------
 #
 # PythonJob looks up the data serializer in the `aiida.data` entry point by the
 # module name and class name (e.g., `ase.atoms.Atoms`).

diff --git a/examples/test_add.py b/examples/test_add.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "aiida-core>=2.3,<3",
     "ase",
     "cloudpickle",
+    "node-graph==0.2.22",
 ]
 
 [project.optional-dependencies]
@@ -163,3 +164,6 @@ features = ["docs"]
 build = [
   "make -C docs"
 ]
+
+[tool.hatch.metadata]
+allow-direct-references = true
diff --git a/src/aiida_pythonjob/__init__.py b/src/aiida_pythonjob/__init__.py
@@ -2,6 +2,8 @@
 
 __version__ = "0.2.5"
 
+from node_graph import spec
+
 from .calculations import PythonJob
 from .decorator import pyfunction
 from .launch import prepare_pythonjob_inputs
@@ -13,4 +15,5 @@
     "PickledData",
     "prepare_pythonjob_inputs",
     "PythonJobParser",
+    "spec",
 )
diff --git a/src/aiida_pythonjob/calculations/pyfunction.py b/src/aiida_pythonjob/calculations/pyfunction.py
@@ -34,7 +34,7 @@ def __init__(self, *args, **kwargs) -> None:
     def func(self) -> t.Callable[..., t.Any]:
         import cloudpickle
 
-        if self._func is None:
+        if not getattr(self, "_func", None):
             self._func = cloudpickle.loads(self.inputs.function_data.pickled_function)
         return self._func
 
@@ -189,7 +189,8 @@ def parse(self, results):
         if exit_code:
             return exit_code
         # Store the outputs
-        for output in self.output_ports["ports"]:
-            self.out(output["name"], output["value"])
+        for name, port in self.output_ports["ports"].items():
+            if "value" in port:
+                self.out(name, port["value"])
 
         return ExitCode()
diff --git a/src/aiida_pythonjob/decorator.py b/src/aiida_pythonjob/decorator.py
@@ -60,8 +60,10 @@ def run_get_node(*args, **kwargs) -> tuple[dict[str, t.Any] | None, "ProcessNode
             manager = get_manager()
             runner = manager.get_runner()
             # # Remove all the known inputs from the kwargs
-            output_ports = kwargs.pop("output_ports", None) or outputs
-            input_ports = kwargs.pop("input_ports", None) or inputs
+            outputs_spec = kwargs.pop("outputs_spec", None) or outputs
+            inputs_spec = kwargs.pop("inputs_spec", None) or inputs
+            input_ports = kwargs.pop("input_ports", None)
+            output_ports = kwargs.pop("output_ports", None)
             metadata = kwargs.pop("metadata", None)
             function_data = kwargs.pop("function_data", None)
             deserializers = kwargs.pop("deserializers", None)
@@ -73,6 +75,8 @@ def run_get_node(*args, **kwargs) -> tuple[dict[str, t.Any] | None, "ProcessNode
             process_inputs = prepare_pyfunction_inputs(
                 function=function,
                 function_inputs=function_inputs,
+                inputs_spec=inputs_spec,
+                outputs_spec=outputs_spec,
                 input_ports=input_ports,
                 output_ports=output_ports,
                 metadata=metadata,