
Commit c9ca9fa

Control caching of numba functions
1 parent ac8daa1 commit c9ca9fa


18 files changed: +386 -120 lines changed


pytensor/compile/mode.py

Lines changed: 13 additions & 0 deletions
@@ -50,6 +50,7 @@
     "jax": JAXLinker(),
     "pytorch": PytorchLinker(),
     "numba": NumbaLinker(),
+    "numba_vm": NumbaLinker(vm=True),
 }


@@ -351,6 +352,11 @@ def __setstate__(self, state):
            optimizer = predefined_optimizers[optimizer]
        if isinstance(optimizer, RewriteDatabaseQuery):
            self.provided_optimizer = optimizer
+
+        # Force numba-required rewrites if using NumbaLinker
+        if isinstance(linker, NumbaLinker):
+            optimizer = optimizer.including("numba")
+
        self._optimizer = optimizer
        self.call_time = 0
        self.fn_time = 0
@@ -475,6 +481,11 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs):
     ),
 )

+NUMBA_VM = Mode(
+    "numba_vm",
+    NUMBA._optimizer,
+)
+
 JAX = Mode(
     "jax",
     RewriteDatabaseQuery(
@@ -515,6 +526,7 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs):
     "C_VM": C_VM,
     "JAX": JAX,
     "NUMBA": NUMBA,
+    "NUMBA_VM": NUMBA_VM,
     "PYTORCH": PYTORCH,
 }

@@ -579,6 +591,7 @@ def register_mode(name, mode):
     Add a `Mode` which can be referred to by `name` in `function`.

     """
+    # TODO: Remove me
     if name in predefined_modes:
         raise ValueError(f"Mode name already taken: {name}")
     predefined_modes[name] = mode
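
The new entries make the VM-based Numba linker selectable by name, either as the "numba_vm" linker or through the NUMBA_VM predefined mode. A minimal sketch of picking the mode when compiling a function (the small graph and the use of pytensor.function below are illustrative, not part of this commit):

import pytensor
import pytensor.tensor as pt

x = pt.vector("x")
y = pt.exp(x).sum()

# "NUMBA" keeps the previous behaviour; "NUMBA_VM" resolves to the newly
# registered mode and routes compilation through NumbaLinker(vm=True).
fn = pytensor.function([x], y, mode="NUMBA_VM")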

pytensor/configdefaults.py

Lines changed: 1 addition & 0 deletions
@@ -379,6 +379,7 @@ def add_compile_configvars():
             "cvm_nogc",
             "jax",
             "numba",
+            "numba_vm",
         ]
     else:
         # g++ is not present or the user disabled it,
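
Since "numba_vm" is now an accepted value for the linker config variable, it should also be selectable at startup through PyTensor flags rather than an explicit Mode object; a hedged sketch (the flag mechanism is the usual config machinery, not something added in this commit):

# e.g. launched with: PYTENSOR_FLAGS="linker=numba_vm" python my_script.py
import pytensor

# The value is validated against the option list above, which now includes "numba_vm".
print(pytensor.config.linker)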

pytensor/link/numba/cache.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+import weakref
+from hashlib import sha256
+from pathlib import Path
+
+from numba.core.caching import CacheImpl, _CacheLocator
+
+from pytensor import config
+from pytensor.graph.basic import Apply
+
+
+NUMBA_PYTENSOR_CACHE_ENABLED = True
+NUMBA_CACHE_PATH = config.base_compiledir / "numba"
+NUMBA_CACHE_PATH.mkdir(exist_ok=True)
+CACHED_SRC_FUNCTIONS = weakref.WeakKeyDictionary()
+
+
+class NumbaPyTensorCacheLocator(_CacheLocator):
+    def __init__(self, py_func, py_file, hash):
+        self._py_func = py_func
+        self._py_file = py_file
+        self._hash = hash
+        # src_hash = hash(pytensor_loader._module_sources[self._py_file])
+        # self._hash = hash((src_hash, py_file, pytensor.__version__))
+
+    def ensure_cache_path(self):
+        pass
+
+    def get_cache_path(self):
+        """
+        Return the directory the function is cached in.
+        """
+        return NUMBA_CACHE_PATH
+
+    def get_source_stamp(self):
+        """
+        Get a timestamp representing the source code's freshness.
+        Can return any picklable Python object.
+        """
+        return 0
+
+    def get_disambiguator(self):
+        """
+        Get a string disambiguator for this locator's function.
+        It should allow disambiguating different but similarly-named functions.
+        """
+        return self._hash
+
+    @classmethod
+    def from_function(cls, py_func, py_file):
+        """
+        Create a locator instance for the given function located in the given file.
+        """
+        # py_file = Path(py_file).parent
+        # if py_file == (config.base_compiledir / "numba"):
+        if NUMBA_PYTENSOR_CACHE_ENABLED and py_func in CACHED_SRC_FUNCTIONS:
+            # print(f"Applies to {py_file}")
+            return cls(py_func, Path(py_file).parent, CACHED_SRC_FUNCTIONS[py_func])
+
+
+CacheImpl._locator_classes.insert(0, NumbaPyTensorCacheLocator)
+
+
+def cache_node_key(node: Apply, extra_key="") -> str:
+    op = node.op
+    return sha256(
+        str(
+            (
+                # Op signature
+                (type(op), op._props_dict() if hasattr(op, "_props_dict") else ""),
+                # Node signature
+                tuple((type(inp_type := inp.type), inp_type) for inp in node.inputs),
+                # Extra key given by the caller
+                extra_key,
+            ),
+        ).encode()
+    ).hexdigest()
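
cache_node_key folds the Op type and its props, the types of the node's inputs, and any caller-supplied context into a single digest, so structurally identical nodes map to the same on-disk cache entry. A rough sketch of computing a key for one Apply node (the small graph is illustrative only):

import pytensor.tensor as pt

from pytensor.link.numba.cache import cache_node_key

x = pt.vector("x")
y = pt.exp(x)

# `y.owner` is the Apply node applying Exp to x; nodes built from the same Op
# configuration and input types produce the same 64-character hex digest.
key = cache_node_key(y.owner, extra_key="dispatch-specific-context")
print(key)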

pytensor/link/numba/compile.py

Lines changed: 36 additions & 0 deletions
@@ -1,4 +1,7 @@
 import warnings
+from collections.abc import Callable
+from tempfile import NamedTemporaryFile
+from typing import Any

 import numba
 import numpy as np
@@ -8,6 +11,7 @@

 from pytensor import config
 from pytensor.graph import Apply, FunctionGraph, Type
+from pytensor.link.numba.cache import CACHED_SRC_FUNCTIONS
 from pytensor.link.numba.dispatch.sparse import CSCMatrixType, CSRMatrixType
 from pytensor.scalar import ScalarType
 from pytensor.sparse import SparseTensorType
@@ -55,6 +59,38 @@ def numba_njit(*args, fastmath=None, final_function: bool = False, **kwargs):
     return func(*args, fastmath=fastmath, **kwargs)


+def compile_and_cache_numba_function_src(
+    src: str,
+    function_name: str,
+    global_env: dict[Any, Any] | None = None,
+    local_env: dict[Any, Any] | None = None,
+    store_to_disk: bool = False,
+    cache_key: str | None = None,
+) -> Callable:
+    if store_to_disk:
+        with NamedTemporaryFile(delete=False) as f:
+            filename = f.name
+            f.write(src.encode())
+    else:
+        filename = "<string>"
+
+    if global_env is None:
+        global_env = {}
+
+    if local_env is None:
+        local_env = {}
+
+    mod_code = compile(src, filename, mode="exec")
+    exec(mod_code, global_env, local_env)
+
+    res = local_env[function_name]
+    res.__source__ = src  # type: ignore
+
+    if cache_key is not None:
+        CACHED_SRC_FUNCTIONS[res] = cache_key
+    return res
+
+
 def get_numba_type(
     pytensor_type: Type,
     layout: str = "A",
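
A short usage sketch of the new helper (the source string and key are illustrative): it exec-compiles the source, attaches the text as __source__, and, when a cache_key is given, registers the resulting function in CACHED_SRC_FUNCTIONS so the locator above can claim it.

from pytensor.link.numba.compile import compile_and_cache_numba_function_src

src = "def add_one(x): return x + 1"
add_one = compile_and_cache_numba_function_src(
    src,
    function_name="add_one",
    cache_key="example-key",  # in practice a digest produced by cache_node_key
)

assert add_one(41) == 42
assert add_one.__source__ == src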

pytensor/link/numba/dispatch/basic.py

Lines changed: 69 additions & 19 deletions
@@ -1,6 +1,7 @@
 import operator
 import sys
 import warnings
+from collections.abc import Callable
 from functools import singledispatch

 import numba
@@ -18,7 +19,11 @@
 from pytensor.compile.ops import DeepCopyOp
 from pytensor.graph.fg import FunctionGraph
 from pytensor.ifelse import IfElse
+from pytensor.link.numba.cache import (
+    cache_node_key,
+)
 from pytensor.link.numba.compile import (
+    compile_and_cache_numba_function_src,
     get_numba_type,
     numba_njit,
 )
@@ -208,20 +213,80 @@ def perform(*inputs):
                ret = py_perform_return(inputs)
            return ret

-    return perform
+    # Assume we can't cache python functions
+    return perform, None


 @singledispatch
-def numba_funcify(op, node=None, storage_map=None, **kwargs):
+def numba_funcify(
+    op, node=None, storage_map=None, **kwargs
+) -> Callable | tuple[Callable, str | int | None]:
     """Generate a numba function for a given op and apply node.

     The resulting function will usually use the `no_cpython_wrapper`
     argument in numba, so it can not be called directly from python,
     but only from other jit functions.
+
+    Optionally, the function can return a key that can be used to provide
+    extra caching context or to disable caching (by returning `None`).
+    When nothing is returned, PyTensor will assume the function can be cached
+    based on the op and node signature alone.
     """
     return generate_fallback_impl(op, node, storage_map, **kwargs)


+def numba_funcify_njit(op, node, **kwargs):
+    jitable_func_and_key = numba_funcify(op, node=node, **kwargs)
+
+    match jitable_func_and_key:
+        case Callable():
+            jitable_func = jitable_func_and_key
+            key = cache_node_key(node)
+        case (Callable(), str() | int()):
+            jitable_func, funcify_key = jitable_func_and_key
+            key = cache_node_key(node, funcify_key)
+        case (Callable(), None):
+            # We were explicitly told by the dispatch not to try and cache this function
+            jitable_func, key = jitable_func_and_key
+        case _:
+            raise TypeError(
+                f"numpy_funcify should return a callable or a (callable, key) pair, got {jitable_func_and_key}"
+            )
+
+    if key is not None:
+        # To force numba to use our cache, we must compile the function so that any closure
+        # becomes a global variable...
+        op_name = op.__class__.__name__
+        cached_func = compile_and_cache_numba_function_src(
+            src=f"def {op_name}(*args): return jitable_func(*args)",
+            function_name=op_name,
+            global_env=globals() | dict(jitable_func=jitable_func),
+            cache_key=key,
+        )
+        return numba_njit(cached_func, final_function=True, cache=True)
+    else:
+        return numba_njit(
+            lambda *args: jitable_func(*args), final_function=True, cache=False
+        )
+
+
+@numba_funcify.register(FunctionGraph)
+def numba_funcify_FunctionGraph(
+    fgraph,
+    node=None,
+    fgraph_name="numba_funcified_fgraph",
+    **kwargs,
+):
+    # TODO: Create hash key for whole graph
+    return fgraph_to_python(
+        fgraph,
+        op_conversion_fn=numba_funcify_njit,
+        type_conversion_fn=numba_typify,
+        fgraph_name=fgraph_name,
+        **kwargs,
+    )
+
+
 @numba_funcify.register(OpFromGraph)
 def numba_funcify_OpFromGraph(op, node=None, **kwargs):
     _ = kwargs.pop("storage_map", None)
@@ -251,23 +316,8 @@ def opfromgraph(*inputs):
         def opfromgraph(*inputs):
             return fgraph_fn(*inputs)

-    return opfromgraph
-
-
-@numba_funcify.register(FunctionGraph)
-def numba_funcify_FunctionGraph(
-    fgraph,
-    node=None,
-    fgraph_name="numba_funcified_fgraph",
-    **kwargs,
-):
-    return fgraph_to_python(
-        fgraph,
-        numba_funcify,
-        type_conversion_fn=numba_typify,
-        fgraph_name=fgraph_name,
-        **kwargs,
-    )
+    # We can't cache this correctly until we can define a key for it
+    return opfromgraph, None


 @numba_funcify.register(DeepCopyOp)
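
Under the new contract, a numba_funcify registration can return a bare callable (cached from the op/node signature alone), a (callable, key) pair that folds extra context into the cache key, or (callable, None) to opt out of caching. A hedged sketch of what an Op-specific dispatch might look like; ScaleOp and its single prop are illustrative and not part of this commit:

from pytensor.graph.basic import Apply
from pytensor.graph.op import Op
from pytensor.link.numba.compile import numba_njit
from pytensor.link.numba.dispatch.basic import numba_funcify
from pytensor.tensor import as_tensor_variable


class ScaleOp(Op):
    """Illustrative Op with a single prop that changes the generated code."""

    __props__ = ("scale",)

    def __init__(self, scale):
        self.scale = scale

    def make_node(self, x):
        x = as_tensor_variable(x)
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        output_storage[0][0] = inputs[0] * self.scale


@numba_funcify.register(ScaleOp)
def numba_funcify_ScaleOp(op, node=None, **kwargs):
    scale = op.scale  # prop baked into the jitted closure

    @numba_njit
    def scale_op(x):
        return x * scale

    # Folding the prop into the key keeps ScaleOp(2.0) and ScaleOp(3.0) from
    # sharing a cached kernel; returning just `scale_op` would cache on the
    # op/node signature alone, and (scale_op, None) would disable caching.
    return scale_op, str(scale)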

pytensor/link/numba/dispatch/blockwise.py

Lines changed: 26 additions & 2 deletions
@@ -1,4 +1,5 @@
 import sys
+from hashlib import sha256
 from typing import cast

 from numba.core.extending import overload
@@ -30,12 +31,17 @@ def numba_funcify_Blockwise(op: BlockwiseWithCoreShape, node, **kwargs):
         cast(tuple[TensorVariable], node.inputs[:nin]),
         propagate_unbatched_core_inputs=True,
     )
-    core_op_fn = numba_funcify(
+    core_op_fn_and_key = numba_funcify(
         core_op,
         node=core_node,
         parent_node=node,
         **kwargs,
     )
+    if isinstance(core_op_fn_and_key, tuple):
+        core_op_fn, core_op_key = core_op_fn_and_key
+    else:
+        # Assume we can cache core_op_fn
+        core_op_fn, core_op_key = core_op_fn_and_key, 0
     core_op_fn = store_core_outputs(core_op_fn, nin=nin, nout=nout)

     batch_ndim = blockwise_op.batch_ndim(node)
@@ -90,4 +96,22 @@ def blockwise(*inputs_and_core_shapes):
     def ov_blockwise(*inputs_and_core_shapes):
         return blockwise_wrapper

-    return blockwise
+    if core_op_key is None:
+        # We were told the scalar op cannot be cached
+        blockwise_key = None
+    else:
+        blockwise_key = "_".join(
+            map(
+                str,
+                (
+                    type(op),
+                    type(op.scalar_op),
+                    tuple(op.inplace_pattern.items()),
+                    tuple(getattr(op.scalar_op, "props_dict", lambda: {})().items()),
+                    core_op_key,
+                ),
+            )
+        )
+        blockwise_key = sha256(blockwise_key.encode()).hexdigest()
+
+    return blockwise, blockwise_key
