diff --git a/frontend/catalyst/api_extensions/callbacks.py b/frontend/catalyst/api_extensions/callbacks.py
index 6573ed86d5..9085d675db 100644
--- a/frontend/catalyst/api_extensions/callbacks.py
+++ b/frontend/catalyst/api_extensions/callbacks.py
@@ -138,7 +138,7 @@ def f(x):
     return accelerate_impl(func, dev=dev)
 
 
-def pure_callback(callback_fn, result_type=None):
+def pure_callback(callback_fn=None, *, result_type=None):
     """Execute and return the results of a functionally pure Python
     function from within a qjit-compiled function.
 
@@ -261,6 +261,11 @@ def vjp(x, dy) -> (jax.ShapeDtypeStruct((2,), jnp.float64),):
         >>> f(jnp.array([0.1, 0.2]))
         Array([-0.01071923,  0.82698717], dtype=float64)
     """
+    # Called with options only (e.g. as ``@pure_callback(result_type=...)``): defer via a partial.
+    if callback_fn is None:
+        kwargs = copy.copy(locals())
+        kwargs.pop("callback_fn")
+        return functools.partial(pure_callback, **kwargs)
 
     # Verify inputs
     if result_type is None:
diff --git a/frontend/catalyst/compiled_functions.py b/frontend/catalyst/compiled_functions.py
index 6cea61e91c..2b14b2d35e 100644
--- a/frontend/catalyst/compiled_functions.py
+++ b/frontend/catalyst/compiled_functions.py
@@ -92,36 +92,45 @@ def load_symbols(self):
             CFuncPtr: handle to the teardown function, which tears down the device
             CFuncPtr: handle to the memory transfer function for program results
         """
+        try:
 
-        setup = self.shared_object.setup
-        setup.argtypes = [ctypes.c_int, ctypes.POINTER(ctypes.c_char_p)]
-        setup.restypes = ctypes.c_int
+            setup = self.shared_object.setup
+            setup.argtypes = [ctypes.c_int, ctypes.POINTER(ctypes.c_char_p)]
+            setup.restype = ctypes.c_int
 
-        teardown = self.shared_object.teardown
-        teardown.argtypes = None
-        teardown.restypes = None
+            teardown = self.shared_object.teardown
+            teardown.argtypes = None
+            teardown.restype = None
 
-        # We are calling the c-interface
-        function = self.shared_object["_catalyst_pyface_" + self.func_name]
-        # Guaranteed from _mlir_ciface specification
-        function.restypes = None
-        # Not needed, computed from the arguments.
-        # function.argyptes
+        except AttributeError:
+            # setup/teardown are optional: ctypes raises AttributeError for a missing symbol.
+            setup = None
+            teardown = None
 
-        mem_transfer = self.shared_object["_mlir_memory_transfer"]
+        # We are calling the c-interface
+        function = self.shared_object["_catalyst_pyface_" + self.func_name]
+        # Guaranteed from _mlir_ciface specification
+        function.restype = None
+        # Not needed, computed from the arguments.
+        # function.argtypes
 
-        return function, setup, teardown, mem_transfer
+        mem_transfer = self.shared_object["_mlir_memory_transfer"]
+
+        return function, setup, teardown, mem_transfer
 
     def __enter__(self):
-        params_to_setup = [b"jitted-function"]
-        argc = len(params_to_setup)
-        array_of_char_ptrs = (ctypes.c_char_p * len(params_to_setup))()
-        array_of_char_ptrs[:] = params_to_setup
-        self.setup(ctypes.c_int(argc), array_of_char_ptrs)
+        # self.setup may be None (no setup symbol loaded); entering is then a no-op.
+        if self.setup:
+            params_to_setup = [b"jitted-function"]
+            argc = len(params_to_setup)
+            array_of_char_ptrs = (ctypes.c_char_p * argc)()
+            array_of_char_ptrs[:] = params_to_setup
+            self.setup(ctypes.c_int(argc), array_of_char_ptrs)
         return self
 
     def __exit__(self, _type, _value, _traceback):
-        self.teardown()
+        if self.teardown:  # skip when no teardown function is available (None)
+            self.teardown()
 
 
 class CompiledFunction:
@@ -329,10 +338,10 @@ def get_cmain(self, *args):
         return get_template(self.func_name, self.restype, *buffer)
 
     def __call__(self, *args, **kwargs):
-        static_argnums = self.compile_options.static_argnums
+        static_argnums = self.compile_options.static_argnums if self.compile_options else ()
         dynamic_args = filter_static_args(args, static_argnums)
 
-        if self.compile_options.abstracted_axes is not None:
+        if self.compile_options and self.compile_options.abstracted_axes is not None:
             abstracted_axes = self.compile_options.abstracted_axes
             dynamic_args = get_implicit_and_explicit_flat_args(
                 abstracted_axes, *dynamic_args, **kwargs
diff --git a/mlir/include/Quantum/Transforms/Passes.h b/mlir/include/Quantum/Transforms/Passes.h
index 00f33d8fa4..6d24549461 100644
--- a/mlir/include/Quantum/Transforms/Passes.h
+++ b/mlir/include/Quantum/Transforms/Passes.h
@@ -34,5 +34,6 @@ std::unique_ptr<mlir::Pass> createDisentangleCNOTPass();
 std::unique_ptr<mlir::Pass> createDisentangleSWAPPass();
 std::unique_ptr<mlir::Pass> createIonsDecompositionPass();
 std::unique_ptr<mlir::Pass> createLoopBoundaryOptimizationPass();
+std::unique_ptr<mlir::Pass> createRoutingPass();
 
 } // namespace catalyst
diff --git a/mlir/include/Quantum/Transforms/Passes.td b/mlir/include/Quantum/Transforms/Passes.td
index c120f5a501..171f420e05 100644
--- a/mlir/include/Quantum/Transforms/Passes.td
+++ b/mlir/include/Quantum/Transforms/Passes.td
@@ -125,6 +125,22 @@ def LoopBoundaryOptimizationPass : Pass<"loop-boundary"> {
 
     let constructor = "catalyst::createLoopBoundaryOptimizationPass()";
 }
+
+def RoutingPass : Pass<"route-circuit"> {
+    let summary = "Perform mapping and routing of quantum circuit to a hardware.";
+
+    let constructor = "catalyst::createRoutingPass()";
+    // The single option carries the device connectivity as a string of edges.
+    let options = [
+        Option<
+            /*C++ variable name=*/"hardwareGraph",
+            /*CLI argument=*/"hardware-graph",
+            /*type=*/"std::string",
+            /*default=*/"",
+            /*description=*/"Hardware graph represented by a list of edges connecting physical qubits."
+        >,
+    ];
+}
 // ----- Quantum circuit transformation passes end ----- //
 
 #endif // QUANTUM_PASSES
diff --git a/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp b/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp
index 58ee131f45..e6aa7bd591 100644
--- a/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp
+++ b/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp
@@ -71,4 +71,5 @@ void catalyst::registerAllCatalystPasses()
     mlir::registerPass(catalyst::createScatterLoweringPass);
     mlir::registerPass(catalyst::createSplitMultipleTapesPass);
     mlir::registerPass(catalyst::createTestPass);
+    mlir::registerPass(catalyst::createRoutingPass);
 }
diff --git a/mlir/lib/Quantum/Transforms/CMakeLists.txt b/mlir/lib/Quantum/Transforms/CMakeLists.txt
index 3a244ac4d6..d4200000c8 100644
--- a/mlir/lib/Quantum/Transforms/CMakeLists.txt
+++ b/mlir/lib/Quantum/Transforms/CMakeLists.txt
@@ -20,6 +20,7 @@ file(GLOB SRC
     IonsDecompositionPatterns.cpp
     loop_boundary_optimization.cpp
     LoopBoundaryOptimizationPatterns.cpp
+    route_circuit.cpp
 )
 
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
diff --git a/mlir/lib/Quantum/Transforms/route_circuit.cpp b/mlir/lib/Quantum/Transforms/route_circuit.cpp
new file mode 100644
index 0000000000..56810cf3fa
--- /dev/null
+++ b/mlir/lib/Quantum/Transforms/route_circuit.cpp
@@ -0,0 +1,789 @@
+// Copyright 2025 Xanadu Quantum Technologies Inc.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// NOTE(review): this citation (https://arxiv.org/pdf/2012.07711, table 6, SWAP basis-state
+// equivalences) looks copied from another pass; this file is a SABRE-style router -- confirm.
+
+#define DEBUG_TYPE "routecircuit"
+
+#include <iostream>
+#include <random>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Pass/Pass.h"
+#include "stablehlo/dialect/StablehloOps.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Debug.h"
+
+#include "Catalyst/IR/CatalystDialect.h"
+#include "Quantum/IR/QuantumOps.h"
+
+using namespace mlir;
+using namespace catalyst;
+
+namespace catalyst {
+#define GEN_PASS_DEF_ROUTINGPASS
+#define GEN_PASS_DECL_ROUTINGPASS
+#include "Quantum/Transforms/Passes.h.inc"
+
+const int MAXIMUM = 1e9; // "infinite" distance/score sentinel; 2 * MAXIMUM fits a 32-bit int
+
+struct RoutingPass : public impl::RoutingPassBase<RoutingPass> {
+    using impl::RoutingPassBase<RoutingPass>::RoutingPassBase;
+
+    int countLogicalQubit(Operation *op) // op must be a quantum::AllocOp with a static size
+    {
+        int numQubits = cast<quantum::AllocOp>(op).getNqubitsAttr().value_or(-1);
+        assert(numQubits != -1 && "Routing with a dynamic number of qubits is not implemented");
+        return numQubits;
+    }
+
+    llvm::DenseMap<std::pair<int, int>, bool>
+    parseHardwareGraph(std::string s, std::string delimiter, std::set<int> *physicalQubits)
+    {
+        // Parses edges written like "(u,v)", separated by `delimiter`, into a symmetric edge map
+        // and records both endpoints in *physicalQubits. The last edge may omit the delimiter.
+        llvm::DenseMap<std::pair<int, int>, bool> res;
+        for (size_t pos_start = 0; pos_start < s.size();) {
+            size_t pos_end = delimiter.empty() ? std::string::npos : s.find(delimiter, pos_start);
+            std::string token = s.substr(pos_start, pos_end - pos_start); // npos: rest of s
+            pos_start = (pos_end == std::string::npos) ? s.size() : pos_end + delimiter.length();
+            size_t commaPos = token.find(',');
+            if (pos_end == std::string::npos && commaPos == std::string::npos)
+                break; // trailing text that holds no edge is ignored, as it was before
+            int u = std::stoi(token.substr(1, commaPos - 1)); // between the bracket and ','
+            int v = std::stoi(token.substr(commaPos + 1));    // stoi stops at the closing bracket
+            physicalQubits->insert({u, v});
+            res[std::make_pair(u, v)] = true;
+            res[std::make_pair(v, u)] = true;
+        }
+        return res;
+    }
+
+    std::vector<int> generateRandomInitialMapping
+                                (std::set<int> *physicalQubits,
+                                llvm::DenseMap<std::pair<int, int>, bool> &couplingMap)
+    {
+        // Returns the physical qubits in breadth-first order over the coupling graph, starting
+        // from a randomly chosen qubit. Entry i is the physical qubit initially assigned to
+        // logical qubit i. Qubits not reachable from the start are left out of the result.
+        std::vector<int> mapping;
+        if (physicalQubits->empty())
+            return mapping; // no qubits: avoid reading element 0 of an empty vector
+
+        // Shuffle a copy of the qubit list just to draw a uniformly random starting point.
+        std::vector<int> shuffled(physicalQubits->begin(), physicalQubits->end());
+        std::random_device rd;
+        std::mt19937 g(rd());
+        std::shuffle(shuffled.begin(), shuffled.end(), g);
+        int randomStart = shuffled[0];
+
+        std::queue<int> q;
+        std::set<int> visited{randomStart};
+        q.push(randomStart);
+        while (!q.empty()) {
+            int currentNode = q.front();
+            q.pop();
+            mapping.push_back(currentNode);
+            // The edge map has no adjacency lists, so scan every edge for ones touching the node.
+            for (const auto &pair : couplingMap) {
+                if (!pair.second)
+                    continue; // entry recorded as "not an edge"
+                int neighbor = -1;
+                if (pair.first.first == currentNode)
+                    neighbor = pair.first.second;
+                else if (pair.first.second == currentNode)
+                    neighbor = pair.first.first;
+                if (neighbor != -1 && visited.insert(neighbor).second)
+                    q.push(neighbor);
+            }
+        }
+        return mapping;
+    }
+
+    quantum::ExtractOp getRegisterIndexOfOp(Value inQubit) // follows gates back to the extract
+    {
+        for (Value qubit = inQubit;;) {
+            Operation *producer = qubit.getDefiningOp();
+            if (isa<quantum::ExtractOp>(producer))
+                return cast<quantum::ExtractOp>(producer);
+            auto results = cast<quantum::CustomOp>(producer).getOutQubits();
+            size_t idx = 0;
+            while (idx < results.size() && results[idx] != qubit)
+                idx++;
+            if (idx == results.size())
+                return nullptr; // the value is not among the gate's qubit results
+            qubit = cast<quantum::CustomOp>(producer).getInQubits()[idx];
+        }
+    }
+
+    void getFrontLayer(std::set<quantum::CustomOp> *frontLayer) {
+        // Collects the gates that can be scheduled first: one- or two-qubit custom ops whose
+        // input qubits all come directly from an extract op, with no gate before them.
+        getOperation()->walk([&](Operation *op) {
+            if (!isa<quantum::CustomOp>(op))
+                return;
+            auto gate = cast<quantum::CustomOp>(op);
+            auto operands = gate.getInQubits();
+
+            // Gates on any other number of qubits are never added here.
+            size_t arity = operands.size();
+            if (arity != 1 && arity != 2)
+                return;
+
+            for (Value operand : operands) {
+                if (!isa<quantum::ExtractOp>(operand.getDefiningOp()))
+                    return; // produced by something other than an extract
+            }
+            frontLayer->insert(gate);
+        });
+    }
+
+    void getFrontLayerReverse(std::set<quantum::CustomOp> *frontLayer) {
+        // Collects every custom op whose next operation in the block is not a custom op.
+        getOperation()->walk([&](Operation *op) {
+            if (!isa<quantum::CustomOp>(op))
+                return;
+            if (!isa<quantum::CustomOp>(op->getNextNode()))
+                frontLayer->insert(cast<quantum::CustomOp>(op));
+        });
+    }
+
+    void preProcessing(
+        std::set<int> *physicalQubits, std::vector<int> *randomInitialMapping,
+        llvm::DenseMap<quantum::CustomOp, std::vector<quantum::ExtractOp>> &OpToExtractMap,
+        llvm::DenseMap<quantum::ExtractOp, int> &ExtractOpToQubitMap, int *dagLogicalQubits,
+        llvm::DenseMap<std::pair<int, int>, bool> &couplingMap)
+    {
+        // One walk over the IR that fills in the bookkeeping used by the router:
+        //  * ExtractOp -> consecutive logical indices, in visiting order,
+        //  * CustomOp  -> the extract op behind each of the gate's input qubits,
+        //  * AllocOp   -> the qubit count and a fresh initial mapping.
+        int nextLogicalQubit = 0;
+        getOperation()->walk([&](Operation *op) {
+            if (auto extract = dyn_cast<quantum::ExtractOp>(op)) {
+                ExtractOpToQubitMap[extract] = nextLogicalQubit++;
+                return;
+            }
+            if (auto gate = dyn_cast<quantum::CustomOp>(op)) {
+                for (Value operand : gate.getInQubits())
+                    OpToExtractMap[gate].push_back(getRegisterIndexOfOp(operand));
+                return;
+            }
+            if (isa<quantum::AllocOp>(op)) {
+                *dagLogicalQubits = countLogicalQubit(op);
+                *randomInitialMapping = generateRandomInitialMapping(physicalQubits, couplingMap);
+            }
+        });
+    }
+
+    void distanceMatrices(std::set<int> *physicalQubits,
+                          llvm::DenseMap<std::pair<int, int>, int> &distanceMatrix,
+                          llvm::DenseMap<std::pair<int, int>, int> &predecessorMatrix,
+                          llvm::DenseMap<std::pair<int, int>, bool> &couplingMap)
+    {
+        // Fills distanceMatrix with hop counts between every pair of physical qubits and
+        // predecessorMatrix[(a, b)] with the node visited just before b on the a -> b route
+        // (-1 when no route is known). Unreachable pairs keep the distance MAXIMUM.
+        // Both maps are keyed by (from, to) pairs of physical qubit ids.
+        const std::set<int> &nodes = *physicalQubits;
+
+        // Start from "everything unreachable" ...
+        for (int a : nodes) {
+            for (int b : nodes) {
+                distanceMatrix[std::make_pair(a, b)] = MAXIMUM;
+                predecessorMatrix[std::make_pair(a, b)] = -1;
+            }
+        }
+
+        // ... except that every qubit reaches itself in zero hops ...
+        for (int a : nodes) {
+            predecessorMatrix[std::make_pair(a, a)] = a;
+            distanceMatrix[std::make_pair(a, a)] = 0;
+        }
+
+        // ... and the endpoints of a hardware edge are one hop apart.
+        for (auto &edge : couplingMap) {
+            if (!edge.second)
+                continue;
+            std::pair<int, int> link = edge.first;
+            distanceMatrix[link] = 1;
+            predecessorMatrix[link] = link.first;
+        }
+
+        // Relax every (from, to) pair through each intermediate qubit `via` in turn.
+        for (int via : nodes) {
+            for (int from : nodes) {
+                for (int to : nodes) {
+                    int detour = distanceMatrix[std::make_pair(from, via)] +
+                                 distanceMatrix[std::make_pair(via, to)];
+                    if (detour < distanceMatrix[std::make_pair(from, to)]) {
+                        distanceMatrix[std::make_pair(from, to)] = detour;
+                        int pred = predecessorMatrix[std::make_pair(via, to)];
+                        predecessorMatrix[std::make_pair(from, to)] = pred;
+                    }
+                }
+            }
+        }
+    }
+
+    std::vector<int> getShortestPath(int source, int target,
+                                     llvm::DenseMap<std::pair<int, int>, int> &predecessorMatrix)
+    {
+        // Rebuilds the source -> target route from predecessorMatrix by stepping back from the
+        // target one predecessor at a time. Returns the qubits in travel order (both ends
+        // included), or an empty vector when a -1 predecessor shows there is no route.
+        std::vector<int> backwards;
+        if (predecessorMatrix[std::make_pair(source, target)] == -1 && source != target)
+            return backwards;
+
+        for (int node = target; node != source;) {
+            backwards.push_back(node);
+            node = predecessorMatrix[std::make_pair(source, node)];
+            // `backwards` is never empty at this point, so -1 alone decides the bail-out.
+            if (node == -1)
+                return std::vector<int>();
+        }
+        backwards.push_back(source);
+
+        return std::vector<int>(backwards.rbegin(), backwards.rend());
+    }
+
+    void getExecuteGateList(
+        std::set<quantum::CustomOp> *frontLayer, std::set<quantum::CustomOp> *executeGateList,
+        llvm::DenseMap<std::pair<int, int>, bool> &couplingMap,
+        std::vector<int> *randomInitialMapping,
+        llvm::DenseMap<quantum::CustomOp, std::vector<quantum::ExtractOp>> &OpToExtractMap,
+        llvm::DenseMap<quantum::ExtractOp, int> &ExtractOpToQubitMap)
+    {
+        // Adds to *executeGateList the front-layer gates runnable under the current mapping:
+        // every gate with one input qubit, and each gate with two input qubits whose physical
+        // qubits share a hardware edge. Gates with any other input count are not added.
+        for (auto op : *frontLayer) {
+            size_t nQubits = op.getInQubits().size();
+            if (nQubits == 1)
+                executeGateList->insert(op);
+            if (nQubits != 2)
+                continue;
+            const auto &extractOps = OpToExtractMap[op];
+            int physical_Qubit_0 = (*randomInitialMapping)[ExtractOpToQubitMap[extractOps[0]]];
+            int physical_Qubit_1 = (*randomInitialMapping)[ExtractOpToQubitMap[extractOps[1]]];
+            // lookup() does not insert, so probing a non-edge leaves couplingMap untouched.
+            if (couplingMap.lookup(std::make_pair(physical_Qubit_0, physical_Qubit_1)))
+                executeGateList->insert(op);
+        }
+    }
+
+    void getExecuteGateListReverse(
+        std::set<quantum::CustomOp> *frontLayer, std::set<quantum::CustomOp> *executeGateList,
+        llvm::DenseMap<std::pair<int, int>, bool> &couplingMap,
+        std::vector<int> *randomInitialMapping,
+        llvm::DenseMap<quantum::CustomOp, std::vector<quantum::ExtractOp>> &OpToExtractMap,
+        llvm::DenseMap<quantum::ExtractOp, int> &ExtractOpToQubitMap)
+    {
+        // Same selection as getExecuteGateList, but the gate arity is read from the number of
+        // output qubits: gates with one output qubit always qualify, gates with two qualify
+        // when their physical qubits share a hardware edge; any other count is not added.
+        for (auto op : *frontLayer) {
+            size_t nQubits = op.getOutQubits().size();
+            if (nQubits == 1)
+                executeGateList->insert(op);
+            if (nQubits != 2)
+                continue;
+            const auto &extractOps = OpToExtractMap[op];
+            int physical_Qubit_0 = (*randomInitialMapping)[ExtractOpToQubitMap[extractOps[0]]];
+            int physical_Qubit_1 = (*randomInitialMapping)[ExtractOpToQubitMap[extractOps[1]]];
+            // lookup() does not insert, so probing a non-edge leaves couplingMap untouched.
+            if (couplingMap.lookup(std::make_pair(physical_Qubit_0, physical_Qubit_1)))
+                executeGateList->insert(op);
+        }
+    }
+
+    void
+    Heuristic(std::set<quantum::CustomOp> *frontLayer,
+              llvm::DenseMap<std::pair<int, int>, int> &swap_candidates,
+              std::vector<int> *randomInitialMapping,
+              llvm::DenseMap<std::pair<int, int>, int> &distanceMatrix,
+              llvm::DenseMap<quantum::CustomOp, std::vector<quantum::ExtractOp>> &OpToExtractMap,
+              llvm::DenseMap<quantum::ExtractOp, int> &ExtractOpToQubitMap)
+    {
+        // Scores each candidate SWAP: apply it to a copy of the current mapping and add up the
+        // distances between the physical qubits of every front-layer gate. A candidate keeps
+        // the smaller of its stored value and the new score, so lower means better.
+        for (auto &entry : swap_candidates) {
+            const std::pair<int, int> swap_pair = entry.first;
+            std::vector<int> temp_mapping(*randomInitialMapping);
+            for (int &physical : temp_mapping) {
+                if (physical == swap_pair.first)
+                    physical = swap_pair.second;
+                else if (physical == swap_pair.second)
+                    physical = swap_pair.first;
+            }
+            int temp_score = 0;
+            for (auto op : *frontLayer) {
+                const auto &extractOps = OpToExtractMap[op];
+                if (extractOps.size() < 2)
+                    continue; // no qubit pair to measure; indexing [1] would be out of bounds
+                int first = temp_mapping[ExtractOpToQubitMap[extractOps[0]]];
+                int second = temp_mapping[ExtractOpToQubitMap[extractOps[1]]];
+                temp_score += distanceMatrix.lookup(std::make_pair(first, second));
+            }
+            entry.second = std::min(entry.second, temp_score);
+        }
+    }
+    void sabreIterator(std::set<int> *physicalQubits,
+        std::set<quantum::CustomOp> *frontLayer, std::set<quantum::CustomOp> *executeGateList,
+        llvm::DenseMap<std::pair<int, int>, int> &distanceMatrix,
+        llvm::DenseMap<std::pair<int, int>, int> &predecessorMatrix,
+        llvm::DenseMap<std::pair<int, int>, bool> &couplingMap,
+        std::vector<int> *initialMapping,
+        int *dagLogicalQubits,
+        llvm::DenseMap<quantum::CustomOp, std::vector<quantum::ExtractOp>> &OpToExtractMap,
+        llvm::DenseMap<quantum::ExtractOp, int> &ExtractOpToQubitMap,
+        std::vector<StringRef> *compiledGateNames,
+        std::vector<std::vector<int>> *compiledGateQubits,
+        std::vector<mlir::ValueRange> *compiledGateParams)
+    {
+        // Routing loop; runs until the front layer is empty. Each round does one of:
+        //  1. emit the gates executable under the current mapping and enqueue their users;
+        //  2. after too many SWAP insertions, drop trailing SWAPs and route the first
+        //     front-layer gate along a shortest path;
+        //  3. otherwise emit the neighbouring SWAP with the lowest Heuristic() score.
+        // Results are appended to the compiledGate* vectors; *initialMapping is updated in place.
+        auto swapInMapping = [&](int a, int b) { // exchange a and b in *initialMapping
+            for (int &physical : *initialMapping) {
+                if (physical == a)
+                    physical = b;
+                else if (physical == b)
+                    physical = a;
+            }
+        };
+        int search_steps = 0;
+        int max_iterations_without_progress = 10 * (*dagLogicalQubits);
+        while ((*frontLayer).size()) {
+            getExecuteGateList(frontLayer, executeGateList, couplingMap, initialMapping,
+                               OpToExtractMap, ExtractOpToQubitMap);
+            if ((*executeGateList).size()) {
+                for (auto op : *executeGateList) {
+                    (*compiledGateNames).push_back(op.getGateName());
+                    // this doesn't work as Params are SSA values
+                    // when original function is deleted, they are lost
+                    // Runtime segmentation fault occurs if parametric gates are used
+                    (*compiledGateParams).push_back(op.getParams());
+                    std::vector<int> currOpPhysicalQubits;
+                    for (auto currOpExtract : OpToExtractMap[op])
+                        currOpPhysicalQubits.push_back(
+                            (*initialMapping)[ExtractOpToQubitMap[currOpExtract]]);
+                    (*compiledGateQubits).push_back(currOpPhysicalQubits);
+                    // remove the executed op from front layer
+                    (*frontLayer).erase(op);
+                    // NOTE(review): a user is added as soon as one of its operands is produced,
+                    // without checking that its other operands have been executed -- confirm.
+                    auto outQubits = op.getOutQubits();
+                    for (auto outQubit : outQubits) {
+                        for (auto &use : outQubit.getUses()) {
+                            Operation *successorOp = use.getOwner();
+                            if (isa<quantum::CustomOp>(*successorOp))
+                                (*frontLayer).insert(cast<quantum::CustomOp>(*successorOp));
+                        }
+                    }
+                }
+                (*executeGateList).clear(); // clear execute gate list
+            }
+            else if (search_steps >= max_iterations_without_progress) {
+                search_steps = 0;
+                // The emptiness check keeps back() from being called on an empty vector.
+                // NOTE(review): the mapping is not restored for the SWAPs removed here -- confirm.
+                while (!(*compiledGateNames).empty() && (*compiledGateNames).back() == "SWAP") {
+                    (*compiledGateNames).pop_back();
+                    (*compiledGateQubits).pop_back();
+                    (*compiledGateParams).pop_back();
+                }
+                auto greedyGate = *((*frontLayer).begin());
+                auto inExtract = OpToExtractMap[greedyGate];
+                int physical_Qubit_0 = (*initialMapping)[ExtractOpToQubitMap[inExtract[0]]];
+                int physical_Qubit_1 = (*initialMapping)[ExtractOpToQubitMap[inExtract[1]]];
+                std::vector<int> swapPath =
+                    getShortestPath(physical_Qubit_0, physical_Qubit_1, predecessorMatrix);
+                // `i + 1 < size()` rather than `i < size() - 1`: an empty path must not wrap.
+                for (size_t i = 1; i + 1 < swapPath.size(); i++) {
+                    int u = swapPath[i - 1];
+                    int v = swapPath[i];
+                    (*compiledGateNames).push_back("SWAP");
+                    (*compiledGateQubits).push_back({u, v});
+                    (*compiledGateParams).push_back(mlir::ValueRange());
+                    swapInMapping(u, v);
+                }
+            }
+            else {
+                llvm::DenseMap<std::pair<int, int>, int> swap_candidates;
+                for (auto op : *frontLayer) {
+                    for (auto logivalQubitExtractToBeRouted : OpToExtractMap[op]) {
+                        int firstPhysicalQubitToBeRouted = (*initialMapping)
+                            [ExtractOpToQubitMap[logivalQubitExtractToBeRouted]];
+                        for (auto secondPhysicalQubitToBeRouted : *physicalQubits)
+                            if (distanceMatrix[std::make_pair(firstPhysicalQubitToBeRouted,
+                                                              secondPhysicalQubitToBeRouted)] == 1)
+                                swap_candidates[std::make_pair(firstPhysicalQubitToBeRouted,
+                                                               secondPhysicalQubitToBeRouted)] =
+                                    MAXIMUM;
+                    }
+                }
+                Heuristic(frontLayer, swap_candidates, initialMapping, distanceMatrix,
+                          OpToExtractMap, ExtractOpToQubitMap);
+                int min_dist_swap = MAXIMUM;
+                std::pair<int, int> min_swap;
+                for (auto &entry : swap_candidates) {
+                    std::pair<int, int> &key = entry.first;
+                    if (entry.second < min_dist_swap) {
+                        min_swap = key;
+                        min_dist_swap = entry.second;
+                    }
+                }
+                // add the min SWAP
+                (*compiledGateNames).push_back("SWAP");
+                (*compiledGateQubits).push_back({min_swap.first, min_swap.second});
+                (*compiledGateParams).push_back(mlir::ValueRange());
+                swapInMapping(min_swap.first, min_swap.second);
+                search_steps++;
+            }
+        }
+    }
+
+    void sabreIteratorReverse(std::set<int> *physicalQubits,
+    std::set<quantum::CustomOp> *frontLayer, std::set<quantum::CustomOp> *executeGateList,
+    llvm::DenseMap<std::pair<int, int>, int> &distanceMatrix,
+    llvm::DenseMap<std::pair<int, int>, int> &predecessorMatrix,
+    llvm::DenseMap<std::pair<int, int>, bool> &couplingMap,
+    std::vector<int> *initialMapping,
+    int *dagLogicalQubits,
+    llvm::DenseMap<quantum::CustomOp, std::vector<quantum::ExtractOp>> &OpToExtractMap,
+    llvm::DenseMap<quantum::ExtractOp, int> &ExtractOpToQubitMap,
+    std::vector<StringRef> *compiledGateNames,
+    std::vector<std::vector<int>> *compiledGateQubits,
+    std::vector<mlir::ValueRange> *compiledGateParams)
+    {
+        int search_steps = 0;
+        int max_iterations_without_progress = 10 * (*dagLogicalQubits);
+        while ((*frontLayer).size()) {
+            getExecuteGateListReverse(frontLayer, executeGateList, couplingMap, initialMapping,
+                               OpToExtractMap, ExtractOpToQubitMap);
+            if ((*executeGateList).size()) {
+                for (auto op : *executeGateList) {
+                    (*compiledGateNames).push_back(op.getGateName());
+                    // this doesn't work as Params are SSA values
+                    // when original function is deleted, they are lost
+                    // Runtime segmentation fault occurs if parametric gates are used
+                    (*compiledGateParams).push_back(op.getParams());
+                    std::vector<int> currOpPhysicalQubits;
+                    for (auto currOpExtract : OpToExtractMap[op])
+                        currOpPhysicalQubits.push_back(
+                            (*initialMapping)[ExtractOpToQubitMap[currOpExtract]]);
+                    (*compiledGateQubits).push_back(currOpPhysicalQubits);
+
+                    // remove the executed op from front layer
+                    (*frontLayer).erase(op);
+                    // get successor of op
+                    auto outQubits = op.getOutQubits();
+                    for (auto outQubit : outQubits) {
+                        for (auto &use : outQubit.getUses()) {
+                            Operation *successorOp = use.getOwner();
+                            if (isa<quantum::CustomOp>(*successorOp))
+                                (*frontLayer).insert(cast<quantum::CustomOp>(*successorOp));
+                        }
+                    }
+                }
+                (*executeGateList).clear(); // clear execute gate list
+            }
+            else if (search_steps >= max_iterations_without_progress) {
+                search_steps = 0;
+                while ((*compiledGateNames).back() == "SWAP") {
+                    (*compiledGateNames).pop_back();
+                    (*compiledGateQubits).pop_back();
+                    (*compiledGateParams).pop_back();
+                }
+                auto greedyGate = *((*frontLayer).begin());
+                auto inExtract = OpToExtractMap[greedyGate];
+                int physical_Qubit_0 = (*initialMapping)[ExtractOpToQubitMap[inExtract[0]]];
+                int physical_Qubit_1 = (*initialMapping)[ExtractOpToQubitMap[inExtract[1]]];
+                std::vector<int> swapPath =
+                    getShortestPath(physical_Qubit_0, physical_Qubit_1, predecessorMatrix);
+                for (size_t i = 1; i < swapPath.size() - 1; i++) {
+                    int u = swapPath[i - 1];
+                    int v = swapPath[i];
+                    (*compiledGateNames).push_back("SWAP");
+                    (*compiledGateQubits).push_back({u, v});
+                    (*compiledGateParams).push_back(mlir::ValueRange());
+                    // update mapping
+                    for (size_t random_init_mapping_index = 0;
+                         random_init_mapping_index < (*initialMapping).size();
+                         random_init_mapping_index++) {
+                        if ((*initialMapping)[random_init_mapping_index] == u)
+                            (*initialMapping)[random_init_mapping_index] = v;
+                        else if ((*initialMapping)[random_init_mapping_index] == v)
+                            (*initialMapping)[random_init_mapping_index] = u;
+                    }
+                }
+            }
+            else {
+                llvm::DenseMap<std::pair<int, int>, int> swap_candidates;
+                for (auto op : *frontLayer) {
+                    for (auto logivalQubitExtractToBeRouted : OpToExtractMap[op]) {
+                        int firstPhysicalQubitToBeRouted = (*initialMapping)
+                            [ExtractOpToQubitMap[logivalQubitExtractToBeRouted]];
+                        for (auto secondPhysicalQubitToBeRouted : *physicalQubits)
+                            if (distanceMatrix[std::make_pair(firstPhysicalQubitToBeRouted,
+                                                              secondPhysicalQubitToBeRouted)] == 1)
+                                swap_candidates[std::make_pair(firstPhysicalQubitToBeRouted,
+                                                               secondPhysicalQubitToBeRouted)] =
+                                    MAXIMUM;
+                    }
+                }
+                Heuristic(frontLayer, swap_candidates, initialMapping, distanceMatrix,
+                          OpToExtractMap, ExtractOpToQubitMap);
+                int min_dist_swap = MAXIMUM;
+                std::pair<int, int> min_swap;
+                for (auto &entry : swap_candidates) {
+                    std::pair<int, int> &key = entry.first;
+                    if (entry.second < min_dist_swap) {
+                        min_swap = key;
+                        min_dist_swap = entry.second;
+                    }
+                }
+                // add the min SWAP
+                (*compiledGateNames).push_back("SWAP");
+                (*compiledGateQubits).push_back({min_swap.first, min_swap.second});
+                (*compiledGateParams).push_back(mlir::ValueRange());
+                // update mapping
+                for (size_t random_init_mapping_index = 0;
+                     random_init_mapping_index < (*initialMapping).size();
+                     random_init_mapping_index++) {
+                    if ((*initialMapping)[random_init_mapping_index] == min_swap.first)
+                        (*initialMapping)[random_init_mapping_index] = min_swap.second;
+                    else if ((*initialMapping)[random_init_mapping_index] == min_swap.second)
+                        (*initialMapping)[random_init_mapping_index] = min_swap.first;
+                }
+                search_steps++;
+            }
+        }
+        return;
+    }
+
+    void runOnOperation() override
+    {
+        // Route the circuit onto the hardware coupling graph: a reverse SABRE run refines the
+        // initial mapping, a forward run produces the routed gate list, and that list is then
+        // emitted into a new function that replaces the original one.
+        std::set<int> physicalQubits;
+        llvm::DenseMap<std::pair<int, int>, bool> couplingMap =
+            parseHardwareGraph(hardwareGraph, ";", &physicalQubits);
+        int dagLogicalQubits;
+        int numLogicalQubits = physicalQubits.size(); // works with automatic qubit management
+
+        // distance and predecessor matrices over the coupling map
+        llvm::DenseMap<std::pair<int, int>, int> distanceMatrix;
+        llvm::DenseMap<std::pair<int, int>, int> predecessorMatrix;
+        distanceMatrices(&physicalQubits, distanceMatrix, predecessorMatrix, couplingMap);
+
+        std::vector<int> randomInitialMapping;
+        std::set<quantum::CustomOp> frontLayer;
+        llvm::DenseMap<quantum::CustomOp, std::vector<quantum::ExtractOp>> OpToExtractMap;
+        llvm::DenseMap<quantum::ExtractOp, int> ExtractOpToQubitMap;
+        preProcessing(&physicalQubits, &randomInitialMapping, OpToExtractMap,
+                      ExtractOpToQubitMap, &dagLogicalQubits, couplingMap);
+
+        // print init mapping
+        llvm::outs() << "Random Initial Mapping: \n";
+        for (size_t logical_qubit_index = 0; logical_qubit_index < randomInitialMapping.size();
+             logical_qubit_index++)
+            llvm::outs() << logical_qubit_index << "->" << randomInitialMapping[logical_qubit_index]
+                         << "\n";
+
+        // reverse SABRE run: only the mapping it leaves in randomInitialMapping is kept
+        std::vector<StringRef> compiledGateNames;
+        std::vector<std::vector<int>> compiledGateQubits;
+        std::vector<mlir::ValueRange> compiledGateParams;
+        std::set<quantum::CustomOp> executeGateList;
+        getFrontLayerReverse(&frontLayer);
+        sabreIteratorReverse(&physicalQubits, &frontLayer, &executeGateList,
+                               distanceMatrix, predecessorMatrix,
+                               couplingMap, &randomInitialMapping, &dagLogicalQubits,
+                               OpToExtractMap, ExtractOpToQubitMap,
+                               &compiledGateNames, &compiledGateQubits, &compiledGateParams);
+        // forward SABRE run: discard the reverse-run gate lists, start from the refined mapping
+        compiledGateNames.clear();
+        compiledGateQubits.clear();
+        compiledGateParams.clear();
+        executeGateList.clear();
+        frontLayer.clear();
+        llvm::outs() << "Initial Mapping from first SABRE run: \n";
+        for (size_t logical_qubit_index = 0; logical_qubit_index < randomInitialMapping.size();
+             logical_qubit_index++)
+            llvm::outs() << logical_qubit_index << "->" << randomInitialMapping[logical_qubit_index]
+                         << "\n";
+        getFrontLayer(&frontLayer);
+        sabreIterator(&physicalQubits, &frontLayer, &executeGateList,
+                               distanceMatrix, predecessorMatrix,
+                               couplingMap, &randomInitialMapping, &dagLogicalQubits,
+                               OpToExtractMap, ExtractOpToQubitMap,
+                               &compiledGateNames, &compiledGateQubits, &compiledGateParams);
+
+        // locate the device op and its enclosing function (the last DeviceInitOp visited wins)
+        mlir::func::FuncOp func;
+        quantum::DeviceInitOp device;
+        getOperation()->walk([&](Operation *op) {
+            if (isa<quantum::DeviceInitOp>(op)) {
+                func = op->getParentOfType<func::FuncOp>();
+                device = cast<quantum::DeviceInitOp>(op);
+            }
+        });
+        if (!func) {
+            // No DeviceInitOp was found: there is nothing to rebuild, so leave the IR untouched.
+            return;
+        }
+        mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();
+        mlir::MLIRContext *context = &getContext();
+        mlir::OpBuilder builder(context);
+        builder.setInsertionPointToEnd(module.getBody());
+        mlir::FunctionType funcType = builder.getFunctionType(/*inputs=*/{}, /*results=*/{});
+        mlir::func::FuncOp newFunc = builder.create<mlir::func::FuncOp>(
+            builder.getUnknownLoc(), func.getName().str(), funcType);
+
+        // insertion point at new function
+        newFunc.addEntryBlock();
+        builder.setInsertionPointToStart(&newFunc.getBody().front());
+
+        // insert device, copying lib / name / kwargs from the original device op
+        mlir::Operation *newDeviceOp = builder.create<quantum::DeviceInitOp>(
+            builder.getUnknownLoc(), mlir::Value{0}, builder.getStringAttr(device.getLib()),
+            builder.getStringAttr(device.getDeviceName()),
+            builder.getStringAttr(device.getKwargs()));
+
+        // allocate one quantum register sized to the number of physical qubits
+        builder.setInsertionPointAfter(newDeviceOp);
+        Type quregType = builder.getType<catalyst::quantum::QuregType>();
+        IntegerAttr numQubitsAttr = builder.getI64IntegerAttr(numLogicalQubits);
+        mlir::Operation *allocOp = builder.create<quantum::AllocOp>(
+            builder.getUnknownLoc(), quregType, mlir::Value{}, numQubitsAttr);
+        builder.setInsertionPointAfter(allocOp);
+
+        // insert ExtractOps; qubitToValue tracks the latest SSA value of every physical qubit
+        llvm::DenseMap<int, mlir::Value> qubitToValue;
+        for (int qubitIndex = 0; qubitIndex < numLogicalQubits; qubitIndex++) {
+            mlir::Operation *extractOp = builder.create<quantum::ExtractOp>(
+                builder.getUnknownLoc(), builder.getType<quantum::QubitType>(),
+                allocOp->getResult(0), nullptr, builder.getI64IntegerAttr(qubitIndex));
+            qubitToValue[qubitIndex] = extractOp->getResult(0);
+            builder.setInsertionPointAfter(extractOp);
+        }
+        // insert Gates
+        // NOTE(review): compiledGateParams holds SSA values defined in the original function,
+        // which is erased below; parametric gates presumably break -- confirm.
+        for (size_t gateIndex = 0; gateIndex < compiledGateNames.size(); gateIndex++) {
+            std::vector<int> mappedQubits = compiledGateQubits[gateIndex];
+            llvm::SmallVector<mlir::Type, 4> resultTypes;
+            if (mappedQubits.size() == 1) {
+                resultTypes.push_back(builder.getType<quantum::QubitType>());
+            }
+            else if (mappedQubits.size() == 2) {
+                resultTypes.push_back(builder.getType<quantum::QubitType>());
+                resultTypes.push_back(builder.getType<quantum::QubitType>());
+            }
+            llvm::SmallVector<mlir::Value, 2> values;
+            if (mappedQubits.size() == 1)
+                values = {qubitToValue[mappedQubits[0]]};
+            else // any gate that is not single-qubit is handled as a two-qubit gate
+                values = {qubitToValue[mappedQubits[0]], qubitToValue[mappedQubits[1]]};
+            mlir::ValueRange in_qubits_to_curr_op(values);
+            mlir::Operation *currOp =
+                builder.create<quantum::CustomOp>(builder.getUnknownLoc(),
+                                                  /*out_qubits=*/resultTypes,
+                                                  /*out_ctrl_qubits=*/mlir::TypeRange({}),
+                                                  /*params=*/compiledGateParams[gateIndex],
+                                                  /*in_qubits=*/in_qubits_to_curr_op,
+                                                  /*gate_name=*/compiledGateNames[gateIndex],
+                                                  /*adjoint=*/false,
+                                                  /*in_ctrl_qubits=*/mlir::ValueRange({}),
+                                                  /*in_ctrl_values=*/mlir::ValueRange());
+            builder.setInsertionPointAfter(currOp);
+            qubitToValue[mappedQubits[0]] = currOp->getResult(0);
+            if (mappedQubits.size() == 2)
+                qubitToValue[mappedQubits[1]] = currOp->getResult(1);
+        }
+
+        // Create compbasis observable from the final value of every qubit
+        llvm::SmallVector<mlir::Value> currStateValuesVector;
+        for (int qubitIndex = 0; qubitIndex < numLogicalQubits; qubitIndex++)
+            currStateValuesVector.push_back(qubitToValue[qubitIndex]);
+        mlir::ValueRange currStateValues(currStateValuesVector);
+
+        Type obsType = builder.getType<quantum::ObservableType>();
+        mlir::Operation *compBasisOp = builder.create<quantum::ComputationalBasisOp>(
+            builder.getUnknownLoc(), obsType, currStateValues, mlir::Value{});
+        // Get the size of the state vector, computed as 1 << numQubits
+        RankedTensorType constTensorType = RankedTensorType::get({}, builder.getI64Type());
+        DenseIntElementsAttr oneValue = DenseIntElementsAttr::get(constTensorType, APInt(64, 1));
+        mlir::Operation *constOneOp = builder.create<stablehlo::ConstantOp>(
+            builder.getUnknownLoc(), constTensorType, oneValue);
+        mlir::Operation *numQubitsOp =
+            builder.create<quantum::NumQubitsOp>(builder.getUnknownLoc(), builder.getI64Type());
+        mlir::Operation *fromElementsOp = builder.create<tensor::FromElementsOp>(
+            builder.getUnknownLoc(), RankedTensorType::get({}, builder.getI64Type()),
+            numQubitsOp->getResult(0));
+        mlir::Operation *shiftLeftOp = builder.create<stablehlo::ShiftLeftOp>(
+            builder.getUnknownLoc(), constTensorType, constOneOp->getResult(0),
+            fromElementsOp->getResult(0));
+        mlir::Operation *stateShapeOp = builder.create<tensor::ExtractOp>(
+            builder.getUnknownLoc(), builder.getI64Type(), shiftLeftOp->getResult(0), ValueRange{});
+
+        // Create quantum state
+        RankedTensorType stateType =
+            RankedTensorType::get({ShapedType::kDynamic}, ComplexType::get(builder.getF64Type()));
+        mlir::Operation *stateOp = builder.create<quantum::StateOp>(
+            builder.getUnknownLoc(), stateType, compBasisOp->getResult(0),
+            stateShapeOp->getResult(0), Value{});
+
+        // release the register and the device once the state has been read
+        builder.create<quantum::DeallocOp>(builder.getUnknownLoc(), allocOp->getResult(0));
+        builder.create<quantum::DeviceReleaseOp>(builder.getUnknownLoc());
+
+        // update return types: the new function returns (state size, state vector)
+        SmallVector<Type> newReturnTypes;
+        newReturnTypes.push_back(shiftLeftOp->getResult(0).getType());
+        newReturnTypes.push_back(stateOp->getResult(0).getType());
+        auto newFuncType = FunctionType::get(newFunc.getContext(),
+                                             newFunc.getFunctionType().getInputs(), newReturnTypes);
+        newFunc.setType(newFuncType);
+
+        SmallVector<Value> returnValues = {shiftLeftOp->getResult(0), stateOp->getResult(0)};
+        builder.create<mlir::func::ReturnOp>(builder.getUnknownLoc(), returnValues);
+
+        // replace original func with the routed func
+        func->replaceAllUsesWith(newFunc);
+        func->erase();
+    }
+};
+
+// Creates a fresh RoutingPass and hands it back as an owning pointer to the generic Pass type.
+std::unique_ptr<Pass> createRoutingPass() { return std::make_unique<RoutingPass>(); }
+} // namespace catalyst
diff --git a/ritu.py b/ritu.py
new file mode 100644
index 0000000000..14030b97c8
--- /dev/null
+++ b/ritu.py
@@ -0,0 +1,105 @@
+import functools
+import os
+from copy import copy
+from pathlib import Path
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+import pennylane as qml
+from jax.interpreters import mlir
+
+from catalyst import QJIT, CompileOptions, pure_callback
+from catalyst.compiled_functions import CompiledFunction
+from catalyst.compiler import LinkerDriver, _catalyst
+
+
+def jitting(qnode):
+    assert isinstance(qnode, qml.QNode)
+
+    @functools.wraps(qnode)
+    def tracing_function(*args, **kwargs):
+
+        # trace the code once to get the i/o signature
+        # to avoid overheads, we create the QJIT object manually and invoke specific stages
+        class Basic_QJIT:
+            def __init__(self, qnode):
+                self.__name__ = "jit_" + qnode.__name__
+                self.user_function = qnode
+                self.compile_options = CompileOptions()
+
+        jaxpr, _, treedef, _ = QJIT.capture(Basic_QJIT(qnode), args, **kwargs)
+
+        @pure_callback(result_type=treedef.unflatten(jaxpr.out_avals))
+        def runtime_function(*concrete_args):
+            # INSIDE HERE WE ARE RUNNING...
+
+            # TODO:
+            # * Better names
+
+            # use a closure to provide all arguments as static parameters
+            def closure():
+                return qnode.func(*concrete_args)
+
+            better_qnode = copy(qnode)
+            better_qnode.func = closure
+            jit_obj = Basic_QJIT(better_qnode)
+
+            # get the specialized jaxpr+mlir this time
+            #   (we could probably simplify further and just use `catalyst.qjit` directly)
+            jit_obj.jaxpr, _, _, _ = QJIT.capture(jit_obj, ())
+            mlir_module = QJIT.generate_ir(jit_obj)
+
+            # eliminate setup/teardown functions which will mess with the runtime
+            for _ in range(2):
+                mlir_module.body.operations[-1].erase()
+            print(mlir_module)
+
+            # get some properties of the entry point function
+            entry_point = mlir_module.body.operations[0]
+            entry_symbol = str(entry_point.name).replace('"', "")
+            entry_result_types = entry_point.type.results
+
+            dir_path = os.path.dirname(os.path.realpath(__file__)) + "/ritu_dir"
+            Path(dir_path).mkdir(parents=True, exist_ok=True)
+
+            inp = dir_path + "/input.mlir"
+            with open(inp, "w+") as file:
+                file.write(str(mlir_module))
+
+            out1 = _catalyst(
+                ("--tool=all"),
+                ("--workspace", str(dir_path)),
+                ("--keep-intermediate",),
+                ("--verbose",),
+                ("-o", dir_path + "/jit.so"),
+                (inp,),
+            )
+
+            shared_object = LinkerDriver.run(Path(dir_path + "/catalyst_module.o").absolute())
+            output_object_name = str(Path(shared_object).absolute())
+
+            with mlir.ir.Context(), mlir.ir.Location.unknown():
+                f64 = mlir.ir.F64Type.get()
+
+                compiled_function = CompiledFunction(
+                    output_object_name, entry_symbol, entry_result_types, None, None
+                )
+
+            return compiled_function()
+
+        return runtime_function(*args)
+
+    return tracing_function
+
+
+@qml.qjit
+@jitting
+@qml.qnode(qml.device("lightning.qubit", wires=2))
+def foo(w1, w2):
+    qml.Hadamard(w1)
+    qml.CNOT([w2[0], w2[1]])
+    return qml.probs()
+
+
+print(foo(0, np.array([0, 1])))  # demo run with w1 = 0 and w2 = [0, 1]