diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4b99028..267ec09f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,9 +18,11 @@ endif()
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR})
 
+
 option(COMPILE_DOCS "This is settable from the command line" OFF)
 option(ENABLE_DATABASE "Set this for the database connector to work" OFF)
 option(ENABLE_PYTHON "Set this for python connector to work" OFF)
+option(ENABLE_CUDA "Enable CUDA execution for some faster data structures" OFF)
 
 if (ENABLE_DATABASE)
   add_definitions(-D__FLEXFRINGE_DATABASE)
@@ -29,6 +31,17 @@ if (ENABLE_PYTHON)
   add_definitions(-D__FLEXFRINGE_PYTHON)
 endif()
 
+if (ENABLE_CUDA)
+  include(CheckLanguage)
+  check_language(CUDA)
+  if(CMAKE_CUDA_COMPILER)
+    add_definitions(-D__FLEXFRINGE_CUDA)
+    enable_language(CUDA)
+  else()
+    message(WARNING "Could not find CUDA on system. Proceeding without.")
+  endif()
+endif()
+
 #set(CMAKE_MESSAGE_LOG_LEVEL WARNING)
 
 # Default compiler flags:
@@ -191,6 +204,11 @@ if (ENABLE_DATABASE)
   target_link_libraries(flexfringe libpqxx::pqxx)
 endif()
 
+if(CMAKE_CUDA_COMPILER)
+  find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+  target_link_libraries(ActiveLearning ${CUDART_LIBRARY})
+endif()
+
 find_package(Threads)
 target_link_libraries(flexfringe ${CMAKE_THREAD_LIBS_INIT}) # For pthreads
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 9e1c9c71..7106892f 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -24,6 +24,7 @@ file(WRITE "gitversion.cpp" "const char *gitversion = \"${Gitversion}\";")
 ## create the evaluators.h file
 file(GLOB Files "${CMAKE_CURRENT_SOURCE_DIR}/evaluation/*.h")
 file(WRITE "evaluators.h" "#ifndef __ALL_HEADERS__ \n#define __ALL_HEADERS__ \n\n")
+
 foreach (Filepath ${Files})
   get_filename_component(Filename ${Filepath} NAME)
   file(APPEND "evaluators.h" "#include \"${Filename}\"\n")
@@ -140,7 +141,12 @@ file(APPEND "${cmakelists_eval}" " \"../active_learning/memory/incomplete_infor
 file(APPEND "${cmakelists_eval}" " \"../active_learning/system_under_learning\"\n")
 file(APPEND "${cmakelists_eval}" " \"../active_learning/system_under_learning/neural_network_suls\"\n")
 file(APPEND "${cmakelists_eval}" " \"../active_learning/system_under_learning/benchmark_parsers\"\n")
-file(APPEND "${cmakelists_eval}" ")\n\n\n")
+file(APPEND "${cmakelists_eval}" ")\n\n")
+
+# the cuda directives
+file(APPEND "${cmakelists_eval}" "if(CMAKE_CUDA_COMPILER)\n")
+file(APPEND "${cmakelists_eval}" " include_directories(\$\{CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES\})\n")
+file(APPEND "${cmakelists_eval}" "endif()\n\n")
 
 # "${CMAKE_CURRENT_SOURCE_DIR}/../active_learning//oracle/cex_search_strategies"
 # "${CMAKE_CURRENT_SOURCE_DIR}/../active_learning//oracle/cex_conflict_search"
diff --git a/source/active_learning/CMakeLists.txt b/source/active_learning/CMakeLists.txt
index 8ae42ea3..650f7001 100644
--- a/source/active_learning/CMakeLists.txt
+++ b/source/active_learning/CMakeLists.txt
@@ -15,8 +15,14 @@ include_directories(
     "${CMAKE_CURRENT_SOURCE_DIR}/system_under_learning/benchmark_parsers"
 )
 
+if(CMAKE_CUDA_COMPILER)
+  include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+    "${CMAKE_CURRENT_SOURCE_DIR}/active_learning_util/cuda"
+    "${CMAKE_CURRENT_SOURCE_DIR}/memory/distinguishing_sequences/cuda"
+  )
+endif()
 
-add_library(ActiveLearning STATIC
+set(ACTIVE_LEARNING_FILES
 
     active_learning_mode.h
     active_learning_mode.cpp
@@ -188,9 +194,27 @@
 )
 
+if(CMAKE_CUDA_COMPILER)
+  set(CUDA_FILES
+    active_learning_util/cuda/cuda_common.cuh
+    active_learning_util/cuda/cuda_common.cu
+    memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh
+    memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cu
+  )
+
+  set(ACTIVE_LEARNING_FILES
+    ${ACTIVE_LEARNING_FILES}
+    ${CUDA_FILES}
+  )
+
+  set_source_files_properties(${CUDA_FILES} PROPERTIES LANGUAGE CUDA)
+endif()
+
+add_library(ActiveLearning STATIC ${ACTIVE_LEARNING_FILES})
+
 if (ENABLE_PYTHON)
     target_link_libraries(ActiveLearning ${PYTHON_LIBRARIES})
 endif()
 
 if (ENABLE_DATABASE)
     target_link_libraries(ActiveLearning libpqxx::pqxx)
-endif()
+endif()
\ No newline at end of file
diff --git a/source/active_learning/active_learning_mode.cpp b/source/active_learning/active_learning_mode.cpp
index 4a1f523b..3cd97ae8 100644
--- a/source/active_learning/active_learning_mode.cpp
+++ b/source/active_learning/active_learning_mode.cpp
@@ -77,6 +77,9 @@ int active_learning_mode::run() {
     unique_ptr algorithm = algorithm_factory::create_algorithm_obj();
     algorithm->run(id);
 
+#ifdef __CUDA
+    cudaDeviceReset();
+#endif
     // Hielke: Can we we this one better? For example, we do it in the constructor of the corresponding algorithms
     /* LOG_S(INFO) << "Learning (partly) passively. Therefore read in input-data.";
     get_inputdata();
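
Aside (illustration only, not part of the patch): all GPU-only code in this change is meant to be guarded by the __FLEXFRINGE_CUDA definition that the top-level CMakeLists.txt adds when a CUDA compiler is found; the cudaDeviceReset() call above tests __CUDA instead, a macro these CMake changes do not define. A minimal sketch of the guarded-shutdown pattern, assuming __FLEXFRINGE_CUDA is the intended guard; shutdown_gpu() is a made-up name:

#ifdef __FLEXFRINGE_CUDA
#include "cuda_runtime.h"
#endif

// shutdown_gpu() is illustrative only; the patch performs the reset at the end of
// active_learning_mode::run() instead.
void shutdown_gpu() {
#ifdef __FLEXFRINGE_CUDA
    cudaDeviceReset();   // releases all device memory and state owned by this process
#endif
}
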
diff --git a/source/active_learning/active_learning_util/common_functions.cpp b/source/active_learning/active_learning_util/common_functions.cpp
index 2eaa3af3..721e0a39 100644
--- a/source/active_learning/active_learning_util/common_functions.cpp
+++ b/source/active_learning/active_learning_util/common_functions.cpp
@@ -439,15 +439,7 @@ trace* active_learning_namespace::vector_to_trace(const vector& vec, inputd
     throw runtime_error("We should not reach here. What happened?");
 }*/
 
-/**
- * @brief For debugging.
- */
-void active_learning_namespace::print_list(const list& l) {
-    for (const auto s : l) cout << s << " ";
-    cout << endl;
-}
-
-void active_learning_namespace::print_vector(const vector& l) {
+void active_learning_namespace::print_span(std::span l) {
     for (const auto s : l) cout << s << " ";
     cout << endl;
 }
diff --git a/source/active_learning/active_learning_util/common_functions.h b/source/active_learning/active_learning_util/common_functions.h
index f76b117b..1e21bc03 100644
--- a/source/active_learning/active_learning_util/common_functions.h
+++ b/source/active_learning/active_learning_util/common_functions.h
@@ -24,6 +24,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -98,9 +99,7 @@ namespace active_learning_namespace {
         std::cout << std::endl;
     }
 
-    [[maybe_unused]] void print_list(const std::list& l);
-
-    void print_vector(const std::vector& l);
+    void print_span(std::span l);
 
 } // namespace active_learning_namespace
diff --git a/source/active_learning/active_learning_util/cuda/cuda_common.cu b/source/active_learning/active_learning_util/cuda/cuda_common.cu
new file mode 100644
index 00000000..5d414e15
--- /dev/null
+++ b/source/active_learning/active_learning_util/cuda/cuda_common.cu
@@ -0,0 +1,29 @@
+/**
+ * @file cuda_common.cu
+ * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl)
+ * @brief
+ * @version 0.1
+ * @date 2025-07-13
+ *
+ * @copyright Copyright (c) 2025
+ *
+ */
+
+#ifndef __FLEXFRINGE_CUDA
+#include
+static_assert(std::integral_constant::value, "cuda_common.cu included even though CUDA not enabled in project.");
+#endif
+
+#include "cuda_common.cuh"
+
+#include
+
+void cuda_common::gpuAssert(cudaError_t code, const char *file, int line, bool abort)
+{
+    if (code != cudaSuccess)
+    {
+        //fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+        std::cerr << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line;
+        if (abort) exit(code);
+    }
+}
\ No newline at end of file
diff --git a/source/active_learning/active_learning_util/cuda/cuda_common.cuh b/source/active_learning/active_learning_util/cuda/cuda_common.cuh
new file mode 100644
index 00000000..3e35288f
--- /dev/null
+++ b/source/active_learning/active_learning_util/cuda/cuda_common.cuh
@@ -0,0 +1,26 @@
+/**
+ * @file cuda_common.cuh
+ * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl)
+ * @brief
+ * @version 0.1
+ * @date 2025-07-12
+ *
+ * @copyright Copyright (c) 2025
+ *
+ */
+
+#ifndef __FLEXFRINGE_CUDA
+#include
+static_assert(std::integral_constant::value, "cuda_common.cuh included even though CUDA not enabled in project.");
+#endif
+
+#ifndef __CUDA_COMMON_CUH__
+#define __CUDA_COMMON_CUH__
+
+#include "cuda.h"
+
+namespace cuda_common {
+    void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true);
+}
+
+#endif // __CUDA_COMMON_CUH__
\ No newline at end of file
diff --git a/source/active_learning/algorithms/paul.cpp b/source/active_learning/algorithms/paul.cpp
index 4869415b..2fffe59e 100644
--- a/source/active_learning/algorithms/paul.cpp
+++ b/source/active_learning/algorithms/paul.cpp
@@ -43,7 +43,10 @@ void paul_algorithm::update_node_data(apta_node* n, std::unique_ptr& aut)
         ds_handler->complete_node(n, aut);
     }
 
-    if(n_data->get_predictions().size() != ds_handler->size()){
+    //if(n->get_depth() >= 10)
+    //    return;
+
+    if(n_data->get_n_predictions() != ds_handler->size()){
         auto y_pred = ds_handler->predict_node_with_sul_layer_wise(*aut, n);
         n_data->set_predictions(std::move(y_pred));
     }
@@ -159,7 +162,8 @@ refinement* paul_algorithm::check_blue_node_for_merge_partner(apta_node* const b
             // continue;
             //}
         }
-
+
+        //if(ds_handler->get_score() > 0)
         ref->score = ds_handler->get_score(); // score computed in check_consistency() or distributions_consistent()
         if(ref->score > 0){
             rs.insert(ref);
@@ -375,14 +379,14 @@ list paul_algorithm::find_hypothesis(list& previous_re
     }
     */
 
 //#ifndef NDEBUG
-    {
+/*  {
         static int c = 0;
         merger->print_dot("after_" + to_string(c++) + ".dot");
 
         if(c%10==0){
             output_manager::print_current_automaton(merger.get(), "model.", to_string(c) + ".intermediate");
         }
-    }
+    } */
 //#endif
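
Aside (illustration only, not part of the patch): the cuda_common files above provide cuda_common::gpuAssert(), which later parts of this patch wrap in a gpuErrchk macro. A minimal sketch of that call-site pattern; example_allocation() and the buffer size are made up, only gpuAssert and the standard CUDA runtime calls come from the patch:

#include "cuda_runtime.h"
#include "cuda_common.cuh"

// same macro shape as the one defined further down in this patch
#define gpuErrchk(ans) { cuda_common::gpuAssert((ans), __FILE__, __LINE__); }

void example_allocation() {
    int* buf_d = nullptr;
    // every cudaError_t-returning call can be wrapped; on failure gpuAssert prints
    // the error string plus file and line, and by default exits the process
    gpuErrchk( cudaMalloc((void**) &buf_d, 1024 * sizeof(int)) );
    gpuErrchk( cudaMemset(buf_d, 0, 1024 * sizeof(int)) );
    gpuErrchk( cudaFree(buf_d) );
}
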
+ */ + __global__ void xor_vectors_kernel(const int* v1, const int* v2, int* tmp, const int size){ + int gid = blockDim.x * blockIdx.x + threadIdx.x; + + if(gid >= size) + return; + + tmp[gid] = v1[gid] == v2[gid] ? 1 : 0; + } +} + + /** + * @brief Get the overlap in percent of two int-vectors of given size. + * Useful for e.g. computing the accuracy score in between two vectors. + * + * ..._d indicates a data structure on device- (gpu-) memory. + * _...h for host ("cpu-visible" memory) + * + * TODO: inefficiency by allocating/recallocating memory + */ + float distinguishing_sequences_gpu::get_overlap_gpu(const int* d1_d, const int* d2_d, const size_t size){ + const int threads_per_block = 256; + const int n_blocks = max(1, static_cast(size/threads_per_block)); + + dim3 block(threads_per_block); + dim3 grid(n_blocks); + + int* res_arr_d; + const auto res_arr_byte_size = grid.x * sizeof(int); + gpuErrchk( cudaMalloc((void**) &res_arr_d, res_arr_byte_size) ); + gpuErrchk( cudaMemset(res_arr_d, 0, res_arr_byte_size) ); + + int* tmp_arr_d; + gpuErrchk( cudaMalloc((void**) &tmp_arr_d, size * sizeof(int)) ); + + xor_vectors_kernel<<>>(d1_d, d2_d, tmp_arr_d, size); + gpuErrchk( cudaDeviceSynchronize() ); + + sum_kernel<<>>(tmp_arr_d, res_arr_d, size); + gpuErrchk( cudaDeviceSynchronize() ); + + int* res_arr_h; + res_arr_h = (int*) malloc(res_arr_byte_size); + cudaMemcpy(res_arr_h, res_arr_d, res_arr_byte_size, cudaMemcpyDeviceToHost); + + int n_overlaps = 0; + for(int i = 0; i < grid.x; ++i){ + n_overlaps += res_arr_h[i]; + } + + free(res_arr_h); + gpuErrchk( cudaFree(tmp_arr_d) ); + gpuErrchk( cudaFree(res_arr_d) ); + + constexpr static float epsilon = 1e-6; // avoid division error when v1 or v2 only have -1 entries, or size of this is 0 + return static_cast(n_overlaps) / (static_cast(size) + epsilon); +} + +#endif // gpuErrcheck \ No newline at end of file diff --git a/source/active_learning/memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh b/source/active_learning/memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh new file mode 100644 index 00000000..29ce4542 --- /dev/null +++ b/source/active_learning/memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh @@ -0,0 +1,24 @@ +/** + * @file distinguishing_sequences_gpu.cuh + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-07-12 + * + * @copyright Copyright (c) 2025 + * + */ + +#ifndef __FLEXFRINGE_CUDA +#include +static_assert(std::integral_constant::value, "distinguishing_sequences_gpu.cuh included even though CUDA not enabled in project."); +#endif + +#ifndef __KERNELS_DISTINGUISHING_SEQUENCES_CUH__ +#define __KERNELS_DISTINGUISHING_SEQUENCES_CUH__ + +namespace distinguishing_sequences_gpu { + float get_overlap_gpu(const int* d1_d, const int* d2_d, const size_t size); +} + +#endif // __KERNELS_DISTINGUISHING_SEQUENCES_CUH__ \ No newline at end of file diff --git a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h index 7531448b..254cb114 100644 --- a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h +++ b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h @@ -73,7 +73,15 @@ class distinguishing_sequences_handler_base { public: using layer_predictions_map = std::unordered_map< int, std::vector >; - + + #ifdef 
__FLEXFRINGE_CUDA + // an aggregate to structure values + struct device_vector { + std::unordered_map len_pred_map_d; // maps to device pointers + std::unordered_map len_size_map; // maps to size + }; + #endif + distinguishing_sequences_handler_base(const std::shared_ptr& sul) : sul(sul){}; distinguishing_sequences_handler_base(){ @@ -161,6 +169,17 @@ class distinguishing_sequences_handler_base { throw std::invalid_argument("This ii-handler does not implement distributions_consistent function"); } + #ifdef __FLEXFRINGE_CUDA + /** + * @brief The same as the non-CUDA version, but it does work on a GPU. + */ + virtual bool distributions_consistent_layer_wise(const device_vector& v1, + const device_vector& v2, + const std::optional depth1_opt = std::nullopt, + const std::optional depth2_opt = std::nullopt) { + throw std::invalid_argument("This ii-handler does not implement distributions_consistent function"); + } + #else /** * @brief A function determining whether the distributions as gained from predict_node_with_automaton * and predict_node_with_sul are consistent. Layer-wise enables different kinds of statistical tests such as @@ -174,6 +193,7 @@ class distinguishing_sequences_handler_base { const std::optional depth2_opt = std::nullopt) { throw std::invalid_argument("This ii-handler does not implement distributions_consistent function"); } + #endif }; /** diff --git a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp index 2bb4b9d0..68afd081 100644 --- a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp +++ b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp @@ -19,6 +19,10 @@ #include #include +#ifdef __FLEXFRINGE_CUDA +#include "distinguishing_sequences_gpu.cuh" +#endif + using namespace std; /** @@ -407,6 +411,50 @@ float distinguishing_sequences_handler_fast::compute_threshold(const optional depth1_opt, const optional depth2_opt) { + + /* if(depth1_opt.has_value() && depth2_opt.has_value() && AL_ADJUST_THRESHOLD && (depth1_opt.value() >= 10 || depth2_opt.value() >= 10)){ + last_overlap = -1; // indicating we skipped this step + return true; + } */ + if(d1.len_size_map.size() != d2.len_size_map.size()) + throw runtime_error("Distributions do not match in lengths"); + + float max_ratio = 0; + for(auto len_size_t : d1.len_size_map | std::views::keys){ + const int len = static_cast(len_size_t); + const auto threshold = compute_threshold(depth1_opt, depth2_opt); + + if(!d2.len_size_map.contains(len)) + throw runtime_error("Distributions captured different lengths, this should not have happened"); + + const auto n_preds = static_cast(d1.len_size_map.at(len)); + if(n_preds != static_cast(d1.len_size_map.at(len))) + throw runtime_error("Distributions do not match in size in length " + to_string(len)); + + const auto v1_d = d1.len_pred_map_d.at(len); + const auto v2_d = d2.len_pred_map_d.at(len); + + const auto ratio = 1.0f - distinguishing_sequences_gpu::get_overlap_gpu(v1_d, v2_d, n_preds); + if(ratio > threshold){ + //cout << "\nsize: " << v1.size() << ", depth: " << depth << ", ratio: " << ratio << endl; + last_overlap = 0; + return false; + } + + max_ratio = max(max_ratio, ratio); // TODO: adjust the data types + } + + last_overlap = 1-max_ratio; + return true; +} +#else /** * @brief Does what you think it does. 
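
Aside (illustration only, not part of the patch): the device_vector aggregate introduced above mirrors a host-side layer_predictions_map in GPU memory, one buffer per prediction length. A minimal sketch of that copy, assuming int predictions; the struct is redeclared here only to keep the sketch self-contained, and to_device() is a made-up helper (the patch does the equivalent copy in paul_data::set_predictions() further down):

#include <cstddef>
#include <unordered_map>
#include <vector>
#include "cuda_runtime.h"

struct device_vector {
    std::unordered_map<int, int*> len_pred_map_d;   // device pointer per length
    std::unordered_map<int, size_t> len_size_map;   // number of predictions per length
};

device_vector to_device(const std::unordered_map<int, std::vector<int>>& host_preds) {
    device_vector res;
    for (const auto& [len, preds] : host_preds) {
        int* preds_d = nullptr;
        const size_t byte_size = preds.size() * sizeof(int);
        cudaMalloc((void**)&preds_d, byte_size);                               // one device buffer per length
        cudaMemcpy(preds_d, preds.data(), byte_size, cudaMemcpyHostToDevice);  // host to device copy
        res.len_pred_map_d[len] = preds_d;
        res.len_size_map[len] = preds.size();
    }
    return res;  // the buffers must later be released with cudaFree by whoever owns the result
}
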
* @@ -414,20 +462,25 @@ float distinguishing_sequences_handler_fast::compute_threshold(const optional depth1_opt, const optional depth2_opt) { + + /* if(depth1_opt.has_value() && depth2_opt.has_value() && AL_ADJUST_THRESHOLD && (depth1_opt.value() >= 10 || depth2_opt.value() >= 10)){ + last_overlap = -1; // indicating we skipped this step + return true; + } */ if(d1.size() != d2.size()) throw runtime_error("Distributions are unequal"); float max_ratio = 0; - for(auto depth : d1 | std::views::keys){ + for(auto len : d1 | std::views::keys){ const auto threshold = compute_threshold(depth1_opt, depth2_opt); - if(!d2.contains(depth)) + if(!d2.contains(len)) throw runtime_error("Distributions captured different lengths, this should not have happened"); - const auto& v1 = d1.at(depth); - const auto& v2 = d2.at(depth); + const auto& v1 = d1.at(len); + const auto& v2 = d2.at(len); if(v1.size() != v2.size()) - throw runtime_error("Distributions do not match in size in length " + to_string(depth)); + throw runtime_error("Distributions do not match in size in length " + to_string(len)); const auto ratio = get_overlap(v1, v2); if(ratio > threshold){ @@ -443,5 +496,5 @@ bool distinguishing_sequences_handler_fast::distributions_consistent_layer_wise( //cout << "\nDisagreed: " << disagreed << " | agreed: " << agreed << "max ratio: " << max_ratio << endl; //cout << "\nmax ratio: " << max_ratio << endl; return true; - -} \ No newline at end of file +} +#endif \ No newline at end of file diff --git a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h index f9d78923..92e9bb0c 100644 --- a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h +++ b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h @@ -44,11 +44,19 @@ class distinguishing_sequences_handler_fast final : public distinguishing_sequen const int size() const override {return m_suffixes.size();} + #ifdef __FLEXFRINGE_CUDA + bool distributions_consistent_layer_wise(const device_vector& v1, + const device_vector& v2, + const std::optional depth1_opt = std::nullopt, + const std::optional depth2_opt = std::nullopt) + override; + #else bool distributions_consistent_layer_wise(const layer_predictions_map& v1, const layer_predictions_map& v2, const std::optional depth1_opt = std::nullopt, const std::optional depth2_opt = std::nullopt) - override; + override; + #endif float compute_threshold(const std::optional& d1, const std::optional& d2); diff --git a/source/active_learning/memory/observation_table.cpp b/source/active_learning/memory/observation_table.cpp index bdd38162..8935dd52 100644 --- a/source/active_learning/memory/observation_table.cpp +++ b/source/active_learning/memory/observation_table.cpp @@ -361,7 +361,7 @@ void observation_table::print() const { /* cout << "Upper table: " << endl; for(auto it = upper_table.cbegin(); it != upper_table.cend(); ++it){ const auto& row_name = it->first; - print_vector(row_name); + print_span(row_name); } cout << "Lower table:" << endl; @@ -372,11 +372,11 @@ void observation_table::print() const { cout << "Columns:" << endl; for(const auto col: all_columns){ - print_vector(col); + print_span(col); } cout << "Rows to close:" << endl; for(const auto r: incomplete_rows){ - print_vector(r); + print_span(r); } */ } diff --git a/source/active_learning/memory/observation_table_imat.cpp 
b/source/active_learning/memory/observation_table_imat.cpp index 449cbff0..379e6bba 100644 --- a/source/active_learning/memory/observation_table_imat.cpp +++ b/source/active_learning/memory/observation_table_imat.cpp @@ -391,7 +391,7 @@ void observation_table_imat::print() const { cout << "Upper table rows: " << endl; for (auto it = upper_table.cbegin(); it != upper_table.cend(); ++it) { const auto& row_name = it->first; - print_vector(row_name); + print_span(row_name); } cout << "Upper table data: " << endl; @@ -411,11 +411,11 @@ void observation_table_imat::print() const { cout << "Columns:" << endl; for (const auto col : all_columns) { - print_vector(col); + print_span(col); } cout << "Rows to close:" << endl; for (const auto r : incomplete_rows) { - print_vector(r); + print_span(r); } } diff --git a/source/evaluation/CMakeLists.txt b/source/evaluation/CMakeLists.txt index f1a9460a..48bd2fb1 100644 --- a/source/evaluation/CMakeLists.txt +++ b/source/evaluation/CMakeLists.txt @@ -8,6 +8,9 @@ include_directories( "../active_learning/system_under_learning/benchmark_parsers" ) +if(CMAKE_CUDA_COMPILER) + include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) +endif() add_library(Evaluation STATIC aic.h diff --git a/source/evaluation/paul_heuristic.cpp b/source/evaluation/paul_heuristic.cpp index 3176f880..ee9003e5 100755 --- a/source/evaluation/paul_heuristic.cpp +++ b/source/evaluation/paul_heuristic.cpp @@ -12,13 +12,14 @@ #include #include +#ifdef __FLEXFRINGE_CUDA +#include "source/active_learning/active_learning_util/cuda/cuda_common.cuh" +#include +#endif + REGISTER_DEF_TYPE(paul_heuristic); REGISTER_DEF_DATATYPE(paul_data); -// template void paul_data::set_predictions(std::vector& predictions); -// template void paul_data::set_predictions(std::vector&& predictions); -template void paul_data::set_predictions(layer_predictions_map&& predictions); - void paul_data::print_state_label(std::iostream& output){ count_data::print_state_label(output); }; @@ -59,6 +60,48 @@ void paul_data::update(evaluation_data* right){ } } +/** + * @brief Overwrites the current set of predictions with the ones handed to this method. + */ +void paul_data::set_predictions(layer_predictions_map&& predictions){ +#ifdef __FLEXFRINGE_CUDA + +#ifndef gpuErrcheck +#define gpuErrchk(ans) { cuda_common::gpuAssert((ans), __FILE__, __LINE__); } + + std::unordered_map< int, int* > preds_d(predictions.size()); + std::unordered_map< int, size_t > sizes(predictions.size()); + + for(const auto& [len, preds]: predictions){ + sizes[len] = preds.size(); + + int* target_field_d; + const auto byte_size = preds.size() * sizeof(int); + gpuErrchk(cudaMalloc((void**) &target_field_d, byte_size)); + gpuErrchk(cudaMemcpy(target_field_d, preds.data(), byte_size, cudaMemcpyHostToDevice)); + preds_d[len] = target_field_d; + } + + this->predictions.len_pred_map_d = std::move(preds_d); + this->predictions.len_size_map = std::move(sizes); +#endif // gpuErrcheck + +#else + this->predictions = move(predictions); +#endif // __FLEXFRINGE_CUDA +} + +#ifdef __FLEXFRINGE_CUDA +paul_data::~paul_data(){ +#ifndef gpuErrcheck +#define gpuErrchk(ans) { cuda_common::gpuAssert((ans), __FILE__, __LINE__); } + + for(int* preds_d: this->predictions.len_pred_map_d | std::ranges::views::values){ + gpuErrchk(cudaFree(preds_d)); + } +} +#endif // gpuErrcheck +#endif // __FLEXFRINGE_CUDA /** * @brief Does what you think it does. 
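
Aside (illustration only, not part of the patch): the layer-wise consistency check above calls distinguishing_sequences_gpu::get_overlap_gpu() on two prediction vectors that already live in device memory and treats the pair as inconsistent once the disagreement ratio (1 - overlap) exceeds the threshold for a length. A minimal sketch of that decision for a single length; example_consistent(), the host vectors, and the 0.1 threshold are made up, and equal sizes of v1_h and v2_h are assumed:

#include <vector>
#include "cuda_runtime.h"
#include "distinguishing_sequences_gpu.cuh"

bool example_consistent(const std::vector<int>& v1_h, const std::vector<int>& v2_h, float threshold = 0.1f) {
    const size_t n = v1_h.size();
    int *v1_d = nullptr, *v2_d = nullptr;
    cudaMalloc((void**)&v1_d, n * sizeof(int));
    cudaMalloc((void**)&v2_d, n * sizeof(int));
    cudaMemcpy(v1_d, v1_h.data(), n * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(v2_d, v2_h.data(), n * sizeof(int), cudaMemcpyHostToDevice);

    // fraction of positions where both vectors predict the same symbol
    const float overlap = distinguishing_sequences_gpu::get_overlap_gpu(v1_d, v2_d, n);

    cudaFree(v1_d);
    cudaFree(v2_d);
    return (1.0f - overlap) <= threshold;  // consistent while the disagreement stays below the threshold
}
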
*/ diff --git a/source/evaluation/paul_heuristic.h b/source/evaluation/paul_heuristic.h index 79d95c29..b840b250 100755 --- a/source/evaluation/paul_heuristic.h +++ b/source/evaluation/paul_heuristic.h @@ -15,8 +15,16 @@ #include "count_types.h" #include "source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h" -#include // TODO: for debugging only +#ifdef __FLEXFRINGE_CUDA +#include "cuda.h" +#include "cuda_runtime.h" +#include "device_launch_parameters.h" +#endif + +//#include // TODO: for debugging only +#include #include +#include /* The data contained in every node of the prefix tree or DFA */ class paul_data: public count_data { @@ -32,7 +40,13 @@ class paul_data: public count_data { int inferred_total_final; //std::vector predictions; +#ifdef __FLEXFRINGE_CUDA + using device_vector = distinguishing_sequences_handler_base::device_vector; // mapping to GPU memory instead + device_vector predictions; + ~paul_data() override; +#else layer_predictions_map predictions; +#endif // TODO: delete function inline float map_confidence(const float c){ @@ -56,11 +70,11 @@ class paul_data: public count_data { void print_state_label(std::iostream& output) override; - void set_confidence(const float confidence) noexcept; void add_inferred_type(const int t) noexcept; void add_tail(tail* t) override; + void set_confidence(const float confidence) noexcept; float get_confidence() const noexcept { return lm_confidence; }; inline bool has_type() const noexcept { return final_counts.size() > 0; } @@ -72,9 +86,38 @@ class paul_data: public count_data { void undo(evaluation_data* right) override; const auto& get_predictions() const noexcept {return predictions;} + + /** + * @brief Gets the number of predictions made at length len. + */ + const auto get_n_predictions(const int len) const noexcept { + #ifdef __FLEXFRINGE_CUDA + return this->predictions.len_size_map.at(len); + #else + return this->predictions[len].size(); + #endif + } + + /** + * @brief Gets the totoal number of predictions. + */ + const auto get_n_predictions() const noexcept { + int res = 0; + +#ifdef __FLEXFRINGE_CUDA + for(const size_t size: this->predictions.len_size_map | std::ranges::views::values){ + res += static_cast(size); + } +#else + for(const auto& preds: this->predictions | std::ranges::views::values){ + res += preds.size(); + } +#endif + + return res; + } - template - void set_predictions(T&& predictions){this->predictions = std::forward(predictions);} + void set_predictions(layer_predictions_map&& predictions); }; class paul_heuristic : public count_driven {