diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4b99028..267ec09f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,9 +18,11 @@ endif()
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR})
 
+
 option(COMPILE_DOCS "This is settable from the command line" OFF)
 option(ENABLE_DATABASE "Set this for the database connector to work" OFF)
 option(ENABLE_PYTHON "Set this for python connector to work" OFF)
+option(ENABLE_CUDA "Enable CUDA execution for some faster data structures" OFF)
 
 if (ENABLE_DATABASE)
   add_definitions(-D__FLEXFRINGE_DATABASE)
@@ -29,6 +31,17 @@ if (ENABLE_PYTHON)
   add_definitions(-D__FLEXFRINGE_PYTHON)
 endif()
 
+if (ENABLE_CUDA)
+  include(CheckLanguage)
+  check_language(CUDA)
+  if(CMAKE_CUDA_COMPILER)
+    add_definitions(-D__FLEXFRINGE_CUDA)
+    enable_language(CUDA)
+  else()
+    message(WARNING "Could not find CUDA on system. Proceeding without.")
+  endif()
+endif()
+
 #set(CMAKE_MESSAGE_LOG_LEVEL WARNING)
 
 # Default compiler flags:
@@ -191,6 +204,11 @@ if (ENABLE_DATABASE)
   target_link_libraries(flexfringe libpqxx::pqxx)
 endif()
 
+if(CMAKE_CUDA_COMPILER)
+  find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+  target_link_libraries(ActiveLearning ${CUDART_LIBRARY})
+endif()
+
 find_package(Threads)
 target_link_libraries(flexfringe ${CMAKE_THREAD_LIBS_INIT}) # For pthreads
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 9e1c9c71..7106892f 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -24,6 +24,7 @@ file(WRITE "gitversion.cpp" "const char *gitversion = \"${Gitversion}\";")
 ## create the evaluators.h file
 file(GLOB Files "${CMAKE_CURRENT_SOURCE_DIR}/evaluation/*.h")
 file(WRITE "evaluators.h" "#ifndef __ALL_HEADERS__ \n#define __ALL_HEADERS__ \n\n")
+
 foreach (Filepath ${Files})
   get_filename_component(Filename ${Filepath} NAME)
   file(APPEND "evaluators.h" "#include \"${Filename}\"\n")
@@ -140,7 +141,12 @@ file(APPEND "${cmakelists_eval}" " \"../active_learning/memory/incomplete_infor
 file(APPEND "${cmakelists_eval}" " \"../active_learning/system_under_learning\"\n")
 file(APPEND "${cmakelists_eval}" " \"../active_learning/system_under_learning/neural_network_suls\"\n")
 file(APPEND "${cmakelists_eval}" " \"../active_learning/system_under_learning/benchmark_parsers\"\n")
-file(APPEND "${cmakelists_eval}" ")\n\n\n")
+file(APPEND "${cmakelists_eval}" ")\n\n")
+
+# the cuda directives
+file(APPEND "${cmakelists_eval}" "if(CMAKE_CUDA_COMPILER)\n")
+file(APPEND "${cmakelists_eval}" " include_directories(\$\{CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES\})\n")
+file(APPEND "${cmakelists_eval}" "endif()\n\n")
 
 # "${CMAKE_CURRENT_SOURCE_DIR}/../active_learning//oracle/cex_search_strategies"
 # "${CMAKE_CURRENT_SOURCE_DIR}/../active_learning//oracle/cex_conflict_search"
diff --git a/source/active_learning/CMakeLists.txt b/source/active_learning/CMakeLists.txt
index 8ae42ea3..650f7001 100644
--- a/source/active_learning/CMakeLists.txt
+++ b/source/active_learning/CMakeLists.txt
@@ -15,8 +15,14 @@ include_directories(
     "${CMAKE_CURRENT_SOURCE_DIR}/system_under_learning/benchmark_parsers"
 )
 
+if(CMAKE_CUDA_COMPILER)
+  include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+    "${CMAKE_CURRENT_SOURCE_DIR}/active_learning_util/cuda"
+    "${CMAKE_CURRENT_SOURCE_DIR}/memory/distinguishing_sequences/cuda"
+  )
+endif()
 
-add_library(ActiveLearning STATIC
+set(ACTIVE_LEARNING_FILES
 
     active_learning_mode.h
     active_learning_mode.cpp
@@ -188,9 +194,27 @@
 )
 
+if(CMAKE_CUDA_COMPILER)
+  set(CUDA_FILES
+    active_learning_util/cuda/cuda_common.cuh
+    active_learning_util/cuda/cuda_common.cu
+    memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh
+    memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cu
+  )
+
+  set(ACTIVE_LEARNING_FILES
+    ${ACTIVE_LEARNING_FILES}
+    ${CUDA_FILES}
+  )
+
+  set_source_files_properties(${CUDA_FILES} PROPERTIES LANGUAGE CUDA)
+endif()
+
+add_library(ActiveLearning STATIC ${ACTIVE_LEARNING_FILES})
+
 if (ENABLE_PYTHON)
     target_link_libraries(ActiveLearning ${PYTHON_LIBRARIES})
 endif()
 
 if (ENABLE_DATABASE)
     target_link_libraries(ActiveLearning libpqxx::pqxx)
-endif()
+endif()
\ No newline at end of file
diff --git a/source/active_learning/active_learning_mode.cpp b/source/active_learning/active_learning_mode.cpp
index 4a1f523b..3cd97ae8 100644
--- a/source/active_learning/active_learning_mode.cpp
+++ b/source/active_learning/active_learning_mode.cpp
@@ -77,6 +77,9 @@ int active_learning_mode::run() {
     unique_ptr algorithm = algorithm_factory::create_algorithm_obj();
     algorithm->run(id);
 
+#ifdef __CUDA
+    cudaDeviceReset();
+#endif
     // Hielke: Can we we this one better? For example, we do it in the constructor of the corresponding algorithms
     /* LOG_S(INFO) << "Learning (partly) passively. Therefore read in input-data.";
     get_inputdata();
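
Aside (illustration only, not part of the patch): all GPU-only code in this change is meant to be guarded by the __FLEXFRINGE_CUDA definition that the top-level CMakeLists.txt adds when a CUDA compiler is found; the cudaDeviceReset() call above tests __CUDA instead, a macro these CMake changes do not define. A minimal sketch of the guarded-shutdown pattern, assuming __FLEXFRINGE_CUDA is the intended guard; shutdown_gpu() is a made-up name:

#ifdef __FLEXFRINGE_CUDA
#include "cuda_runtime.h"
#endif

// shutdown_gpu() is illustrative only; the patch performs the reset at the end of
// active_learning_mode::run() instead.
void shutdown_gpu() {
#ifdef __FLEXFRINGE_CUDA
    cudaDeviceReset();   // releases all device memory and state owned by this process
#endif
}
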
diff --git a/source/active_learning/active_learning_util/common_functions.cpp b/source/active_learning/active_learning_util/common_functions.cpp
index 2eaa3af3..721e0a39 100644
--- a/source/active_learning/active_learning_util/common_functions.cpp
+++ b/source/active_learning/active_learning_util/common_functions.cpp
@@ -439,15 +439,7 @@ trace* active_learning_namespace::vector_to_trace(const vector& vec, inputd
     throw runtime_error("We should not reach here. What happened?");
 }*/
 
-/**
- * @brief For debugging.
- */
-void active_learning_namespace::print_list(const list& l) {
-    for (const auto s : l) cout << s << " ";
-    cout << endl;
-}
-
-void active_learning_namespace::print_vector(const vector& l) {
+void active_learning_namespace::print_span(std::span l) {
     for (const auto s : l) cout << s << " ";
     cout << endl;
 }
diff --git a/source/active_learning/active_learning_util/common_functions.h b/source/active_learning/active_learning_util/common_functions.h
index f76b117b..1e21bc03 100644
--- a/source/active_learning/active_learning_util/common_functions.h
+++ b/source/active_learning/active_learning_util/common_functions.h
@@ -24,6 +24,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -98,9 +99,7 @@ namespace active_learning_namespace {
         std::cout << std::endl;
     }
 
-    [[maybe_unused]] void print_list(const std::list& l);
-
-    void print_vector(const std::vector& l);
+    void print_span(std::span l);
 
 } // namespace active_learning_namespace
diff --git a/source/active_learning/active_learning_util/cuda/cuda_common.cu b/source/active_learning/active_learning_util/cuda/cuda_common.cu
new file mode 100644
index 00000000..5d414e15
--- /dev/null
+++ b/source/active_learning/active_learning_util/cuda/cuda_common.cu
@@ -0,0 +1,29 @@
+/**
+ * @file cuda_common.cu
+ * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl)
+ * @brief
+ * @version 0.1
+ * @date 2025-07-13
+ *
+ * @copyright Copyright (c) 2025
+ *
+ */
+
+#ifndef __FLEXFRINGE_CUDA
+#include
+static_assert(std::integral_constant::value, "cuda_common.cu included even though CUDA not enabled in project.");
+#endif
+
+#include "cuda_common.cuh"
+
+#include
+
+void cuda_common::gpuAssert(cudaError_t code, const char *file, int line, bool abort)
+{
+    if (code != cudaSuccess)
+    {
+        //fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+        std::cerr << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line;
+        if (abort) exit(code);
+    }
+}
\ No newline at end of file
diff --git a/source/active_learning/active_learning_util/cuda/cuda_common.cuh b/source/active_learning/active_learning_util/cuda/cuda_common.cuh
new file mode 100644
index 00000000..3e35288f
--- /dev/null
+++ b/source/active_learning/active_learning_util/cuda/cuda_common.cuh
@@ -0,0 +1,26 @@
+/**
+ * @file cuda_common.cuh
+ * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl)
+ * @brief
+ * @version 0.1
+ * @date 2025-07-12
+ *
+ * @copyright Copyright (c) 2025
+ *
+ */
+
+#ifndef __FLEXFRINGE_CUDA
+#include
+static_assert(std::integral_constant::value, "cuda_common.cuh included even though CUDA not enabled in project.");
+#endif
+
+#ifndef __CUDA_COMMON_CUH__
+#define __CUDA_COMMON_CUH__
+
+#include "cuda.h"
+
+namespace cuda_common {
+    void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true);
+}
+
+#endif // __CUDA_COMMON_CUH__
\ No newline at end of file
diff --git a/source/active_learning/algorithms/paul.cpp b/source/active_learning/algorithms/paul.cpp
index 4869415b..2fffe59e 100644
--- a/source/active_learning/algorithms/paul.cpp
+++ b/source/active_learning/algorithms/paul.cpp
@@ -43,7 +43,10 @@ void paul_algorithm::update_node_data(apta_node* n, std::unique_ptr& aut)
         ds_handler->complete_node(n, aut);
     }
 
-    if(n_data->get_predictions().size() != ds_handler->size()){
+    //if(n->get_depth() >= 10)
+    //    return;
+
+    if(n_data->get_n_predictions() != ds_handler->size()){
         auto y_pred = ds_handler->predict_node_with_sul_layer_wise(*aut, n);
         n_data->set_predictions(std::move(y_pred));
     }
@@ -159,7 +162,8 @@ refinement* paul_algorithm::check_blue_node_for_merge_partner(apta_node* const b
             // continue;
             //}
         }
-
+
+        //if(ds_handler->get_score() > 0)
         ref->score = ds_handler->get_score(); // score computed in check_consistency() or distributions_consistent()
         if(ref->score > 0){
             rs.insert(ref);
@@ -375,14 +379,14 @@ list paul_algorithm::find_hypothesis(list& previous_re
     }
     */
 
 //#ifndef NDEBUG
-    {
+/*  {
         static int c = 0;
         merger->print_dot("after_" + to_string(c++) + ".dot");
 
         if(c%10==0){
             output_manager::print_current_automaton(merger.get(), "model.", to_string(c) + ".intermediate");
         }
-    }
+    } */
 //#endif
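
Aside (illustration only, not part of the patch): the cuda_common files above provide cuda_common::gpuAssert(), which later parts of this patch wrap in a gpuErrchk macro. A minimal sketch of that call-site pattern; example_allocation() and the buffer size are made up, only gpuAssert and the standard CUDA runtime calls come from the patch:

#include "cuda_runtime.h"
#include "cuda_common.cuh"

// same macro shape as the one defined further down in this patch
#define gpuErrchk(ans) { cuda_common::gpuAssert((ans), __FILE__, __LINE__); }

void example_allocation() {
    int* buf_d = nullptr;
    // every cudaError_t-returning call can be wrapped; on failure gpuAssert prints
    // the error string plus file and line, and by default exits the process
    gpuErrchk( cudaMalloc((void**) &buf_d, 1024 * sizeof(int)) );
    gpuErrchk( cudaMemset(buf_d, 0, 1024 * sizeof(int)) );
    gpuErrchk( cudaFree(buf_d) );
}
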
+ */ + __global__ void xor_vectors_kernel(const int* v1, const int* v2, int* tmp, const int size){ + int gid = blockDim.x * blockIdx.x + threadIdx.x; + + if(gid >= size) + return; + + tmp[gid] = v1[gid] == v2[gid] ? 1 : 0; + } +} + + /** + * @brief Get the overlap in percent of two int-vectors of given size. + * Useful for e.g. computing the accuracy score in between two vectors. + * + * ..._d indicates a data structure on device- (gpu-) memory. + * _...h for host ("cpu-visible" memory) + * + * TODO: inefficiency by allocating/recallocating memory + */ + float distinguishing_sequences_gpu::get_overlap_gpu(const int* d1_d, const int* d2_d, const size_t size){ + const int threads_per_block = 256; + const int n_blocks = max(1, static_cast(size/threads_per_block)); + + dim3 block(threads_per_block); + dim3 grid(n_blocks); + + int* res_arr_d; + const auto res_arr_byte_size = grid.x * sizeof(int); + gpuErrchk( cudaMalloc((void**) &res_arr_d, res_arr_byte_size) ); + gpuErrchk( cudaMemset(res_arr_d, 0, res_arr_byte_size) ); + + int* tmp_arr_d; + gpuErrchk( cudaMalloc((void**) &tmp_arr_d, size * sizeof(int)) ); + + xor_vectors_kernel<<>>(d1_d, d2_d, tmp_arr_d, size); + gpuErrchk( cudaDeviceSynchronize() ); + + sum_kernel<<>>(tmp_arr_d, res_arr_d, size); + gpuErrchk( cudaDeviceSynchronize() ); + + int* res_arr_h; + res_arr_h = (int*) malloc(res_arr_byte_size); + cudaMemcpy(res_arr_h, res_arr_d, res_arr_byte_size, cudaMemcpyDeviceToHost); + + int n_overlaps = 0; + for(int i = 0; i < grid.x; ++i){ + n_overlaps += res_arr_h[i]; + } + + free(res_arr_h); + gpuErrchk( cudaFree(tmp_arr_d) ); + gpuErrchk( cudaFree(res_arr_d) ); + + constexpr static float epsilon = 1e-6; // avoid division error when v1 or v2 only have -1 entries, or size of this is 0 + return static_cast(n_overlaps) / (static_cast(size) + epsilon); +} + +#endif // gpuErrcheck \ No newline at end of file diff --git a/source/active_learning/memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh b/source/active_learning/memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh new file mode 100644 index 00000000..29ce4542 --- /dev/null +++ b/source/active_learning/memory/distinguishing_sequences/cuda/distinguishing_sequences_gpu.cuh @@ -0,0 +1,24 @@ +/** + * @file distinguishing_sequences_gpu.cuh + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-07-12 + * + * @copyright Copyright (c) 2025 + * + */ + +#ifndef __FLEXFRINGE_CUDA +#include +static_assert(std::integral_constant::value, "distinguishing_sequences_gpu.cuh included even though CUDA not enabled in project."); +#endif + +#ifndef __KERNELS_DISTINGUISHING_SEQUENCES_CUH__ +#define __KERNELS_DISTINGUISHING_SEQUENCES_CUH__ + +namespace distinguishing_sequences_gpu { + float get_overlap_gpu(const int* d1_d, const int* d2_d, const size_t size); +} + +#endif // __KERNELS_DISTINGUISHING_SEQUENCES_CUH__ \ No newline at end of file diff --git a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h index 7531448b..254cb114 100644 --- a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h +++ b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h @@ -73,7 +73,15 @@ class distinguishing_sequences_handler_base { public: using layer_predictions_map = std::unordered_map< int, std::vector >; - + + #ifdef 
__FLEXFRINGE_CUDA + // an aggregate to structure values + struct device_vector { + std::unordered_map len_pred_map_d; // maps to device pointers + std::unordered_map len_size_map; // maps to size + }; + #endif + distinguishing_sequences_handler_base(const std::shared_ptr& sul) : sul(sul){}; distinguishing_sequences_handler_base(){ @@ -161,6 +169,17 @@ class distinguishing_sequences_handler_base { throw std::invalid_argument("This ii-handler does not implement distributions_consistent function"); } + #ifdef __FLEXFRINGE_CUDA + /** + * @brief The same as the non-CUDA version, but it does work on a GPU. + */ + virtual bool distributions_consistent_layer_wise(const device_vector& v1, + const device_vector& v2, + const std::optional depth1_opt = std::nullopt, + const std::optional depth2_opt = std::nullopt) { + throw std::invalid_argument("This ii-handler does not implement distributions_consistent function"); + } + #else /** * @brief A function determining whether the distributions as gained from predict_node_with_automaton * and predict_node_with_sul are consistent. Layer-wise enables different kinds of statistical tests such as @@ -174,6 +193,7 @@ class distinguishing_sequences_handler_base { const std::optional depth2_opt = std::nullopt) { throw std::invalid_argument("This ii-handler does not implement distributions_consistent function"); } + #endif }; /** diff --git a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp index 2bb4b9d0..68afd081 100644 --- a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp +++ b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.cpp @@ -19,6 +19,10 @@ #include #include +#ifdef __FLEXFRINGE_CUDA +#include "distinguishing_sequences_gpu.cuh" +#endif + using namespace std; /** @@ -407,6 +411,50 @@ float distinguishing_sequences_handler_fast::compute_threshold(const optional depth1_opt, const optional depth2_opt) { + + /* if(depth1_opt.has_value() && depth2_opt.has_value() && AL_ADJUST_THRESHOLD && (depth1_opt.value() >= 10 || depth2_opt.value() >= 10)){ + last_overlap = -1; // indicating we skipped this step + return true; + } */ + if(d1.len_size_map.size() != d2.len_size_map.size()) + throw runtime_error("Distributions do not match in lengths"); + + float max_ratio = 0; + for(auto len_size_t : d1.len_size_map | std::views::keys){ + const int len = static_cast(len_size_t); + const auto threshold = compute_threshold(depth1_opt, depth2_opt); + + if(!d2.len_size_map.contains(len)) + throw runtime_error("Distributions captured different lengths, this should not have happened"); + + const auto n_preds = static_cast(d1.len_size_map.at(len)); + if(n_preds != static_cast(d1.len_size_map.at(len))) + throw runtime_error("Distributions do not match in size in length " + to_string(len)); + + const auto v1_d = d1.len_pred_map_d.at(len); + const auto v2_d = d2.len_pred_map_d.at(len); + + const auto ratio = 1.0f - distinguishing_sequences_gpu::get_overlap_gpu(v1_d, v2_d, n_preds); + if(ratio > threshold){ + //cout << "\nsize: " << v1.size() << ", depth: " << depth << ", ratio: " << ratio << endl; + last_overlap = 0; + return false; + } + + max_ratio = max(max_ratio, ratio); // TODO: adjust the data types + } + + last_overlap = 1-max_ratio; + return true; +} +#else /** * @brief Does what you think it does. 
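
Aside (illustration only, not part of the patch): the device_vector aggregate introduced above mirrors a host-side layer_predictions_map in GPU memory, one buffer per prediction length. A minimal sketch of that copy, assuming int predictions; the struct is redeclared here only to keep the sketch self-contained, and to_device() is a made-up helper (the patch does the equivalent copy in paul_data::set_predictions() further down):

#include <cstddef>
#include <unordered_map>
#include <vector>
#include "cuda_runtime.h"

struct device_vector {
    std::unordered_map<int, int*> len_pred_map_d;   // device pointer per length
    std::unordered_map<int, size_t> len_size_map;   // number of predictions per length
};

device_vector to_device(const std::unordered_map<int, std::vector<int>>& host_preds) {
    device_vector res;
    for (const auto& [len, preds] : host_preds) {
        int* preds_d = nullptr;
        const size_t byte_size = preds.size() * sizeof(int);
        cudaMalloc((void**)&preds_d, byte_size);                               // one device buffer per length
        cudaMemcpy(preds_d, preds.data(), byte_size, cudaMemcpyHostToDevice);  // host to device copy
        res.len_pred_map_d[len] = preds_d;
        res.len_size_map[len] = preds.size();
    }
    return res;  // the buffers must later be released with cudaFree by whoever owns the result
}
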
* @@ -414,20 +462,25 @@ float distinguishing_sequences_handler_fast::compute_threshold(const optional depth1_opt, const optional depth2_opt) { + + /* if(depth1_opt.has_value() && depth2_opt.has_value() && AL_ADJUST_THRESHOLD && (depth1_opt.value() >= 10 || depth2_opt.value() >= 10)){ + last_overlap = -1; // indicating we skipped this step + return true; + } */ if(d1.size() != d2.size()) throw runtime_error("Distributions are unequal"); float max_ratio = 0; - for(auto depth : d1 | std::views::keys){ + for(auto len : d1 | std::views::keys){ const auto threshold = compute_threshold(depth1_opt, depth2_opt); - if(!d2.contains(depth)) + if(!d2.contains(len)) throw runtime_error("Distributions captured different lengths, this should not have happened"); - const auto& v1 = d1.at(depth); - const auto& v2 = d2.at(depth); + const auto& v1 = d1.at(len); + const auto& v2 = d2.at(len); if(v1.size() != v2.size()) - throw runtime_error("Distributions do not match in size in length " + to_string(depth)); + throw runtime_error("Distributions do not match in size in length " + to_string(len)); const auto ratio = get_overlap(v1, v2); if(ratio > threshold){ @@ -443,5 +496,5 @@ bool distinguishing_sequences_handler_fast::distributions_consistent_layer_wise( //cout << "\nDisagreed: " << disagreed << " | agreed: " << agreed << "max ratio: " << max_ratio << endl; //cout << "\nmax ratio: " << max_ratio << endl; return true; - -} \ No newline at end of file +} +#endif \ No newline at end of file diff --git a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h index f9d78923..92e9bb0c 100644 --- a/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h +++ b/source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_fast.h @@ -44,11 +44,19 @@ class distinguishing_sequences_handler_fast final : public distinguishing_sequen const int size() const override {return m_suffixes.size();} + #ifdef __FLEXFRINGE_CUDA + bool distributions_consistent_layer_wise(const device_vector& v1, + const device_vector& v2, + const std::optional depth1_opt = std::nullopt, + const std::optional depth2_opt = std::nullopt) + override; + #else bool distributions_consistent_layer_wise(const layer_predictions_map& v1, const layer_predictions_map& v2, const std::optional depth1_opt = std::nullopt, const std::optional depth2_opt = std::nullopt) - override; + override; + #endif float compute_threshold(const std::optional& d1, const std::optional& d2); diff --git a/source/active_learning/memory/observation_table.cpp b/source/active_learning/memory/observation_table.cpp index bdd38162..8935dd52 100644 --- a/source/active_learning/memory/observation_table.cpp +++ b/source/active_learning/memory/observation_table.cpp @@ -361,7 +361,7 @@ void observation_table::print() const { /* cout << "Upper table: " << endl; for(auto it = upper_table.cbegin(); it != upper_table.cend(); ++it){ const auto& row_name = it->first; - print_vector(row_name); + print_span(row_name); } cout << "Lower table:" << endl; @@ -372,11 +372,11 @@ void observation_table::print() const { cout << "Columns:" << endl; for(const auto col: all_columns){ - print_vector(col); + print_span(col); } cout << "Rows to close:" << endl; for(const auto r: incomplete_rows){ - print_vector(r); + print_span(r); } */ } diff --git a/source/active_learning/memory/observation_table_imat.cpp 
b/source/active_learning/memory/observation_table_imat.cpp index 449cbff0..379e6bba 100644 --- a/source/active_learning/memory/observation_table_imat.cpp +++ b/source/active_learning/memory/observation_table_imat.cpp @@ -391,7 +391,7 @@ void observation_table_imat::print() const { cout << "Upper table rows: " << endl; for (auto it = upper_table.cbegin(); it != upper_table.cend(); ++it) { const auto& row_name = it->first; - print_vector(row_name); + print_span(row_name); } cout << "Upper table data: " << endl; @@ -411,11 +411,11 @@ void observation_table_imat::print() const { cout << "Columns:" << endl; for (const auto col : all_columns) { - print_vector(col); + print_span(col); } cout << "Rows to close:" << endl; for (const auto r : incomplete_rows) { - print_vector(r); + print_span(r); } } diff --git a/source/evaluation/CMakeLists.txt b/source/evaluation/CMakeLists.txt index f1a9460a..48bd2fb1 100644 --- a/source/evaluation/CMakeLists.txt +++ b/source/evaluation/CMakeLists.txt @@ -8,6 +8,9 @@ include_directories( "../active_learning/system_under_learning/benchmark_parsers" ) +if(CMAKE_CUDA_COMPILER) + include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) +endif() add_library(Evaluation STATIC aic.h diff --git a/source/evaluation/paul_heuristic.cpp b/source/evaluation/paul_heuristic.cpp index 3176f880..ee9003e5 100755 --- a/source/evaluation/paul_heuristic.cpp +++ b/source/evaluation/paul_heuristic.cpp @@ -12,13 +12,14 @@ #include #include +#ifdef __FLEXFRINGE_CUDA +#include "source/active_learning/active_learning_util/cuda/cuda_common.cuh" +#include +#endif + REGISTER_DEF_TYPE(paul_heuristic); REGISTER_DEF_DATATYPE(paul_data); -// template void paul_data::set_predictions(std::vector& predictions); -// template void paul_data::set_predictions(std::vector&& predictions); -template void paul_data::set_predictions(layer_predictions_map&& predictions); - void paul_data::print_state_label(std::iostream& output){ count_data::print_state_label(output); }; @@ -59,6 +60,48 @@ void paul_data::update(evaluation_data* right){ } } +/** + * @brief Overwrites the current set of predictions with the ones handed to this method. + */ +void paul_data::set_predictions(layer_predictions_map&& predictions){ +#ifdef __FLEXFRINGE_CUDA + +#ifndef gpuErrcheck +#define gpuErrchk(ans) { cuda_common::gpuAssert((ans), __FILE__, __LINE__); } + + std::unordered_map< int, int* > preds_d(predictions.size()); + std::unordered_map< int, size_t > sizes(predictions.size()); + + for(const auto& [len, preds]: predictions){ + sizes[len] = preds.size(); + + int* target_field_d; + const auto byte_size = preds.size() * sizeof(int); + gpuErrchk(cudaMalloc((void**) &target_field_d, byte_size)); + gpuErrchk(cudaMemcpy(target_field_d, preds.data(), byte_size, cudaMemcpyHostToDevice)); + preds_d[len] = target_field_d; + } + + this->predictions.len_pred_map_d = std::move(preds_d); + this->predictions.len_size_map = std::move(sizes); +#endif // gpuErrcheck + +#else + this->predictions = move(predictions); +#endif // __FLEXFRINGE_CUDA +} + +#ifdef __FLEXFRINGE_CUDA +paul_data::~paul_data(){ +#ifndef gpuErrcheck +#define gpuErrchk(ans) { cuda_common::gpuAssert((ans), __FILE__, __LINE__); } + + for(int* preds_d: this->predictions.len_pred_map_d | std::ranges::views::values){ + gpuErrchk(cudaFree(preds_d)); + } +} +#endif // gpuErrcheck +#endif // __FLEXFRINGE_CUDA /** * @brief Does what you think it does. 
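
Aside (illustration only, not part of the patch): the layer-wise consistency check above calls distinguishing_sequences_gpu::get_overlap_gpu() on two prediction vectors that already live in device memory and treats the pair as inconsistent once the disagreement ratio (1 - overlap) exceeds the threshold for a length. A minimal sketch of that decision for a single length; example_consistent(), the host vectors, and the 0.1 threshold are made up, and equal sizes of v1_h and v2_h are assumed:

#include <vector>
#include "cuda_runtime.h"
#include "distinguishing_sequences_gpu.cuh"

bool example_consistent(const std::vector<int>& v1_h, const std::vector<int>& v2_h, float threshold = 0.1f) {
    const size_t n = v1_h.size();
    int *v1_d = nullptr, *v2_d = nullptr;
    cudaMalloc((void**)&v1_d, n * sizeof(int));
    cudaMalloc((void**)&v2_d, n * sizeof(int));
    cudaMemcpy(v1_d, v1_h.data(), n * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(v2_d, v2_h.data(), n * sizeof(int), cudaMemcpyHostToDevice);

    // fraction of positions where both vectors predict the same symbol
    const float overlap = distinguishing_sequences_gpu::get_overlap_gpu(v1_d, v2_d, n);

    cudaFree(v1_d);
    cudaFree(v2_d);
    return (1.0f - overlap) <= threshold;  // consistent while the disagreement stays below the threshold
}
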
*/ diff --git a/source/evaluation/paul_heuristic.h b/source/evaluation/paul_heuristic.h index 79d95c29..b840b250 100755 --- a/source/evaluation/paul_heuristic.h +++ b/source/evaluation/paul_heuristic.h @@ -15,8 +15,16 @@ #include "count_types.h" #include "source/active_learning/memory/distinguishing_sequences/distinguishing_sequences_handler_base.h" -#include // TODO: for debugging only +#ifdef __FLEXFRINGE_CUDA +#include "cuda.h" +#include "cuda_runtime.h" +#include "device_launch_parameters.h" +#endif + +//#include // TODO: for debugging only +#include #include +#include /* The data contained in every node of the prefix tree or DFA */ class paul_data: public count_data { @@ -32,7 +40,13 @@ class paul_data: public count_data { int inferred_total_final; //std::vector predictions; +#ifdef __FLEXFRINGE_CUDA + using device_vector = distinguishing_sequences_handler_base::device_vector; // mapping to GPU memory instead + device_vector predictions; + ~paul_data() override; +#else layer_predictions_map predictions; +#endif // TODO: delete function inline float map_confidence(const float c){ @@ -56,11 +70,11 @@ class paul_data: public count_data { void print_state_label(std::iostream& output) override; - void set_confidence(const float confidence) noexcept; void add_inferred_type(const int t) noexcept; void add_tail(tail* t) override; + void set_confidence(const float confidence) noexcept; float get_confidence() const noexcept { return lm_confidence; }; inline bool has_type() const noexcept { return final_counts.size() > 0; } @@ -72,9 +86,38 @@ class paul_data: public count_data { void undo(evaluation_data* right) override; const auto& get_predictions() const noexcept {return predictions;} + + /** + * @brief Gets the number of predictions made at length len. + */ + const auto get_n_predictions(const int len) const noexcept { + #ifdef __FLEXFRINGE_CUDA + return this->predictions.len_size_map.at(len); + #else + return this->predictions[len].size(); + #endif + } + + /** + * @brief Gets the totoal number of predictions. + */ + const auto get_n_predictions() const noexcept { + int res = 0; + +#ifdef __FLEXFRINGE_CUDA + for(const size_t size: this->predictions.len_size_map | std::ranges::views::values){ + res += static_cast(size); + } +#else + for(const auto& preds: this->predictions | std::ranges::views::values){ + res += preds.size(); + } +#endif + + return res; + } - template - void set_predictions(T&& predictions){this->predictions = std::forward(predictions);} + void set_predictions(layer_predictions_map&& predictions); }; class paul_heuristic : public count_driven {