diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1b105c342137..be4cb62c2311 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ name: Ubuntu - master - develop - feature/** + - lru_miltiindex_prototype env: JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 diff --git a/cmake/install/userver-multi-index-lru-config.cmake b/cmake/install/userver-multi-index-lru-config.cmake new file mode 100644 index 000000000000..28acce8da7a8 --- /dev/null +++ b/cmake/install/userver-multi-index-lru-config.cmake @@ -0,0 +1,11 @@ +include_guard(GLOBAL) + +if(userver_multiindex_lru_FOUND) + return() +endif() + +find_package(userver REQUIRED COMPONENTS core) + +find_package(Boost REQUIRED) + +set(userver_multiindex_lru_FOUND TRUE) diff --git a/libraries/CMakeLists.txt b/libraries/CMakeLists.txt index 36dfc73798f3..21ba4f303f65 100644 --- a/libraries/CMakeLists.txt +++ b/libraries/CMakeLists.txt @@ -2,11 +2,16 @@ option(USERVER_FEATURE_EASY "Build easy HTTP server library" "${USERVER_LIB_ENAB option(USERVER_FEATURE_S3API "Build S3 api client library" "${USERVER_LIB_ENABLED_DEFAULT}") option(USERVER_FEATURE_GRPC_REFLECTION "Build grpc reflection library" "${USERVER_LIB_ENABLED_DEFAULT}") option(USERVER_FEATURE_GRPC_PROTOVALIDATE "Build grpc protovalidate library" "OFF") +option(USERVER_FEATURE_MULTI_INDEX_LRU "Build multi index lru library" "${USERVER_LIB_ENABLED_DEFAULT}") if(USERVER_FEATURE_S3API) add_subdirectory(s3api) endif() +if(USERVER_FEATURE_MULTI_INDEX_LRU) + add_subdirectory(multi_index_lru) +endif() + if(USERVER_FEATURE_EASY) if(NOT USERVER_FEATURE_POSTGRESQL) message(FATAL_ERROR "'USERVER_FEATURE_EASY' requires 'USERVER_FEATURE_POSTGRESQL=ON'") diff --git a/libraries/multi_index_lru/CMakeLists.txt b/libraries/multi_index_lru/CMakeLists.txt new file mode 100644 index 000000000000..b610d719d1f9 --- /dev/null +++ b/libraries/multi_index_lru/CMakeLists.txt @@ -0,0 +1,23 @@ +project(userver-multi-index-lru CXX) + +find_package(Boost REQUIRED) + 
+userver_module( + multi-index-lru + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" + UTEST_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*_test.cpp" + UBENCH_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*_benchmark.cpp" + DEPENDS core +) + +target_include_directories(userver-multi-index-lru + PUBLIC + ${Boost_INCLUDE_DIRS} + "${CMAKE_CURRENT_SOURCE_DIR}/tests/" +) + +_userver_directory_install( + COMPONENT multi-index-lru + FILES "${USERVER_ROOT_DIR}/cmake/modules/Findboost.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/userver/modules" +) \ No newline at end of file diff --git a/libraries/multi_index_lru/include/userver/multi_index_lru/lru_boost_list_container.h b/libraries/multi_index_lru/include/userver/multi_index_lru/lru_boost_list_container.h new file mode 100644 index 000000000000..9f5c31ae7f88 --- /dev/null +++ b/libraries/multi_index_lru/include/userver/multi_index_lru/lru_boost_list_container.h @@ -0,0 +1,205 @@ +#pragma once + +#include +#include +#include +#include +#include + +USERVER_NAMESPACE_BEGIN + +namespace multi_index_lru { +using namespace boost::multi_index; + +namespace impl { +// Cache element: the user value plus the intrusive list hook that links it into the LRU usage list; copying and moving are deleted. +template +struct ValueWithHook +{ + Value value; + mutable boost::intrusive::list_member_hook<> list_hook; + const ValueWithHook *GetPointerToSelf() const { return this; }; + + explicit ValueWithHook(const Value& val) : value(val) {} + + explicit ValueWithHook(Value&& val) : value(std::move(val)) {} + + ValueWithHook() = delete; + ValueWithHook(const ValueWithHook&) = delete; + ValueWithHook(ValueWithHook &&) = delete; + + ValueWithHook &operator=(const ValueWithHook&) = delete; + ValueWithHook &operator=(ValueWithHook&&) = delete; + + operator Value&() { return value; } + operator const Value&() const { return value; } + + Value* operator->() { return &value; } + const Value* operator->() const { return &value; } + + Value& get() { return value; } + const Value& get() const { return value; } + + using boost_list = boost::intrusive::list< + ValueWithHook, + 
boost::intrusive::member_hook< + ValueWithHook, + boost::intrusive::list_member_hook<>, + &ValueWithHook::list_hook + > + >; + + void push_back_to_list(boost_list &lst) const { + lst.push_back(const_cast(*this)); + } + + void splice_in_list(boost_list &lst) const { + lst.splice(lst.end(), lst, lst.iterator_to(const_cast(*this))); + } +}; + +struct internal_ptr_tag {}; +} // namespace impl + +// Bounded multi-index container with least-recently-used eviction: elements live in a boost::multi_index container and are also linked into usage_list, ordered from least to most recently used. +template< + typename Value, + typename IndexSpecifierList, + typename Allocator = std::allocator> +> +class LRUCacheContainer { +private: + using CacheItem = impl::ValueWithHook; + using List = boost::intrusive::list< + CacheItem, + boost::intrusive::member_hook< + CacheItem, + boost::intrusive::list_member_hook<>, + &CacheItem::list_hook + > + >; + + using ExtendedIndexSpecifierList = typename boost::mpl::push_back< + IndexSpecifierList, + hashed_unique< + tag, + const_mem_fun + > + >::type; + + using Container = multi_index_container< + CacheItem, + ExtendedIndexSpecifierList, + Allocator + >; + + Container container; + size_t max_size; + + List usage_list; + +public: + using value_type = Value; + using cache_item_type = CacheItem; + + LRUCacheContainer(size_t max_size) : max_size(max_size) {} + + // Inserts an element built from args. If an index rejects it as a duplicate, the blocking element is marked most recently used and false is returned without evicting anything. After a successful insertion the least recently used element is evicted if the size exceeds max_size. + template + bool emplace(Args&&... 
args) { + auto result = container.emplace(std::forward(args)...); + + auto &value = *result.first; + if (!result.second) { + value.splice_in_list(usage_list); + return false; + } + + value.push_back_to_list(usage_list); + if (container.size() > max_size) { + evict_lru(); + } + return true; + } + + bool insert(const Value& value) { + return emplace(value); + } + + bool insert(Value&& value) { + return emplace(std::move(value)); + } + + // Looks key up in the index selected by the tag; a hit is moved to the most-recently-used end of usage_list. Returns the index iterator (end() of that index on a miss). + template + typename Container::template index::type::iterator find(const Key& key) { + auto& primary_index = container.template get(); + auto it = primary_index.find(key); + + if (it != primary_index.end()) { + it->splice_in_list(usage_list); + } + + return it; + } + + template + bool contains(const Key& key) { + return this->template find(key) != container.template get().end(); + } + + // Removes every element whose key in the selected index equals key, unlinking each one from usage_list before the container destroys it (a non-unique index may hold several). Returns true if at least one element was removed. + template + bool erase(const Key& key) { + auto& primary_index = container.template get(); + const auto range = primary_index.equal_range(key); + if (range.first == range.second) { + return false; + } + for (auto it = range.first; it != range.second; ++it) { + usage_list.erase(usage_list.iterator_to(*it)); + } + primary_index.erase(range.first, range.second); + return true; + } + + template + auto& get() { + return container.template get(); + } + + template + const auto& get() const { + return container.template get(); + } + + size_t size() const { return container.size(); } + bool empty() const { return container.empty(); } + size_t capacity() const { return max_size; } + + void set_capacity(size_t new_capacity) { + max_size = new_capacity; + while (container.size() > max_size) { + evict_lru(); + } + } + + // Removes all elements. usage_list is emptied first so that no element is destroyed while still linked into it. + void clear() { + usage_list.clear(); + container.clear(); + } + +private: + // Drops the element at the front of usage_list (the least recently used one), erasing it from the container by its own address. + void evict_lru() { + if (!usage_list.empty()) { + CacheItem *ptr_to_erase = &*usage_list.begin(); + usage_list.erase(usage_list.begin()); + container.template get().erase(ptr_to_erase); + } + } +}; +} // namespace multi_index_lru + +USERVER_NAMESPACE_END \ No newline at end of file diff --git a/libraries/multi_index_lru/library.yaml b/libraries/multi_index_lru/library.yaml new file mode 100644 index 
000000000000..91039f89479f --- /dev/null +++ b/libraries/multi_index_lru/library.yaml @@ -0,0 +1,9 @@ +project-name: userver-lib-multi-index-lru + +description: multi index lru cache + +maintainers: + - Common components + +libraries: + - userver-core diff --git a/libraries/multi_index_lru/src/main_benchmark.cpp b/libraries/multi_index_lru/src/main_benchmark.cpp new file mode 100644 index 000000000000..c8f23c3dc800 --- /dev/null +++ b/libraries/multi_index_lru/src/main_benchmark.cpp @@ -0,0 +1,17 @@ +#include "benchmarks/lru_basic_benchmarks.h" +#include "benchmarks/lru_google_benchmarks.h" + +// #define LRU_CONTAINER_DEBUG__ +#include + +using namespace USERVER_NAMESPACE; + +// Runs the plain timing benchmark, then registers, initializes and runs the Google Benchmark suite. +int main() { + benchmarks::simple_benchmark("boost_list_output.txt"); + benchmarks::google_benchmark(); + + benchmarks::google_benchmark_init("google_output.txt"); + benchmarks::google_benchmark_run(); + return 0; +} \ No newline at end of file diff --git a/libraries/multi_index_lru/src/main_test.cpp b/libraries/multi_index_lru/src/main_test.cpp new file mode 100644 index 000000000000..a16f4140a393 --- /dev/null +++ b/libraries/multi_index_lru/src/main_test.cpp @@ -0,0 +1,14 @@ +#include "tests/lru_basic_tests.h" + +// #define LRU_CONTAINER_DEBUG__ +#include + +using namespace USERVER_NAMESPACE; + +// Runs the LRU container checks in order and reports success on stdout. +int main() { + test_lru_users(); + test_lru_products(); + std::cout << "all tests success" << std::endl; + return 0; +} \ No newline at end of file diff --git a/libraries/multi_index_lru/src/multi_index_lru.cpp b/libraries/multi_index_lru/src/multi_index_lru.cpp new file mode 100644 index 000000000000..bdb5f8542d7c --- /dev/null +++ b/libraries/multi_index_lru/src/multi_index_lru.cpp @@ -0,0 +1,10 @@ +#include + +USERVER_NAMESPACE_BEGIN + +namespace multi_index_lru { +// Version string of the library; wrapped in the USERVER_NAMESPACE macros like the headers of this library instead of a hard-coded `userver` namespace. +const char* multi_index_lru_version = "1.0"; +} // namespace multi_index_lru + +USERVER_NAMESPACE_END diff --git a/libraries/multi_index_lru/tests/benchmarks/benchmarks_resourses.h b/libraries/multi_index_lru/tests/benchmarks/benchmarks_resourses.h new file mode 100644 index 000000000000..a9d5e3acf9fc --- 
/dev/null +++ b/libraries/multi_index_lru/tests/benchmarks/benchmarks_resourses.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "../lru_container_concept.h" + +USERVER_NAMESPACE_BEGIN + +namespace benchmarks { + +const std::vector CACHE_SIZES = {1000, 10000, 100000}; +const size_t OPERATIONS_NUMBER = 100000; +const int MAX_ID_SIZE = 50000; + +struct id_tag {}; +struct email_tag {}; +struct name_tag {}; + +struct User { + int id; + std::string email; + std::string name; + + bool operator==(const User& other) const { + return id == other.id && email == other.email && name == other.name; + } +}; + +// Random source for benchmark inputs. Everything below is defined in a header, so it is marked inline to keep a single definition if several translation units include it. +namespace generator { +inline std::random_device rd; +inline std::mt19937 gen(rd()); +inline std::uniform_real_distribution action_dist(0.0, 1.0); +inline std::uniform_int_distribution id_dist(0, MAX_ID_SIZE); + +inline User generate_user() { + std::string email = "email" + std::to_string(id_dist(gen)); + std::string name = "name" + std::to_string(id_dist(gen)); + return User{id_dist(gen), email, name}; +} + +inline int generate_id() { + return id_dist(gen); +} + +inline std::string generate_name() { + return "name" + std::to_string(id_dist(gen)); +} + +inline std::string generate_email() { + return "email" + std::to_string(id_dist(gen)); +} +} // generator +} // benchmarks +USERVER_NAMESPACE_END \ No newline at end of file diff --git a/libraries/multi_index_lru/tests/benchmarks/lru_basic_benchmarks.h b/libraries/multi_index_lru/tests/benchmarks/lru_basic_benchmarks.h new file mode 100644 index 000000000000..9c57fff865fc --- /dev/null +++ b/libraries/multi_index_lru/tests/benchmarks/lru_basic_benchmarks.h @@ -0,0 +1,91 @@ +#pragma once + +#include "benchmarks_resourses.h" + +USERVER_NAMESPACE_BEGIN + +namespace benchmarks { + +using namespace boost::multi_index; + +// For every size in CACHE_SIZES: fills a cache, times OPERATIONS_NUMBER * 4 / 5 rounds of three lookups followed by OPERATIONS_NUMBER / 5 emplaces, and appends one table row to output_filename. +template< + template class LRUCacheContainer +> +void simple_benchmark(std::string &&output_filename) { + + using UserCache = LRUCacheContainer< + User, + indexed_by< + ordered_unique, member>, + 
ordered_unique, member>, + ordered_non_unique, member> + >, + std::allocator + >; + + lru_concept_assert_for_one_tag(UserCache, id_tag, int, User); + lru_concept_assert_for_one_tag(UserCache, email_tag, std::string, User); + lru_concept_assert_for_one_tag(UserCache, name_tag, std::string, User); + + std::ofstream output_file(output_filename, std::ios::app); + if (!output_file.is_open()) { + std::cerr << "Failed to open output file: " << output_filename << std::endl; + return; + } + + output_file << std::left << std::setw(20) << "Operations count" + << std::setw(16) << "Cache size" + << std::setw(12) << "Time (ms)" + << std::endl; + output_file << std::string(50, '-') << std::endl; + + for (const size_t size : CACHE_SIZES) { + UserCache cache(size); + for (size_t i = 0; i < size; ++i) { + cache.emplace(generator::generate_user()); + } + + size_t reading_operations_number = OPERATIONS_NUMBER * 4 / 5; + size_t writing_operations_number = OPERATIONS_NUMBER / 5; + + std::vector names, emails; + std::vector ids; + std::vector users; + + for (size_t i = 0; i < reading_operations_number; ++i) { + names.push_back(generator::generate_name()); + emails.push_back(generator::generate_email()); + ids.push_back(generator::generate_id()); + } + + for (size_t i = 0; i < writing_operations_number; ++i) { + users.push_back(generator::generate_user()); + } + + auto start_time = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < reading_operations_number; ++i) { + cache.template find(names[i]); + cache.template find(emails[i]); + cache.template find(ids[i]); + } + + for (size_t i = 0; i < writing_operations_number; ++i) { + cache.emplace(users[i]); + } + + auto end_time = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end_time - start_time); + + output_file << std::left << std::setw(20) << OPERATIONS_NUMBER + << std::setw(16) << size + << std::setw(12) << elapsed.count() + << std::endl; + output_file << std::string(50, '-') << 
std::endl; + } + + output_file.close(); +} +} // benchmarks +USERVER_NAMESPACE_END \ No newline at end of file diff --git a/libraries/multi_index_lru/tests/benchmarks/lru_google_benchmarks.h b/libraries/multi_index_lru/tests/benchmarks/lru_google_benchmarks.h new file mode 100644 index 000000000000..82f80e02c9cd --- /dev/null +++ b/libraries/multi_index_lru/tests/benchmarks/lru_google_benchmarks.h @@ -0,0 +1,144 @@ +#pragma once + +#include +#include "benchmarks_resourses.h" + +USERVER_NAMESPACE_BEGIN + +namespace benchmarks { + +using namespace boost::multi_index; + +// Google Benchmark bodies for the cache: state.range(0) is used as the cache capacity and state.range(1) as the number of operations per iteration. +template< + template class LRUCacheContainer +> +class LRUCacheBenchmark { +private: + using UserCache = LRUCacheContainer< + User, + indexed_by< + ordered_unique, member>, + ordered_unique, member>, + ordered_non_unique, member> + >, + std::allocator + >; + + static void prepare_cache(UserCache& cache, size_t size) { + for (size_t i = 0; i < size; ++i) { + cache.emplace(generator::generate_user()); + } + } + +public: + static void BM_GetOperations(::benchmark::State& state) { + const size_t cache_size = state.range(0); + const size_t operations_count = state.range(1); + + UserCache cache(cache_size); + prepare_cache(cache, cache_size); + + for (auto _ : state) { + state.PauseTiming(); + std::vector names, emails; + std::vector ids; + for (size_t i = 0; i < operations_count; ++i) { + names.push_back(generator::generate_name()); + emails.push_back(generator::generate_email()); + ids.push_back(generator::generate_id()); + } + state.ResumeTiming(); + + for (size_t i = 0; i < operations_count; ++i) { + ::benchmark::DoNotOptimize(cache.template find(names[i])); + ::benchmark::DoNotOptimize(cache.template find(emails[i])); + ::benchmark::DoNotOptimize(cache.template find(ids[i])); + } + } + + state.SetItemsProcessed(state.iterations() * operations_count * 3); + state.SetComplexityN(cache_size); + } + + static void BM_EmplaceOperations(::benchmark::State& state) { + const size_t cache_size = state.range(0); 
+ const size_t operations_count = state.range(1); + + UserCache cache(cache_size); + prepare_cache(cache, cache_size); + + for (auto _ : state) { + state.PauseTiming(); + std::vector users; + for (size_t i = 0; i < operations_count; ++i) { + users.push_back(generator::generate_user()); + } + state.ResumeTiming(); + + for (size_t i = 0; i < operations_count; ++i) { + cache.emplace(users[i]); + } + } + + state.SetItemsProcessed(state.iterations() * operations_count); + state.SetComplexityN(cache_size); + } +}; + +// Hands Google Benchmark a synthetic command line that requests JSON output written to output_filename. Defined in a header, hence inline. +inline void google_benchmark_init(std::string&& output_filename) { + std::vector args; + std::string prog_name = "benchmark"; + args.push_back(prog_name.data()); + std::string out_arg = "--benchmark_out=" + output_filename; + args.push_back(out_arg.data()); + std::string format_arg = "--benchmark_out_format=json"; + args.push_back(format_arg.data()); + int argc = static_cast<int>(args.size()); + ::benchmark::Initialize(&argc, args.data()); +} + +// Runs the registered benchmarks, then clears the registry and shuts the library down. Defined in a header, hence inline. +inline void google_benchmark_run() { + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::ClearRegisteredBenchmarks(); + ::benchmark::Shutdown(); +} + +// Registers the get and emplace benchmarks once for every size in CACHE_SIZES. +template< + template class LRUCacheContainer +> +void google_benchmark() { +#if __cplusplus >= 202002L + using UserCache = LRUCacheContainer< + User, + indexed_by< + ordered_unique, member>, + ordered_unique, member>, + ordered_non_unique, member> + >, + std::allocator + >; + + lru_concept_assert_for_one_tag(UserCache, id_tag, int, User); + lru_concept_assert_for_one_tag(UserCache, email_tag, std::string, User); + lru_concept_assert_for_one_tag(UserCache, name_tag, std::string, User); +#endif + for (auto size : CACHE_SIZES) { + ::benchmark::RegisterBenchmark( + "GetOperations", + &LRUCacheBenchmark::BM_GetOperations + )->Args({size, OPERATIONS_NUMBER})->Unit(::benchmark::kMicrosecond); + } + + for (auto size : CACHE_SIZES) { + ::benchmark::RegisterBenchmark( + "EmplaceOperations", + &LRUCacheBenchmark::BM_EmplaceOperations + )->Args({size, OPERATIONS_NUMBER})->Unit(::benchmark::kMicrosecond); + } 
+} +} // benchmarks +USERVER_NAMESPACE_END \ No newline at end of file diff --git a/libraries/multi_index_lru/tests/lru_container_concept.h b/libraries/multi_index_lru/tests/lru_container_concept.h new file mode 100644 index 000000000000..72f7d80f66a9 --- /dev/null +++ b/libraries/multi_index_lru/tests/lru_container_concept.h @@ -0,0 +1,45 @@ +#pragma once + +#include "boost/multi_index_container.hpp" +#include "boost/multi_index/ordered_index.hpp" +#include "boost/multi_index/sequenced_index.hpp" +#include "boost/multi_index/identity.hpp" +#include "boost/multi_index/member.hpp" +#include "boost/multi_index/tag.hpp" + +#if __cplusplus >= 202002L +#include +#include +#include + +USERVER_NAMESPACE_BEGIN + +// Compile-time description of the interface the benchmarks and tests expect from an LRU cache type. +template +concept LRUCacheType = requires(T cache, size_t size, const Key &key, Args&&... args) { + T{size}; + {cache.size()} -> std::same_as; + {cache.empty()} -> std::same_as; + {cache.capacity()} -> std::same_as; + {cache.clear()} -> std::same_as; + {cache.set_capacity(size)} -> std::same_as; + + {cache.template find(key)} -> std::input_iterator; + {cache.template contains(key)} -> std::same_as; + {cache.template erase(key)} -> std::same_as; + cache.template get(); + std::as_const(cache).template get(); + + {cache.emplace(std::forward(args)...)} -> std::same_as; +}; + +// Concept check used instead of inheriting from an abstract cache base class. +// The concept must be the first static_assert operand: wrapping concept and message in one parenthesised comma expression makes the assertion test the string literal, so it always passes. +#define lru_concept_assert_for_one_tag(CahceType, Tag, IndexType, ValueType) \ + static_assert(LRUCacheType, "LRUCacheType concept"); \ + static_assert(LRUCacheType, "LRUCacheType concept"); + +USERVER_NAMESPACE_END +#else +#define lru_concept_assert_for_one_tag(CahceType, Tag, IndexType, ValueType) +#endif diff --git a/libraries/multi_index_lru/tests/tests/lru_basic_tests.h b/libraries/multi_index_lru/tests/tests/lru_basic_tests.h new file mode 100644 index 000000000000..bf04ec512f06 --- /dev/null +++ b/libraries/multi_index_lru/tests/tests/lru_basic_tests.h @@ -0,0 +1,133 @@ +#pragma once + +#include +#include +#include 
+ +#include "../lru_container_concept.h" + +USERVER_NAMESPACE_BEGIN + +namespace { + +using namespace boost::multi_index; + +template< + template class LRUCacheContainer +> +void test_lru_users() { /* Fills a capacity-3 cache with three users, looks them up through each index, refreshes one entry through the cache's own find(), then inserts a fourth user and checks which entry was evicted. NOTE(review): the checks use assert(), so they presumably compile to nothing when NDEBUG is defined - confirm the test build keeps assertions enabled. */ + + struct id_tag {}; + struct email_tag {}; + struct name_tag {}; + + struct User { + int id; + std::string email; + std::string name; + + bool operator==(const User& other) const { + return id == other.id && email == other.email && name == other.name; + } + }; + + using UserCache = LRUCacheContainer< + User, + indexed_by< + ordered_unique, member>, + ordered_unique, member>, + ordered_non_unique, member> + >, + std::allocator + >; + + lru_concept_assert_for_one_tag(UserCache, id_tag, int, User); + lru_concept_assert_for_one_tag(UserCache, email_tag, std::string, User); + lru_concept_assert_for_one_tag(UserCache, name_tag, std::string, User); + + UserCache cache(3); // capacity == 3 + + cache.emplace(User{1, "alice@test.com", "Alice"}); + cache.emplace(User{2, "bob@test.com", "Bob"}); + cache.emplace(User{3, "charlie@test.com", "Charlie"}); + + // find by id directly on the index returned by get() + [[maybe_unused]] auto by_id = cache.template get().find(1); + assert((by_id != cache.template get().end())); + assert((by_id->get().name == "Alice")); + + // find by email directly on the index returned by get() + [[maybe_unused]] auto by_email = cache.template get().find("bob@test.com"); + assert((by_email != cache.template get().end())); + assert((by_email->get().id == 2)); + + // find by name directly on the index returned by get() + [[maybe_unused]] auto by_name = cache.template get().find("Charlie"); + assert((by_name != cache.template get().end())); + assert((by_name->get().email == "charlie@test.com")); + + // find by email through the cache's own find() + [[maybe_unused]] auto it = cache.template find("alice@test.com"); + assert((it != cache.template get().end())); + + // find by id through the cache's own find() + cache.template find(1); + + // capacity == 3 and the cache is full; Alice was looked up through cache.find() just above -> Bob is expected to be evicted + cache.emplace(User{4, "david@test.com", "David"}); + + assert((!cache.template contains(2))); // Bob was evicted + assert((cache.template 
contains(1))); + assert((cache.template contains(3))); + assert((cache.template contains(4))); + + std::cout << "test_lru_users correct" << std::endl; + +} + +template