diff --git a/keyvi/bin/keyvi_c/c_api.cpp b/keyvi/bin/keyvi_c/c_api.cpp index 4eece67dd..ded5a1143 100644 --- a/keyvi/bin/keyvi_c/c_api.cpp +++ b/keyvi/bin/keyvi_c/c_api.cpp @@ -103,6 +103,16 @@ keyvi_dictionary* keyvi_create_dictionary(const char* filename) { } } +/* Creates a heap-allocated dictionary handle for filename using the given loading strategy. If construction throws a std::exception, its what() text is written to std::cerr and nullptr is returned. */ keyvi_dictionary* keyvi_create_dictionary_with_loading_strategy(const char* filename, + keyvi::dictionary::LoadingStrategy loading_strategy) { + try { + return new keyvi_dictionary(Dictionary(filename, loading_strategy)); + } catch (const std::exception& e) { + std::cerr << e.what() << '\n'; + return nullptr; + } +} + void keyvi_dictionary_destroy(const keyvi_dictionary* dict) { delete dict; } diff --git a/keyvi/include/keyvi/c_api/c_api.h b/keyvi/include/keyvi/c_api/c_api.h index 4200f32bb..7e05761e9 100644 --- a/keyvi/include/keyvi/c_api/c_api.h +++ b/keyvi/include/keyvi/c_api/c_api.h @@ -33,6 +33,7 @@ extern "C" { #include #include "keyvi/compression/compression_algorithm.h" +#include "keyvi/dictionary/loading_strategy.h" struct keyvi_dictionary; struct keyvi_match; @@ -61,6 +62,8 @@ void keyvi_string_destroy(char* str); struct keyvi_dictionary* keyvi_create_dictionary(const char*); +/* Creates a dictionary with an explicit loading strategy; yields a null pointer when loading fails with a std::exception (see the definition in c_api.cpp). */ struct keyvi_dictionary* keyvi_create_dictionary_with_loading_strategy(const char*, keyvi::dictionary::LoadingStrategy); + void keyvi_dictionary_destroy(const struct keyvi_dictionary*); size_t keyvi_dictionary_get_size(const struct keyvi_dictionary*); diff --git a/keyvi/include/keyvi/dictionary/fsa/automata.h b/keyvi/include/keyvi/dictionary/fsa/automata.h index 3041f0f3b..8657e1606 100644 --- a/keyvi/include/keyvi/dictionary/fsa/automata.h +++ b/keyvi/include/keyvi/dictionary/fsa/automata.h @@ -40,6 +40,7 @@ #include "keyvi/dictionary/fsa/internal/value_store_factory.h" #include "keyvi/dictionary/fsa/traversal/traversal_base.h" #include "keyvi/dictionary/fsa/traversal/weighted_traversal.h" +#include "keyvi/dictionary/loading_strategy.h" // #define ENABLE_TRACING #include "keyvi/dictionary/util/trace.h" diff --git
a/keyvi/include/keyvi/dictionary/fsa/internal/memory_map_flags.h b/keyvi/include/keyvi/dictionary/fsa/internal/memory_map_flags.h index 7a73b091c..572c22a0f 100644 --- a/keyvi/include/keyvi/dictionary/fsa/internal/memory_map_flags.h +++ b/keyvi/include/keyvi/dictionary/fsa/internal/memory_map_flags.h @@ -32,6 +32,8 @@ #include #include +#include "keyvi/dictionary/loading_strategy.h" + // Mac has no MAP_POPULATE #if defined(OS_MACOSX) #ifndef MAP_POPULATE @@ -41,18 +43,6 @@ namespace keyvi { namespace dictionary { - -enum class loading_strategy_types { - default_os, // no special treatment, use whatever the OS/Boost has as default - lazy, // load data as needed with some read-ahead - populate, // immediately load everything in memory (blocks until everything is fully read) - populate_key_part, // populate only the key part, load value part lazy - populate_lazy, // load data lazy but ask the OS to read ahead if possible (does not block) - lazy_no_readahead, // disable any read-ahead (for cases when index > x * main memory) - lazy_no_readahead_value_part, // disable read-ahead only for the value part - populate_key_part_no_readahead_value_part // populate the key part, but disable read ahead value part -}; - namespace fsa { namespace internal { diff --git a/keyvi/include/keyvi/dictionary/loading_strategy.h b/keyvi/include/keyvi/dictionary/loading_strategy.h new file mode 100644 index 000000000..de0ce9077 --- /dev/null +++ b/keyvi/include/keyvi/dictionary/loading_strategy.h @@ -0,0 +1,39 @@ +/* keyvi - A key value store. + * + * Copyright 2025 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef KEYVI_DICTIONARY_LOADING_STRATEGY_H_ +#define KEYVI_DICTIONARY_LOADING_STRATEGY_H_ + +namespace keyvi { +namespace dictionary { + +/* Selects how dictionary data is brought into memory when it is loaded. */ enum class loading_strategy_types { + default_os, // no special treatment, use whatever the OS/Boost has as default + lazy, // load data as needed with some read-ahead + populate, // immediately load everything into memory (blocks until everything is fully read) + populate_key_part, // populate only the key part, load the value part lazily + populate_lazy, // load data lazily but ask the OS to read ahead if possible (does not block) + lazy_no_readahead, // disable any read-ahead (for cases when index > x * main memory) + lazy_no_readahead_value_part, // disable read-ahead only for the value part + populate_key_part_no_readahead_value_part // populate the key part, but disable read-ahead for the value part +}; + +/* CamelCase alias for loading_strategy_types. */ using LoadingStrategy = loading_strategy_types; + +} /* namespace dictionary */ +} /* namespace keyvi */ +#endif // KEYVI_DICTIONARY_LOADING_STRATEGY_H_ diff --git a/rust/build.rs b/rust/build.rs index 90202dcf9..b428b877e 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -53,9 +53,11 @@ fn main() { .enable_cxx_namespaces() .layout_tests(true) .rustified_enum("keyvi::compression::CompressionAlgorithm") + .rustified_enum("keyvi::dictionary::loading_strategy_types") .allowlist_function("keyvi_bytes_destroy") .allowlist_function("keyvi_string_destroy") .allowlist_function("keyvi_create_dictionary") + .allowlist_function("keyvi_create_dictionary_with_loading_strategy") .allowlist_function("keyvi_dictionary_destroy")
.allowlist_function("keyvi_dictionary_get") .allowlist_function("keyvi_dictionary_get_all_items") diff --git a/rust/src/dictionary.rs b/rust/src/dictionary.rs index 00a066df0..737219361 100644 --- a/rust/src/dictionary.rs +++ b/rust/src/dictionary.rs @@ -54,6 +54,31 @@ impl Dictionary { } } + /// Opens the dictionary stored at `filename`, handing `loading_strategy` to the C API. + /// + /// # Errors + /// + /// Fails if `filename` contains an interior NUL byte, or with an `InvalidInput` error + /// ("could not load file") when the C API returns a null pointer. + pub fn with_loading_strategy( + filename: &str, + loading_strategy: root::keyvi::dictionary::LoadingStrategy, + ) -> io::Result<Dictionary> { + let fn_c = CString::new(filename)?; + // SAFETY: `fn_c` is a valid NUL-terminated C string that outlives the call. + let ptr = unsafe { + root::keyvi_create_dictionary_with_loading_strategy(fn_c.as_ptr(), loading_strategy) + }; + if ptr.is_null() { + Err(io::Error::new( + io::ErrorKind::InvalidInput, + "could not load file", + )) + } else { + Ok(Dictionary { dict: ptr }) + } + } + pub fn statistics(&self) -> String { let c_buf: *mut ::std::os::raw::c_char = unsafe { root::keyvi_dictionary_get_statistics(self.dict) }; diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 976c8b78f..f269f18e5 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -38,3 +38,4 @@ pub mod keyvi_match_iterator; pub mod keyvi_string; pub type Compression = bindings::root::keyvi::compression::CompressionAlgorithm; +pub type LoadingStrategy = bindings::root::keyvi::dictionary::loading_strategy_types; diff --git a/rust/tests/tests.rs b/rust/tests/tests.rs index 2d0bbd33f..1c96a3c8e 100644 --- a/rust/tests/tests.rs +++ b/rust/tests/tests.rs @@ -182,7 +182,11 @@ mod tests { #[test] fn get_all_items() { let expected_items = [("a", "[12,13]"), ("b", "[12,13]"), ("c", "[14,15]")]; - let dict = dictionary::Dictionary::new("test_data/test.kv").unwrap(); + let dict = dictionary::Dictionary::with_loading_strategy( + "test_data/test.kv", + keyvi::LoadingStrategy::populate, + ) + .unwrap(); for (item, expected_item) in dict.get_all_items().zip(&expected_items) { assert_eq!(item.matched_string(), expected_item.0);