Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion keyvi/include/keyvi/dictionary/dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,11 @@ class Dictionary final {

bool Contains(const uint64_t start_state, const std::string& key) const {
uint64_t state = start_state;

if (!state) {
return false;
}

const size_t key_length = key.size();

TRACE("Contains for %s", key.c_str());
Expand All @@ -281,6 +286,11 @@ class Dictionary final {

MatchIterator::MatchIteratorPair Get(const uint64_t start_state, const std::string& key) const {
uint64_t state = start_state;

if (!state) {
return MatchIterator::EmptyIteratorPair();
}

const size_t text_length = key.size();

for (size_t i = 0; i < text_length; ++i) {
Expand All @@ -304,6 +314,10 @@ class Dictionary final {
}

MatchIterator::MatchIteratorPair GetAllItems(const uint64_t state) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}

std::vector<unsigned char> traversal_stack;
traversal_stack.reserve(1024);

Expand Down Expand Up @@ -351,6 +365,10 @@ class Dictionary final {

MatchIterator::MatchIteratorPair GetNear(const uint64_t state, const std::string& key,
const size_t minimum_prefix_length, const bool greedy = false) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}

auto data = std::make_shared<matching::NearMatching<>>(
matching::NearMatching<>::FromSingleFsa(fsa_, state, key, minimum_prefix_length, greedy));

Expand All @@ -361,6 +379,10 @@ class Dictionary final {
MatchIterator::MatchIteratorPair GetFuzzy(const uint64_t state, const std::string& query,
const int32_t max_edit_distance,
const size_t minimum_exact_prefix = 2) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}

auto data = std::make_shared<matching::FuzzyMatching<>>(
matching::FuzzyMatching<>::FromSingleFsa(fsa_, state, query, max_edit_distance, minimum_exact_prefix));

Expand All @@ -369,6 +391,9 @@ class Dictionary final {
}

MatchIterator::MatchIteratorPair GetPrefixCompletion(const uint64_t state, const std::string& query) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}
auto data = std::make_shared<matching::PrefixCompletionMatching<>>(
matching::PrefixCompletionMatching<>::FromSingleFsa(fsa_, state, query));

Expand All @@ -380,6 +405,10 @@ class Dictionary final {

MatchIterator::MatchIteratorPair GetPrefixCompletion(const uint64_t state, const std::string& query,
size_t top_n) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}

auto data = std::make_shared<matching::PrefixCompletionMatching<>>(
matching::PrefixCompletionMatching<>::FromSingleFsa(fsa_, state, query));

Expand All @@ -405,6 +434,9 @@ class Dictionary final {

MatchIterator::MatchIteratorPair GetMultiwordCompletion(const uint64_t state, const std::string& query,
const unsigned char multiword_separator) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}
auto data = std::make_shared<matching::MultiwordCompletionMatching<>>(
matching::MultiwordCompletionMatching<>::FromSingleFsa(fsa_, state, query, multiword_separator));

Expand All @@ -417,6 +449,10 @@ class Dictionary final {
MatchIterator::MatchIteratorPair GetMultiwordCompletion(const uint64_t state, const std::string& query,
const size_t top_n,
const unsigned char multiword_separator) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}

auto data = std::make_shared<matching::MultiwordCompletionMatching<>>(
matching::MultiwordCompletionMatching<>::FromSingleFsa(fsa_, state, query, multiword_separator));

Expand Down Expand Up @@ -444,6 +480,9 @@ class Dictionary final {
const int32_t max_edit_distance,
const size_t minimum_exact_prefix,
const unsigned char multiword_separator) const {
if (!state) {
return MatchIterator::EmptyIteratorPair();
}
auto data = std::make_shared<matching::FuzzyMultiwordCompletionMatching<>>(
matching::FuzzyMultiwordCompletionMatching<>::FromSingleFsa(fsa_, state, query, max_edit_distance,
minimum_exact_prefix, multiword_separator));
Expand All @@ -456,7 +495,7 @@ class Dictionary final {
};

// shared pointer alias for Dictionary
using dictionary_t = std::shared_ptr<Dictionary>;

} /* namespace dictionary */
} /* namespace keyvi */
Expand Down
2 changes: 2 additions & 0 deletions keyvi/include/keyvi/dictionary/dictionary_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class DictionaryProperties {

/**
 * @return the value of number_of_keys_.
 */
uint64_t GetNumberOfKeys() const {
  return number_of_keys_;
}

/**
 * @return the value of number_of_states_.
 */
uint64_t GetNumberOfStates() const {
  return number_of_states_;
}

/**
 * @return the value of value_store_type_.
 */
fsa::internal::value_store_t GetValueStoreType() const {
  return value_store_type_;
}

/**
 * @return the value of sparse_array_size_.
 */
size_t GetSparseArraySize() const {
  return sparse_array_size_;
}
Expand Down
10 changes: 7 additions & 3 deletions keyvi/include/keyvi/dictionary/fsa/automata.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ class Automata final {
const boost::interprocess::map_options_t map_options =
internal::MemoryMapFlags::FSAGetMemoryMapOptions(loading_strategy);

TRACE("labels start offset: %d", dictionary_properties_.GetPersistenceOffset());
TRACE("labels start offset: %d", dictionary_properties_->GetPersistenceOffset());
labels_region_ = boost::interprocess::mapped_region(file_mapping_, boost::interprocess::read_only,
dictionary_properties_->GetPersistenceOffset(),
dictionary_properties_->GetSparseArraySize(), 0, map_options);

TRACE("transitions start offset: %d", dictionary_properties_.GetTransitionsOffset());
TRACE("transitions start offset: %d", dictionary_properties_->GetTransitionsOffset());
transitions_region_ = boost::interprocess::mapped_region(
file_mapping_, boost::interprocess::read_only, dictionary_properties_->GetTransitionsOffset(),
dictionary_properties_->GetTransitionsSize(), 0, map_options);
Expand All @@ -129,9 +129,13 @@ class Automata final {
/**
 * Get the start (root) state of the FSA.
 *
 * In case of an empty FSA (one with no states), returns 0.
 *
 * @return index of the root state, or 0 if the FSA has no states.
 */
uint64_t GetStartState() const {
  // Emptiness is decided by the state count rather than by the stored start state.
  if (dictionary_properties_->GetNumberOfStates() == 0) {
    return 0;
  }

  return dictionary_properties_->GetStartState();
}

/**
 * @return the number of keys as reported by the dictionary properties.
 */
uint64_t GetNumberOfKeys() const {
  return dictionary_properties_->GetNumberOfKeys();
}

Expand Down
27 changes: 16 additions & 11 deletions keyvi/include/keyvi/dictionary/fsa/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,20 +257,25 @@ class Generator final {

state_ = generator_state::FINALIZING;

// Consume all but stack[0].
ConsumeStack(0);
if (number_of_keys_added_ > 0) {
// Consume all but stack[0].
ConsumeStack(0);

// handling of last State.
internal::UnpackedState<PersistenceT>* unpacked_state = stack_->Get(0);
// handling of last State.
internal::UnpackedState<PersistenceT>* unpacked_state = stack_->Get(0);

start_state_ = builder_->PersistState(unpacked_state);
start_state_ = builder_->PersistState(unpacked_state);

TRACE("wrote start state at %d", start_state_);
TRACE("Check first transition: %d/%d %s", (*unpacked_state)[0].label,
persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label),
(*unpacked_state)[0].label == persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label)
? "OK"
: "BROKEN");
TRACE("wrote start state at %d", start_state_);
TRACE("Check first transition: %d/%d %s", (*unpacked_state)[0].label,
persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label),
(*unpacked_state)[0].label == persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label)
? "OK"
: "BROKEN");
} else {
// empty dictionaries have start_state_ = 1 for backwards compatibility
start_state_ = 1;
}

// free structures that are not needed anymore
delete stack_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ class SparseArrayPersistence final {
labels_ = new unsigned char[buffer_size_];
std::memset(labels_, 0, buffer_size_);

// GH#360 pre-initialize the 1st 2 label positions with a value >1 to prevent illegal zero-bytes in empty
// dictionaries
labels_[0] = 42;
labels_[1] = 42;

temporary_directory_ = temporary_path;
temporary_directory_ /= boost::filesystem::unique_path("dictionary-fsa-%%%%-%%%%-%%%%-%%%%");
boost::filesystem::create_directory(temporary_directory_);
Expand Down Expand Up @@ -195,7 +200,9 @@ class SparseArrayPersistence final {
TRACE("Wrote Transitions, stream at %d", stream.tellp());
}

/**
 * @return the chunk size reported by transitions_extern_.
 */
size_t GetChunkSizeExternalTransitions() const {
  return transitions_extern_->GetChunkSize();
}

uint32_t GetVersion() const;

Expand Down
2 changes: 1 addition & 1 deletion keyvi/include/keyvi/dictionary/matching/near_matching.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class NearMatching final {
*/
static NearMatching FromSingleFsa(const fsa::automata_t& fsa, const uint64_t start_state, const std::string& query,
const size_t minimum_exact_prefix, const bool greedy = false) {
if (query.size() < minimum_exact_prefix) {
if (query.size() < minimum_exact_prefix || start_state == 0) {
return NearMatching();
}

Expand Down
2 changes: 1 addition & 1 deletion keyvi/include/keyvi/dictionary/secondary_key_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ class SecondaryKeyDictionary final {
};

// shared pointer alias for SecondaryKeyDictionary
using secondary_key_dictionary_t = std::shared_ptr<SecondaryKeyDictionary>;

} /* namespace dictionary */
} /* namespace keyvi */
Expand Down
Loading
Loading