Skip to content

Commit 6ea136e

Browse files
authored
empty kv dict can crash when dumping dict (#359)
Fix a corner case when a kv dict is empty and being iterated over. - initialize labels on position 0 and 1, to prevent lookup[1] - this works backwards compatible: old keyvi versions reading dicts from new version - set start_state to 0 if dict is empty and check start_state !=0 for lookup - this works backwards compatible: new keyvi versions reading dicts from old version - shortcut lookup on empty dicts to prevent paging for empty dicts fixes #360
1 parent 207e0a2 commit 6ea136e

File tree

10 files changed

+133
-59
lines changed

10 files changed

+133
-59
lines changed

keyvi/include/keyvi/dictionary/dictionary.h

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,11 @@ class Dictionary final {
258258

259259
bool Contains(const uint64_t start_state, const std::string& key) const {
260260
uint64_t state = start_state;
261+
262+
if (!state) {
263+
return false;
264+
}
265+
261266
const size_t key_length = key.size();
262267

263268
TRACE("Contains for %s", key.c_str());
@@ -281,6 +286,11 @@ class Dictionary final {
281286

282287
MatchIterator::MatchIteratorPair Get(const uint64_t start_state, const std::string& key) const {
283288
uint64_t state = start_state;
289+
290+
if (!state) {
291+
return MatchIterator::EmptyIteratorPair();
292+
}
293+
284294
const size_t text_length = key.size();
285295

286296
for (size_t i = 0; i < text_length; ++i) {
@@ -304,6 +314,10 @@ class Dictionary final {
304314
}
305315

306316
MatchIterator::MatchIteratorPair GetAllItems(const uint64_t state) const {
317+
if (!state) {
318+
return MatchIterator::EmptyIteratorPair();
319+
}
320+
307321
std::vector<unsigned char> traversal_stack;
308322
traversal_stack.reserve(1024);
309323

@@ -351,6 +365,10 @@ class Dictionary final {
351365

352366
MatchIterator::MatchIteratorPair GetNear(const uint64_t state, const std::string& key,
353367
const size_t minimum_prefix_length, const bool greedy = false) const {
368+
if (!state) {
369+
return MatchIterator::EmptyIteratorPair();
370+
}
371+
354372
auto data = std::make_shared<matching::NearMatching<>>(
355373
matching::NearMatching<>::FromSingleFsa(fsa_, state, key, minimum_prefix_length, greedy));
356374

@@ -361,6 +379,10 @@ class Dictionary final {
361379
MatchIterator::MatchIteratorPair GetFuzzy(const uint64_t state, const std::string& query,
362380
const int32_t max_edit_distance,
363381
const size_t minimum_exact_prefix = 2) const {
382+
if (!state) {
383+
return MatchIterator::EmptyIteratorPair();
384+
}
385+
364386
auto data = std::make_shared<matching::FuzzyMatching<>>(
365387
matching::FuzzyMatching<>::FromSingleFsa(fsa_, state, query, max_edit_distance, minimum_exact_prefix));
366388

@@ -369,6 +391,9 @@ class Dictionary final {
369391
}
370392

371393
MatchIterator::MatchIteratorPair GetPrefixCompletion(const uint64_t state, const std::string& query) const {
394+
if (!state) {
395+
return MatchIterator::EmptyIteratorPair();
396+
}
372397
auto data = std::make_shared<matching::PrefixCompletionMatching<>>(
373398
matching::PrefixCompletionMatching<>::FromSingleFsa(fsa_, state, query));
374399

@@ -380,6 +405,10 @@ class Dictionary final {
380405

381406
MatchIterator::MatchIteratorPair GetPrefixCompletion(const uint64_t state, const std::string& query,
382407
size_t top_n) const {
408+
if (!state) {
409+
return MatchIterator::EmptyIteratorPair();
410+
}
411+
383412
auto data = std::make_shared<matching::PrefixCompletionMatching<>>(
384413
matching::PrefixCompletionMatching<>::FromSingleFsa(fsa_, state, query));
385414

@@ -405,6 +434,9 @@ class Dictionary final {
405434

406435
MatchIterator::MatchIteratorPair GetMultiwordCompletion(const uint64_t state, const std::string& query,
407436
const unsigned char multiword_separator) const {
437+
if (!state) {
438+
return MatchIterator::EmptyIteratorPair();
439+
}
408440
auto data = std::make_shared<matching::MultiwordCompletionMatching<>>(
409441
matching::MultiwordCompletionMatching<>::FromSingleFsa(fsa_, state, query, multiword_separator));
410442

@@ -417,6 +449,10 @@ class Dictionary final {
417449
MatchIterator::MatchIteratorPair GetMultiwordCompletion(const uint64_t state, const std::string& query,
418450
const size_t top_n,
419451
const unsigned char multiword_separator) const {
452+
if (!state) {
453+
return MatchIterator::EmptyIteratorPair();
454+
}
455+
420456
auto data = std::make_shared<matching::MultiwordCompletionMatching<>>(
421457
matching::MultiwordCompletionMatching<>::FromSingleFsa(fsa_, state, query, multiword_separator));
422458

@@ -444,6 +480,9 @@ class Dictionary final {
444480
const int32_t max_edit_distance,
445481
const size_t minimum_exact_prefix,
446482
const unsigned char multiword_separator) const {
483+
if (!state) {
484+
return MatchIterator::EmptyIteratorPair();
485+
}
447486
auto data = std::make_shared<matching::FuzzyMultiwordCompletionMatching<>>(
448487
matching::FuzzyMultiwordCompletionMatching<>::FromSingleFsa(fsa_, state, query, max_edit_distance,
449488
minimum_exact_prefix, multiword_separator));
@@ -456,7 +495,7 @@ class Dictionary final {
456495
};
457496

458497
// shared pointer
459-
typedef std::shared_ptr<Dictionary> dictionary_t;
498+
using dictionary_t = std::shared_ptr<Dictionary>;
460499

461500
} /* namespace dictionary */
462501
} /* namespace keyvi */

keyvi/include/keyvi/dictionary/dictionary_properties.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ class DictionaryProperties {
126126

127127
uint64_t GetNumberOfKeys() const { return number_of_keys_; }
128128

129+
uint64_t GetNumberOfStates() const { return number_of_states_; }
130+
129131
fsa::internal::value_store_t GetValueStoreType() const { return value_store_type_; }
130132

131133
size_t GetSparseArraySize() const { return sparse_array_size_; }

keyvi/include/keyvi/dictionary/fsa/automata.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,12 @@ class Automata final {
9797
const boost::interprocess::map_options_t map_options =
9898
internal::MemoryMapFlags::FSAGetMemoryMapOptions(loading_strategy);
9999

100-
TRACE("labels start offset: %d", dictionary_properties_.GetPersistenceOffset());
100+
TRACE("labels start offset: %d", dictionary_properties_->GetPersistenceOffset());
101101
labels_region_ = boost::interprocess::mapped_region(file_mapping_, boost::interprocess::read_only,
102102
dictionary_properties_->GetPersistenceOffset(),
103103
dictionary_properties_->GetSparseArraySize(), 0, map_options);
104104

105-
TRACE("transitions start offset: %d", dictionary_properties_.GetTransitionsOffset());
105+
TRACE("transitions start offset: %d", dictionary_properties_->GetTransitionsOffset());
106106
transitions_region_ = boost::interprocess::mapped_region(
107107
file_mapping_, boost::interprocess::read_only, dictionary_properties_->GetTransitionsOffset(),
108108
dictionary_properties_->GetTransitionsSize(), 0, map_options);
@@ -129,9 +129,13 @@ class Automata final {
129129
/**
130130
* Get the start(root) stage of the FSA
131131
*
132+
* In case of an empty FSA, returns 0.
133+
*
132134
* @return index of root state.
133135
*/
134-
uint64_t GetStartState() const { return dictionary_properties_->GetStartState(); }
136+
uint64_t GetStartState() const {
137+
return dictionary_properties_->GetNumberOfStates() != 0 ? dictionary_properties_->GetStartState() : 0;
138+
}
135139

136140
uint64_t GetNumberOfKeys() const { return dictionary_properties_->GetNumberOfKeys(); }
137141

keyvi/include/keyvi/dictionary/fsa/generator.h

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -257,20 +257,25 @@ class Generator final {
257257

258258
state_ = generator_state::FINALIZING;
259259

260-
// Consume all but stack[0].
261-
ConsumeStack(0);
260+
if (number_of_keys_added_ > 0) {
261+
// Consume all but stack[0].
262+
ConsumeStack(0);
262263

263-
// handling of last State.
264-
internal::UnpackedState<PersistenceT>* unpacked_state = stack_->Get(0);
264+
// handling of last State.
265+
internal::UnpackedState<PersistenceT>* unpacked_state = stack_->Get(0);
265266

266-
start_state_ = builder_->PersistState(unpacked_state);
267+
start_state_ = builder_->PersistState(unpacked_state);
267268

268-
TRACE("wrote start state at %d", start_state_);
269-
TRACE("Check first transition: %d/%d %s", (*unpacked_state)[0].label,
270-
persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label),
271-
(*unpacked_state)[0].label == persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label)
272-
? "OK"
273-
: "BROKEN");
269+
TRACE("wrote start state at %d", start_state_);
270+
TRACE("Check first transition: %d/%d %s", (*unpacked_state)[0].label,
271+
persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label),
272+
(*unpacked_state)[0].label == persistence_->ReadTransitionLabel(start_state_ + (*unpacked_state)[0].label)
273+
? "OK"
274+
: "BROKEN");
275+
} else {
276+
// empty dictionaries have start_state_ = 1 for backwards compatibility
277+
start_state_ = 1;
278+
}
274279

275280
// free structures that are not needed anymore
276281
delete stack_;

keyvi/include/keyvi/dictionary/fsa/internal/sparse_array_persistence.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ class SparseArrayPersistence final {
6868
labels_ = new unsigned char[buffer_size_];
6969
std::memset(labels_, 0, buffer_size_);
7070

71+
// GH#360 pre-initialize the 1st 2 label positions with a value >1 to prevent illegal zero-bytes in empty
72+
// dictionaries
73+
labels_[0] = 42;
74+
labels_[1] = 42;
75+
7176
temporary_directory_ = temporary_path;
7277
temporary_directory_ /= boost::filesystem::unique_path("dictionary-fsa-%%%%-%%%%-%%%%-%%%%");
7378
boost::filesystem::create_directory(temporary_directory_);
@@ -195,7 +200,9 @@ class SparseArrayPersistence final {
195200
TRACE("Wrote Transitions, stream at %d", stream.tellp());
196201
}
197202

198-
size_t GetChunkSizeExternalTransitions() const { return transitions_extern_->GetChunkSize(); }
203+
size_t GetChunkSizeExternalTransitions() const {
204+
return transitions_extern_->GetChunkSize();
205+
}
199206

200207
uint32_t GetVersion() const;
201208

keyvi/include/keyvi/dictionary/matching/near_matching.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class NearMatching final {
8282
*/
8383
static NearMatching FromSingleFsa(const fsa::automata_t& fsa, const uint64_t start_state, const std::string& query,
8484
const size_t minimum_exact_prefix, const bool greedy = false) {
85-
if (query.size() < minimum_exact_prefix) {
85+
if (query.size() < minimum_exact_prefix || start_state == 0) {
8686
return NearMatching();
8787
}
8888

keyvi/include/keyvi/dictionary/secondary_key_dictionary.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ class SecondaryKeyDictionary final {
214214
};
215215

216216
// shared pointer
217-
typedef std::shared_ptr<SecondaryKeyDictionary> secondary_key_dictionary_t;
217+
using secondary_key_dictionary_t = std::shared_ptr<SecondaryKeyDictionary>;
218218

219219
} /* namespace dictionary */
220220
} /* namespace keyvi */

0 commit comments

Comments
 (0)