Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 1d36af9

Browse files
committed
Support nested dictionaries in StringDictionary::getLike.
Signed-off-by: ienkovich <[email protected]>
1 parent 3a634b0 commit 1d36af9

File tree

3 files changed

+49
-7
lines changed

3 files changed

+49
-7
lines changed

omniscidb/StringDictionary/StringDictionary.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -693,20 +693,30 @@ std::vector<int32_t> StringDictionary::getLike(const std::string& pattern,
693693
const bool icase,
694694
const bool is_simple,
695695
const char escape,
696-
const size_t generation) const {
697-
CHECK(!base_dict_) << "Not implemented";
696+
int64_t generation) const {
697+
generation = generation >= 0 ? std::min(generation, static_cast<int64_t>(entryCount()))
698+
: static_cast<int64_t>(entryCount());
698699
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
699-
const auto cache_key = std::make_tuple(pattern, icase, is_simple, escape);
700+
const auto cache_key = std::make_tuple(pattern, icase, is_simple, escape, generation);
700701
const auto it = like_cache_.find(cache_key);
701702
if (it != like_cache_.end()) {
702703
return it->second;
703704
}
705+
704706
std::vector<int32_t> result;
707+
if (base_dict_) {
708+
result = base_dict_->getLike(
709+
pattern, icase, is_simple, escape, std::min(generation, base_generation_));
710+
}
711+
712+
if (generation < base_generation_) {
713+
return result;
714+
}
715+
705716
std::vector<std::thread> workers;
706717
int worker_count = cpu_threads();
707718
CHECK_GT(worker_count, 0);
708719
std::vector<std::vector<int32_t>> worker_results(worker_count);
709-
CHECK_LE(generation, str_count_);
710720
for (int worker_idx = 0; worker_idx < worker_count; ++worker_idx) {
711721
workers.emplace_back([&worker_results,
712722
&pattern,
@@ -717,7 +727,7 @@ std::vector<int32_t> StringDictionary::getLike(const std::string& pattern,
717727
worker_idx,
718728
worker_count,
719729
this]() {
720-
for (size_t string_id = worker_idx; string_id < generation;
730+
for (int string_id = indexToId(worker_idx); string_id < generation;
721731
string_id += worker_count) {
722732
const auto str = getStringUnlocked(string_id);
723733
if (is_like(str, pattern, icase, is_simple, escape)) {

omniscidb/StringDictionary/StringDictionary.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class StringDictionary {
126126
const bool icase,
127127
const bool is_simple,
128128
const char escape,
129-
const size_t generation) const;
129+
int64_t generation = -1) const;
130130

131131
std::vector<int32_t> getCompare(const std::string& pattern,
132132
const std::string& comp_operator,
@@ -251,7 +251,8 @@ class StringDictionary {
251251
size_t payload_file_size_;
252252
size_t payload_file_off_;
253253
mutable mapd_shared_mutex rw_mutex_;
254-
mutable std::map<std::tuple<std::string, bool, bool, char>, std::vector<int32_t>>
254+
mutable std::map<std::tuple<std::string, bool, bool, char, int64_t>,
255+
std::vector<int32_t>>
255256
like_cache_;
256257
mutable std::map<std::pair<std::string, char>, std::vector<int32_t>> regex_cache_;
257258
mutable std::map<std::string, int32_t> equal_cache_;

omniscidb/Tests/StringDictionaryTest.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,37 @@ TEST(NestedStringDictionary, CopyStringsParallel) {
741741
ASSERT_EQ(dict1->copyStrings(), strings2);
742742
}
743743

// Exercises StringDictionary::getLike() on a standalone dictionary (dict1) and
// on a second dictionary (dict2) constructed from dict1, both with the default
// generation argument and with an explicit generation of 2.
744+
TEST(NestedStringDictionary, IsLike) {
745+
auto dict1 =
746+
std::make_shared<StringDictionary>(DictRef{-1, 1}, -1, g_cache_string_hash);
// The first three strings receive ids 0, 1 and 2 in insertion order.
747+
ASSERT_EQ(dict1->getOrAdd("str1"), 0);
748+
ASSERT_EQ(dict1->getOrAdd("str2"), 1);
749+
ASSERT_EQ(dict1->getOrAdd("str3"), 2);
// Positional getLike arguments: icase = true, is_simple = false, escape = backslash.
// Without a generation argument all three ids are expected; passing 2 limits
// the expected result to ids 0 and 1. "str[124]" is expected to match only
// str1 and str2 at this point (there is no str4 yet).
750+
751+
ASSERT_EQ(dict1->getLike("str%", true, false, '\\'), std::vector<int>({0, 1, 2}));
752+
ASSERT_EQ(dict1->getLike("str%", true, false, '\\', 2), std::vector<int>({0, 1}));
753+
ASSERT_EQ(dict1->getLike("str[124]", true, false, '\\'), std::vector<int>({0, 1}));
// dict2 is constructed from dict1 (presumably as a nested dictionary layered on
// top of it -- confirm against the StringDictionary constructor). Strings added
// afterwards get ids independently in each dictionary: id 3 is "str4" in dict1
// but "str5" in dict2.
754+
755+
auto dict2 = std::make_shared<StringDictionary>(dict1, -1, g_cache_string_hash);
756+
ASSERT_EQ(dict1->getOrAdd("str4"), 3);
757+
ASSERT_EQ(dict2->getOrAdd("str5"), 3);
758+
ASSERT_EQ(dict2->getOrAdd("str6"), 4);
// dict1 now has four entries: the default search is expected to return all of
// them, a generation of 2 still yields only ids 0 and 1, and "str[124]"
// additionally picks up str4 (id 3).
759+
760+
ASSERT_EQ(dict1->getLike("str%", true, false, '\\'), std::vector<int>({0, 1, 2, 3}));
761+
ASSERT_EQ(dict1->getLike("str%", true, false, '\\', 2), std::vector<int>({0, 1}));
762+
ASSERT_EQ(dict1->getLike("str[124]", true, false, '\\'), std::vector<int>({0, 1, 3}));
// dict2 is expected to report the three ids dict1 held when dict2 was created
// (0-2) followed by its own str5 and str6 (ids 3 and 4). With a generation of 2
// only ids 0 and 1 are expected. "str[12467]" is expected to match str1, str2
// and str6 (id 4 in dict2).
763+
764+
ASSERT_EQ(dict2->getLike("str%", true, false, '\\'), std::vector<int>({0, 1, 2, 3, 4}));
765+
ASSERT_EQ(dict2->getLike("str%", true, false, '\\', 2), std::vector<int>({0, 1}));
766+
ASSERT_EQ(dict2->getLike("str[12467]", true, false, '\\'), std::vector<int>({0, 1, 4}));
// Grow both dictionaries again: "str6" becomes id 4 in dict1 and "str7"
// becomes id 5 in dict2.
767+
768+
ASSERT_EQ(dict1->getOrAdd("str6"), 4);
769+
ASSERT_EQ(dict2->getOrAdd("str7"), 5);
// Repeating the earlier dict2 query after str7 was added must now also return
// id 5, i.e. the answer computed before the insertion must not be reused.
770+
771+
ASSERT_EQ(dict2->getLike("str[12467]", true, false, '\\'),
772+
std::vector<int>({0, 1, 4, 5}));
773+
}
774+
744775
TEST(StringDictionaryProxy, BuildIntersectionTranslationMapToOtherProxy) {
745776
// Use existing dictionary from GetBulk
746777
const DictRef dict_ref1(-1, 1);

0 commit comments

Comments
 (0)