Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 25fd287

Browse files
committed
Support nested dictionaries in StringDictionary::getRegexpLike.
Signed-off-by: ienkovich <[email protected]>
1 parent 7a9198a commit 25fd287

File tree

3 files changed

+48
-7
lines changed

3 files changed

+48
-7
lines changed

omniscidb/StringDictionary/StringDictionary.cpp

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -970,20 +970,30 @@ bool is_regexp_like(const std::string& str,
970970

971971
std::vector<int32_t> StringDictionary::getRegexpLike(const std::string& pattern,
972972
const char escape,
973-
const size_t generation) const {
974-
CHECK(!base_dict_) << "Not implemented";
973+
int64_t generation) const {
974+
generation = generation >= 0 ? std::min(generation, static_cast<int64_t>(entryCount()))
975+
: static_cast<int64_t>(entryCount());
976+
975977
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
976-
const auto cache_key = std::make_pair(pattern, escape);
978+
const auto cache_key = std::make_tuple(pattern, escape, generation);
977979
const auto it = regex_cache_.find(cache_key);
978980
if (it != regex_cache_.end()) {
979981
return it->second;
980982
}
983+
981984
std::vector<int32_t> result;
985+
if (base_dict_) {
986+
result = base_dict_->getRegexpLike(
987+
pattern, escape, std::min(generation, base_generation_));
988+
if (generation < base_generation_) {
989+
return result;
990+
}
991+
}
992+
982993
std::vector<std::thread> workers;
983994
int worker_count = cpu_threads();
984995
CHECK_GT(worker_count, 0);
985996
std::vector<std::vector<int32_t>> worker_results(worker_count);
986-
CHECK_LE(generation, str_count_);
987997
for (int worker_idx = 0; worker_idx < worker_count; ++worker_idx) {
988998
workers.emplace_back([&worker_results,
989999
&pattern,
@@ -992,7 +1002,7 @@ std::vector<int32_t> StringDictionary::getRegexpLike(const std::string& pattern,
9921002
worker_idx,
9931003
worker_count,
9941004
this]() {
995-
for (size_t string_id = worker_idx; string_id < generation;
1005+
for (int string_id = indexToId(worker_idx); string_id < generation;
9961006
string_id += worker_count) {
9971007
const auto str = getStringUnlocked(string_id);
9981008
if (is_regexp_like(str, pattern, escape)) {

omniscidb/StringDictionary/StringDictionary.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ class StringDictionary {
134134

135135
std::vector<int32_t> getRegexpLike(const std::string& pattern,
136136
const char escape,
137-
const size_t generation) const;
137+
int64_t generation = -1) const;
138138

139139
std::vector<std::string> copyStrings(int64_t generation = -1) const;
140140

@@ -254,7 +254,8 @@ class StringDictionary {
254254
mutable std::map<std::tuple<std::string, bool, bool, char, int64_t>,
255255
std::vector<int32_t>>
256256
like_cache_;
257-
mutable std::map<std::pair<std::string, char>, std::vector<int32_t>> regex_cache_;
257+
mutable std::map<std::tuple<std::string, char, int64_t>, std::vector<int32_t>>
258+
regex_cache_;
258259
mutable std::map<std::string, int32_t> equal_cache_;
259260
mutable DictionaryCache<std::string, compare_cache_value_t> compare_cache_;
260261
mutable std::shared_ptr<std::vector<std::string>> strings_cache_;

omniscidb/Tests/StringDictionaryTest.cpp

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -884,6 +884,36 @@ TEST(NestedStringDictionary, GetCompare) {
884884
sortAndCompare(dict2->getCompare("str6", "<>", 1), {0});
885885
}
886886

887+
TEST(NestedStringDictionary, GetRegexpLike) {
888+
auto dict1 =
889+
std::make_shared<StringDictionary>(DictRef{-1, 1}, -1, g_cache_string_hash);
890+
ASSERT_EQ(dict1->getOrAdd("str1"), 0);
891+
ASSERT_EQ(dict1->getOrAdd("str2"), 1);
892+
ASSERT_EQ(dict1->getOrAdd("str3"), 2);
893+
894+
ASSERT_EQ(dict1->getRegexpLike("str.", '\\'), std::vector<int>({0, 1, 2}));
895+
ASSERT_EQ(dict1->getRegexpLike("str.", '\\', 2), std::vector<int>({0, 1}));
896+
ASSERT_EQ(dict1->getRegexpLike("str[124]", '\\'), std::vector<int>({0, 1}));
897+
898+
auto dict2 = std::make_shared<StringDictionary>(dict1, -1, g_cache_string_hash);
899+
ASSERT_EQ(dict1->getOrAdd("str4"), 3);
900+
ASSERT_EQ(dict2->getOrAdd("str5"), 3);
901+
ASSERT_EQ(dict2->getOrAdd("str6"), 4);
902+
903+
ASSERT_EQ(dict1->getRegexpLike("str.", '\\'), std::vector<int>({0, 1, 2, 3}));
904+
ASSERT_EQ(dict1->getRegexpLike("str.", '\\', 2), std::vector<int>({0, 1}));
905+
ASSERT_EQ(dict1->getRegexpLike("str[124]", '\\'), std::vector<int>({0, 1, 3}));
906+
907+
ASSERT_EQ(dict2->getRegexpLike("str.", '\\'), std::vector<int>({0, 1, 2, 3, 4}));
908+
ASSERT_EQ(dict2->getRegexpLike("str.", '\\', 2), std::vector<int>({0, 1}));
909+
ASSERT_EQ(dict2->getRegexpLike("str[12467]", '\\'), std::vector<int>({0, 1, 4}));
910+
911+
ASSERT_EQ(dict1->getOrAdd("str6"), 4);
912+
ASSERT_EQ(dict2->getOrAdd("str7"), 5);
913+
914+
ASSERT_EQ(dict2->getRegexpLike("str[12467]", '\\'), std::vector<int>({0, 1, 4, 5}));
915+
}
916+
887917
TEST(StringDictionaryProxy, BuildIntersectionTranslationMapToOtherProxy) {
888918
// Use existing dictionary from GetBulk
889919
const DictRef dict_ref1(-1, 1);

0 commit comments

Comments
 (0)