Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 60d60c4

Browse files
committed
Remove StringDictionaryClient and associated code
1 parent 81ac5b3 commit 60d60c4

File tree

5 files changed

+42
-318
lines changed

5 files changed

+42
-318
lines changed

omniscidb/StringDictionary/StringDictionary.cpp

Lines changed: 40 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
#include "OSDependent/omnisci_fs.h"
4242
#include "Shared/sqltypes.h"
4343
#include "Shared/thread_count.h"
44-
#include "StringDictionaryClient.h"
4544
#include "Utils/Regexp.h"
4645
#include "Utils/StringLike.h"
4746

@@ -248,40 +247,14 @@ class MapMaker : public StringDictionary::StringCallback {
248247
};
249248
} // namespace
250249

251-
std::function<int32_t(std::string const&)> StringDictionary::makeLambdaStringToId()
252-
const {
253-
CHECK(isClient());
254-
constexpr size_t big_gen = static_cast<size_t>(std::numeric_limits<size_t>::max());
255-
MapMaker map_maker;
256-
eachStringSerially(big_gen, map_maker);
257-
return [map{map_maker.moveMap()}](std::string const& str) {
258-
auto const itr = map.find(str);
259-
return itr == map.cend() ? INVALID_STR_ID : itr->second;
260-
};
261-
}
262-
263250
// Call serial_callback for each (string_view, string_id). Must be called serially.
264251
void StringDictionary::eachStringSerially(int64_t const generation,
265252
StringCallback& serial_callback) const {
266-
if (isClient()) {
267-
// copyStrings() is not supported when isClient().
268-
std::string str; // Import buffer. Placing outside of loop should reduce allocations.
269-
size_t const n = std::min(static_cast<size_t>(generation), storageEntryCount());
270-
CHECK_LE(n, static_cast<size_t>(std::numeric_limits<int32_t>::max()) + 1);
271-
for (unsigned id = 0; id < n; ++id) {
272-
{
273-
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
274-
client_->get_string(str, id);
275-
}
276-
serial_callback(str, id);
277-
}
278-
} else {
279-
size_t const n = std::min(static_cast<size_t>(generation), str_count_);
280-
CHECK_LE(n, static_cast<size_t>(std::numeric_limits<int32_t>::max()) + 1);
281-
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
282-
for (unsigned id = 0; id < n; ++id) {
283-
serial_callback(getStringFromStorageFast(static_cast<int>(id)), id);
284-
}
253+
size_t const n = std::min(static_cast<size_t>(generation), str_count_);
254+
CHECK_LE(n, static_cast<size_t>(std::numeric_limits<int32_t>::max()) + 1);
255+
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
256+
for (unsigned id = 0; id < n; ++id) {
257+
serial_callback(getStringFromStorageFast(static_cast<int>(id)), id);
285258
}
286259
}
287260

@@ -343,19 +316,8 @@ size_t StringDictionary::getNumStringsFromStorage(
343316
return guess + (min_bound > guess ? 1 : 0);
344317
}
345318

346-
StringDictionary::StringDictionary(const LeafHostInfo& host, const DictRef dict_ref)
347-
: dict_ref_(dict_ref)
348-
, folder_("DB_" + std::to_string(dict_ref.dbId) + "_DICT_" +
349-
std::to_string(dict_ref.dictId))
350-
, strings_cache_(nullptr)
351-
, client_(new StringDictionaryClient(host, dict_ref, true))
352-
, client_no_timeout_(new StringDictionaryClient(host, dict_ref, false)) {}
353-
354319
StringDictionary::~StringDictionary() noexcept {
355320
free(CANARY_BUFFER);
356-
if (isClient()) {
357-
return;
358-
}
359321
if (payload_map_) {
360322
if (!isTemp_) {
361323
CHECK(offset_map_);
@@ -374,13 +336,42 @@ StringDictionary::~StringDictionary() noexcept {
374336
}
375337

376338
int32_t StringDictionary::getOrAdd(const std::string_view& str) noexcept {
377-
if (isClient()) {
378-
std::vector<int32_t> string_ids;
379-
client_->get_or_add_bulk(string_ids, std::vector<std::string>{std::string(str)});
380-
CHECK_EQ(size_t(1), string_ids.size());
381-
return string_ids.front();
339+
// @TODO(wei) treat empty string as NULL for now
340+
if (str.size() == 0) {
341+
return inline_int_null_value<int32_t>();
342+
}
343+
CHECK(str.size() <= MAX_STRLEN);
344+
const string_dict_hash_t hash = hash_string(str);
345+
{
346+
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
347+
const uint32_t bucket = computeBucket(hash, str, string_id_string_dict_hash_table_);
348+
if (string_id_string_dict_hash_table_[bucket] != INVALID_STR_ID) {
349+
return string_id_string_dict_hash_table_[bucket];
350+
}
351+
}
352+
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
353+
if (fillRateIsHigh(str_count_)) {
354+
// resize when more than 50% is full
355+
increaseHashTableCapacity();
382356
}
383-
return getOrAddImpl(str);
357+
// need to recalculate the bucket in case it changed before
358+
// we got the lock
359+
const uint32_t bucket = computeBucket(hash, str, string_id_string_dict_hash_table_);
360+
if (string_id_string_dict_hash_table_[bucket] == INVALID_STR_ID) {
361+
CHECK_LT(str_count_, MAX_STRCOUNT)
362+
<< "Maximum number (" << str_count_
363+
<< ") of Dictionary encoded Strings reached for this column, offset path "
364+
"for column is "
365+
<< offsets_path_;
366+
appendToStorage(str);
367+
string_id_string_dict_hash_table_[bucket] = static_cast<int32_t>(str_count_);
368+
if (materialize_hashes_) {
369+
hash_cache_[str_count_] = hash;
370+
}
371+
++str_count_;
372+
invalidateInvertedIndex();
373+
}
374+
return string_id_string_dict_hash_table_[bucket];
384375
}
385376

386377
namespace {
@@ -434,11 +425,6 @@ template <class String>
434425
void StringDictionary::getOrAddBulkArray(
435426
const std::vector<std::vector<String>>& string_array_vec,
436427
std::vector<std::vector<int32_t>>& ids_array_vec) {
437-
if (client_no_timeout_) {
438-
client_no_timeout_->get_or_add_bulk_array(ids_array_vec, string_array_vec);
439-
return;
440-
}
441-
442428
ids_array_vec.resize(string_array_vec.size());
443429
for (size_t i = 0; i < string_array_vec.size(); i++) {
444430
auto& strings = string_array_vec[i];
@@ -734,13 +720,6 @@ template void StringDictionary::getOrAddBulk(
734720
template <class String>
735721
int32_t StringDictionary::getIdOfString(const String& str) const {
736722
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
737-
if (isClient()) {
738-
if constexpr (std::is_same_v<std::string, std::decay_t<String>>) {
739-
return client_->get(str);
740-
} else {
741-
return client_->get(std::string(str));
742-
}
743-
}
744723
return getUnlocked(str);
745724
}
746725

@@ -756,11 +735,6 @@ int32_t StringDictionary::getUnlocked(const std::string_view sv) const noexcept
756735

757736
std::string StringDictionary::getString(int32_t string_id) const {
758737
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
759-
if (isClient()) {
760-
std::string ret;
761-
client_->get_string(ret, string_id);
762-
return ret;
763-
}
764738
return getStringUnlocked(string_id);
765739
}
766740

@@ -772,17 +746,13 @@ std::string StringDictionary::getStringUnlocked(int32_t string_id) const noexcep
772746
std::pair<char*, size_t> StringDictionary::getStringBytes(
773747
int32_t string_id) const noexcept {
774748
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
775-
CHECK(!isClient());
776749
CHECK_LE(0, string_id);
777750
CHECK_LT(string_id, static_cast<int32_t>(str_count_));
778751
return getStringBytesChecked(string_id);
779752
}
780753

781754
size_t StringDictionary::storageEntryCount() const {
782755
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
783-
if (isClient()) {
784-
return client_->storage_entry_count();
785-
}
786756
return str_count_;
787757
}
788758

@@ -818,9 +788,6 @@ std::vector<int32_t> StringDictionary::getLike(const std::string& pattern,
818788
const char escape,
819789
const size_t generation) const {
820790
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
821-
if (isClient()) {
822-
return client_->get_like(pattern, icase, is_simple, escape, generation);
823-
}
824791
const auto cache_key = std::make_tuple(pattern, icase, is_simple, escape);
825792
const auto it = like_cache_.find(cache_key);
826793
if (it != like_cache_.end()) {
@@ -929,9 +896,6 @@ std::vector<int32_t> StringDictionary::getCompare(const std::string& pattern,
929896
const std::string& comp_operator,
930897
const size_t generation) {
931898
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
932-
if (isClient()) {
933-
return client_->get_compare(pattern, comp_operator, generation);
934-
}
935899
std::vector<int32_t> ret;
936900
if (str_count_ == 0) {
937901
return ret;
@@ -1090,9 +1054,6 @@ std::vector<int32_t> StringDictionary::getRegexpLike(const std::string& pattern,
10901054
const char escape,
10911055
const size_t generation) const {
10921056
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
1093-
if (isClient()) {
1094-
return client_->get_regexp_like(pattern, escape, generation);
1095-
}
10961057
const auto cache_key = std::make_pair(pattern, escape);
10971058
const auto it = regex_cache_.find(cache_key);
10981059
if (it != regex_cache_.end()) {
@@ -1135,11 +1096,6 @@ std::vector<int32_t> StringDictionary::getRegexpLike(const std::string& pattern,
11351096

11361097
std::vector<std::string> StringDictionary::copyStrings() const {
11371098
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
1138-
if (isClient()) {
1139-
// TODO(miyu): support remote string dictionary
1140-
throw std::runtime_error(
1141-
"copying dictionaries from remote server is not supported yet.");
1142-
}
11431099

11441100
if (strings_cache_) {
11451101
return *strings_cache_;
@@ -1245,45 +1201,6 @@ void StringDictionary::increaseHashTableCapacityFromStorageAndMemory(
12451201
string_id_string_dict_hash_table_.swap(new_str_ids);
12461202
}
12471203

1248-
int32_t StringDictionary::getOrAddImpl(const std::string_view& str) noexcept {
1249-
// @TODO(wei) treat empty string as NULL for now
1250-
if (str.size() == 0) {
1251-
return inline_int_null_value<int32_t>();
1252-
}
1253-
CHECK(str.size() <= MAX_STRLEN);
1254-
const string_dict_hash_t hash = hash_string(str);
1255-
{
1256-
mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
1257-
const uint32_t bucket = computeBucket(hash, str, string_id_string_dict_hash_table_);
1258-
if (string_id_string_dict_hash_table_[bucket] != INVALID_STR_ID) {
1259-
return string_id_string_dict_hash_table_[bucket];
1260-
}
1261-
}
1262-
mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
1263-
if (fillRateIsHigh(str_count_)) {
1264-
// resize when more than 50% is full
1265-
increaseHashTableCapacity();
1266-
}
1267-
// need to recalculate the bucket in case it changed before
1268-
// we got the lock
1269-
const uint32_t bucket = computeBucket(hash, str, string_id_string_dict_hash_table_);
1270-
if (string_id_string_dict_hash_table_[bucket] == INVALID_STR_ID) {
1271-
CHECK_LT(str_count_, MAX_STRCOUNT)
1272-
<< "Maximum number (" << str_count_
1273-
<< ") of Dictionary encoded Strings reached for this column, offset path "
1274-
"for column is "
1275-
<< offsets_path_;
1276-
appendToStorage(str);
1277-
string_id_string_dict_hash_table_[bucket] = static_cast<int32_t>(str_count_);
1278-
if (materialize_hashes_) {
1279-
hash_cache_[str_count_] = hash;
1280-
}
1281-
++str_count_;
1282-
invalidateInvertedIndex();
1283-
}
1284-
return string_id_string_dict_hash_table_[bucket];
1285-
}
1286-
12871204
std::string StringDictionary::getStringChecked(const int string_id) const noexcept {
12881205
const auto str_canary = getStringFromStorage(string_id);
12891206
CHECK(!str_canary.canary);
@@ -1569,13 +1486,6 @@ void StringDictionary::invalidateInvertedIndex() noexcept {
15691486
// uncheckpointed data be written to disk. Only option is a table truncate, and that's
15701487
// assuming not replicated dictionary
15711488
bool StringDictionary::checkpoint() noexcept {
1572-
if (isClient()) {
1573-
try {
1574-
return client_->checkpoint();
1575-
} catch (...) {
1576-
return false;
1577-
}
1578-
}
15791489
CHECK(!isTemp_);
15801490
bool ret = true;
15811491
ret = ret &&
@@ -1587,10 +1497,6 @@ bool StringDictionary::checkpoint() noexcept {
15871497
return ret;
15881498
}
15891499

1590-
bool StringDictionary::isClient() const noexcept {
1591-
return static_cast<bool>(client_);
1592-
}
1593-
15941500
void StringDictionary::buildSortedCache() {
15951501
// This method is not thread-safe.
15961502
const auto cur_cache_size = sorted_cache.size();
@@ -1797,15 +1703,6 @@ size_t StringDictionary::buildDictionaryTranslationMap(
17971703
return 0;
17981704
}
17991705

1800-
// If here we should should have local dictionaries.
1801-
// Note case of transient source dictionaries that aren't
1802-
// seen as remote (they have no client_no_timeout_) is covered
1803-
// by early bail above on num_source_strings == 0
1804-
if (dest_dict->client_no_timeout_) {
1805-
throw std::runtime_error(
1806-
"Cannot translate between a local source and remote destination dictionary.");
1807-
}
1808-
18091706
// Sort this/source dict and dest dict on folder_ so we can enforce
18101707
// lock ordering and avoid deadlocks
18111708

@@ -1920,15 +1817,3 @@ size_t StringDictionary::buildDictionaryTranslationMap(
19201817
}
19211818
return total_num_strings_not_translated;
19221819
}
1923-
1924-
void translate_string_ids(std::vector<int32_t>& dest_ids,
1925-
const LeafHostInfo& dict_server_host,
1926-
const DictRef dest_dict_ref,
1927-
const std::vector<int32_t>& source_ids,
1928-
const DictRef source_dict_ref,
1929-
const int32_t dest_generation) {
1930-
DictRef temp_dict_ref(-1, -1);
1931-
StringDictionaryClient string_client(dict_server_host, temp_dict_ref, false);
1932-
string_client.translate_string_ids(
1933-
dest_ids, dest_dict_ref, source_ids, source_dict_ref, dest_generation);
1934-
}

omniscidb/StringDictionary/StringDictionary.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@
3131

3232
extern bool g_enable_stringdict_parallel;
3333

34-
class StringDictionaryClient;
35-
class LeafHostInfo;
36-
3734
class DictPayloadUnavailable : public std::runtime_error {
3835
public:
3936
DictPayloadUnavailable() : std::runtime_error("DictPayloadUnavailable") {}
@@ -53,7 +50,6 @@ class StringDictionary {
5350
const bool recover,
5451
const bool materializeHashes = false,
5552
size_t initial_capacity = 256);
56-
StringDictionary(const LeafHostInfo& host, const DictRef dict_ref);
5753
~StringDictionary() noexcept;
5854

5955
int32_t getDbId() const noexcept;
@@ -70,7 +66,6 @@ class StringDictionary {
7066
// Each std::string_view (the dictionary is always local; there is no client mode)
7167
// plus string_id is passed to the callback functor.
7268
void eachStringSerially(int64_t const generation, StringCallback&) const;
73-
std::function<int32_t(std::string const&)> makeLambdaStringToId() const;
7469
friend class StringLocalCallback;
7570

7671
int32_t getOrAdd(const std::string_view& str) noexcept;
@@ -126,8 +121,6 @@ class StringDictionary {
126121

127122
bool checkpoint() noexcept;
128123

129-
bool isClient() const noexcept;
130-
131124
/**
132125
* @brief Populates provided \p dest_ids vector with string ids corresponding to given
133126
* source strings
@@ -196,7 +189,6 @@ class StringDictionary {
196189
const std::vector<String>& input_strings,
197190
const std::vector<size_t>& string_memory_ids,
198191
const std::vector<string_dict_hash_t>& input_strings_hashes) noexcept;
199-
int32_t getOrAddImpl(const std::string_view& str) noexcept;
200192
template <class String>
201193
void hashStrings(const std::vector<String>& string_vec,
202194
std::vector<string_dict_hash_t>& hashes) const noexcept;
@@ -272,20 +264,11 @@ class StringDictionary {
272264
mutable std::map<std::string, int32_t> equal_cache_;
273265
mutable DictionaryCache<std::string, compare_cache_value_t> compare_cache_;
274266
mutable std::shared_ptr<std::vector<std::string>> strings_cache_;
275-
mutable std::unique_ptr<StringDictionaryClient> client_;
276-
mutable std::unique_ptr<StringDictionaryClient> client_no_timeout_;
277267

278268
char* CANARY_BUFFER{nullptr};
279269
size_t canary_buffer_size = 0;
280270
};
281271

282272
int32_t truncate_to_generation(const int32_t id, const size_t generation);
283273

284-
void translate_string_ids(std::vector<int32_t>& dest_ids,
285-
const LeafHostInfo& dict_server_host,
286-
const DictRef dest_dict_ref,
287-
const std::vector<int32_t>& source_ids,
288-
const DictRef source_dict_ref,
289-
const int32_t dest_generation);
290-
291274
#endif // STRINGDICTIONARY_STRINGDICTIONARY_H

0 commit comments

Comments
 (0)