41
41
#include " OSDependent/omnisci_fs.h"
42
42
#include " Shared/sqltypes.h"
43
43
#include " Shared/thread_count.h"
44
- #include " StringDictionaryClient.h"
45
44
#include " Utils/Regexp.h"
46
45
#include " Utils/StringLike.h"
47
46
@@ -248,40 +247,14 @@ class MapMaker : public StringDictionary::StringCallback {
248
247
};
249
248
} // namespace
250
249
251
- std::function<int32_t (std::string const &)> StringDictionary::makeLambdaStringToId ()
252
- const {
253
- CHECK (isClient ());
254
- constexpr size_t big_gen = static_cast <size_t >(std::numeric_limits<size_t >::max ());
255
- MapMaker map_maker;
256
- eachStringSerially (big_gen, map_maker);
257
- return [map{map_maker.moveMap ()}](std::string const & str) {
258
- auto const itr = map.find (str);
259
- return itr == map.cend () ? INVALID_STR_ID : itr->second ;
260
- };
261
- }
262
-
263
250
// Call serial_callback for each (string/_view, string_id). Must be called serially.
264
251
void StringDictionary::eachStringSerially (int64_t const generation,
265
252
StringCallback& serial_callback) const {
266
- if (isClient ()) {
267
- // copyStrings() is not supported when isClient().
268
- std::string str; // Import buffer. Placing outside of loop should reduce allocations.
269
- size_t const n = std::min (static_cast <size_t >(generation), storageEntryCount ());
270
- CHECK_LE (n, static_cast <size_t >(std::numeric_limits<int32_t >::max ()) + 1 );
271
- for (unsigned id = 0 ; id < n; ++id) {
272
- {
273
- mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
274
- client_->get_string (str, id);
275
- }
276
- serial_callback (str, id);
277
- }
278
- } else {
279
- size_t const n = std::min (static_cast <size_t >(generation), str_count_);
280
- CHECK_LE (n, static_cast <size_t >(std::numeric_limits<int32_t >::max ()) + 1 );
281
- mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
282
- for (unsigned id = 0 ; id < n; ++id) {
283
- serial_callback (getStringFromStorageFast (static_cast <int >(id)), id);
284
- }
253
+ size_t const n = std::min (static_cast <size_t >(generation), str_count_);
254
+ CHECK_LE (n, static_cast <size_t >(std::numeric_limits<int32_t >::max ()) + 1 );
255
+ mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
256
+ for (unsigned id = 0 ; id < n; ++id) {
257
+ serial_callback (getStringFromStorageFast (static_cast <int >(id)), id);
285
258
}
286
259
}
287
260
@@ -343,19 +316,8 @@ size_t StringDictionary::getNumStringsFromStorage(
343
316
return guess + (min_bound > guess ? 1 : 0 );
344
317
}
345
318
346
- StringDictionary::StringDictionary (const LeafHostInfo& host, const DictRef dict_ref)
347
- : dict_ref_(dict_ref)
348
- , folder_(" DB_" + std::to_string(dict_ref.dbId) + " _DICT_" +
349
- std::to_string (dict_ref.dictId))
350
- , strings_cache_(nullptr )
351
- , client_(new StringDictionaryClient(host, dict_ref, true ))
352
- , client_no_timeout_(new StringDictionaryClient(host, dict_ref, false )) {}
353
-
354
319
StringDictionary::~StringDictionary () noexcept {
355
320
free (CANARY_BUFFER);
356
- if (isClient ()) {
357
- return ;
358
- }
359
321
if (payload_map_) {
360
322
if (!isTemp_) {
361
323
CHECK (offset_map_);
@@ -374,13 +336,42 @@ StringDictionary::~StringDictionary() noexcept {
374
336
}
375
337
376
338
int32_t StringDictionary::getOrAdd (const std::string_view& str) noexcept {
377
- if (isClient ()) {
378
- std::vector<int32_t > string_ids;
379
- client_->get_or_add_bulk (string_ids, std::vector<std::string>{std::string (str)});
380
- CHECK_EQ (size_t (1 ), string_ids.size ());
381
- return string_ids.front ();
339
+ // @TODO(wei) treat empty string as NULL for now
340
+ if (str.size () == 0 ) {
341
+ return inline_int_null_value<int32_t >();
342
+ }
343
+ CHECK (str.size () <= MAX_STRLEN);
344
+ const string_dict_hash_t hash = hash_string (str);
345
+ {
346
+ mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
347
+ const uint32_t bucket = computeBucket (hash, str, string_id_string_dict_hash_table_);
348
+ if (string_id_string_dict_hash_table_[bucket] != INVALID_STR_ID) {
349
+ return string_id_string_dict_hash_table_[bucket];
350
+ }
351
+ }
352
+ mapd_lock_guard<mapd_shared_mutex> write_lock (rw_mutex_);
353
+ if (fillRateIsHigh (str_count_)) {
354
+ // resize when more than 50% is full
355
+ increaseHashTableCapacity ();
382
356
}
383
- return getOrAddImpl (str);
357
+ // need to recalculate the bucket in case it changed before
358
+ // we got the lock
359
+ const uint32_t bucket = computeBucket (hash, str, string_id_string_dict_hash_table_);
360
+ if (string_id_string_dict_hash_table_[bucket] == INVALID_STR_ID) {
361
+ CHECK_LT (str_count_, MAX_STRCOUNT)
362
+ << " Maximum number (" << str_count_
363
+ << " ) of Dictionary encoded Strings reached for this column, offset path "
364
+ " for column is "
365
+ << offsets_path_;
366
+ appendToStorage (str);
367
+ string_id_string_dict_hash_table_[bucket] = static_cast <int32_t >(str_count_);
368
+ if (materialize_hashes_) {
369
+ hash_cache_[str_count_] = hash;
370
+ }
371
+ ++str_count_;
372
+ invalidateInvertedIndex ();
373
+ }
374
+ return string_id_string_dict_hash_table_[bucket];
384
375
}
385
376
386
377
namespace {
@@ -434,11 +425,6 @@ template <class String>
434
425
void StringDictionary::getOrAddBulkArray (
435
426
const std::vector<std::vector<String>>& string_array_vec,
436
427
std::vector<std::vector<int32_t >>& ids_array_vec) {
437
- if (client_no_timeout_) {
438
- client_no_timeout_->get_or_add_bulk_array (ids_array_vec, string_array_vec);
439
- return ;
440
- }
441
-
442
428
ids_array_vec.resize (string_array_vec.size ());
443
429
for (size_t i = 0 ; i < string_array_vec.size (); i++) {
444
430
auto & strings = string_array_vec[i];
@@ -734,13 +720,6 @@ template void StringDictionary::getOrAddBulk(
734
720
template <class String >
735
721
int32_t StringDictionary::getIdOfString (const String& str) const {
736
722
mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
737
- if (isClient ()) {
738
- if constexpr (std::is_same_v<std::string, std::decay_t <String>>) {
739
- return client_->get (str);
740
- } else {
741
- return client_->get (std::string (str));
742
- }
743
- }
744
723
return getUnlocked (str);
745
724
}
746
725
@@ -756,11 +735,6 @@ int32_t StringDictionary::getUnlocked(const std::string_view sv) const noexcept
756
735
757
736
std::string StringDictionary::getString (int32_t string_id) const {
758
737
mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
759
- if (isClient ()) {
760
- std::string ret;
761
- client_->get_string (ret, string_id);
762
- return ret;
763
- }
764
738
return getStringUnlocked (string_id);
765
739
}
766
740
@@ -772,17 +746,13 @@ std::string StringDictionary::getStringUnlocked(int32_t string_id) const noexcep
772
746
std::pair<char *, size_t > StringDictionary::getStringBytes (
773
747
int32_t string_id) const noexcept {
774
748
mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
775
- CHECK (!isClient ());
776
749
CHECK_LE (0 , string_id);
777
750
CHECK_LT (string_id, static_cast <int32_t >(str_count_));
778
751
return getStringBytesChecked (string_id);
779
752
}
780
753
781
754
size_t StringDictionary::storageEntryCount () const {
782
755
mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
783
- if (isClient ()) {
784
- return client_->storage_entry_count ();
785
- }
786
756
return str_count_;
787
757
}
788
758
@@ -818,9 +788,6 @@ std::vector<int32_t> StringDictionary::getLike(const std::string& pattern,
818
788
const char escape,
819
789
const size_t generation) const {
820
790
mapd_lock_guard<mapd_shared_mutex> write_lock (rw_mutex_);
821
- if (isClient ()) {
822
- return client_->get_like (pattern, icase, is_simple, escape, generation);
823
- }
824
791
const auto cache_key = std::make_tuple (pattern, icase, is_simple, escape);
825
792
const auto it = like_cache_.find (cache_key);
826
793
if (it != like_cache_.end ()) {
@@ -929,9 +896,6 @@ std::vector<int32_t> StringDictionary::getCompare(const std::string& pattern,
929
896
const std::string& comp_operator,
930
897
const size_t generation) {
931
898
mapd_lock_guard<mapd_shared_mutex> write_lock (rw_mutex_);
932
- if (isClient ()) {
933
- return client_->get_compare (pattern, comp_operator, generation);
934
- }
935
899
std::vector<int32_t > ret;
936
900
if (str_count_ == 0 ) {
937
901
return ret;
@@ -1090,9 +1054,6 @@ std::vector<int32_t> StringDictionary::getRegexpLike(const std::string& pattern,
1090
1054
const char escape,
1091
1055
const size_t generation) const {
1092
1056
mapd_lock_guard<mapd_shared_mutex> write_lock (rw_mutex_);
1093
- if (isClient ()) {
1094
- return client_->get_regexp_like (pattern, escape, generation);
1095
- }
1096
1057
const auto cache_key = std::make_pair (pattern, escape);
1097
1058
const auto it = regex_cache_.find (cache_key);
1098
1059
if (it != regex_cache_.end ()) {
@@ -1135,11 +1096,6 @@ std::vector<int32_t> StringDictionary::getRegexpLike(const std::string& pattern,
1135
1096
1136
1097
std::vector<std::string> StringDictionary::copyStrings () const {
1137
1098
mapd_lock_guard<mapd_shared_mutex> write_lock (rw_mutex_);
1138
- if (isClient ()) {
1139
- // TODO(miyu): support remote string dictionary
1140
- throw std::runtime_error (
1141
- " copying dictionaries from remote server is not supported yet." );
1142
- }
1143
1099
1144
1100
if (strings_cache_) {
1145
1101
return *strings_cache_;
@@ -1245,45 +1201,6 @@ void StringDictionary::increaseHashTableCapacityFromStorageAndMemory(
1245
1201
string_id_string_dict_hash_table_.swap (new_str_ids);
1246
1202
}
1247
1203
1248
- int32_t StringDictionary::getOrAddImpl (const std::string_view& str) noexcept {
1249
- // @TODO(wei) treat empty string as NULL for now
1250
- if (str.size () == 0 ) {
1251
- return inline_int_null_value<int32_t >();
1252
- }
1253
- CHECK (str.size () <= MAX_STRLEN);
1254
- const string_dict_hash_t hash = hash_string (str);
1255
- {
1256
- mapd_shared_lock<mapd_shared_mutex> read_lock (rw_mutex_);
1257
- const uint32_t bucket = computeBucket (hash, str, string_id_string_dict_hash_table_);
1258
- if (string_id_string_dict_hash_table_[bucket] != INVALID_STR_ID) {
1259
- return string_id_string_dict_hash_table_[bucket];
1260
- }
1261
- }
1262
- mapd_lock_guard<mapd_shared_mutex> write_lock (rw_mutex_);
1263
- if (fillRateIsHigh (str_count_)) {
1264
- // resize when more than 50% is full
1265
- increaseHashTableCapacity ();
1266
- }
1267
- // need to recalculate the bucket in case it changed before
1268
- // we got the lock
1269
- const uint32_t bucket = computeBucket (hash, str, string_id_string_dict_hash_table_);
1270
- if (string_id_string_dict_hash_table_[bucket] == INVALID_STR_ID) {
1271
- CHECK_LT (str_count_, MAX_STRCOUNT)
1272
- << " Maximum number (" << str_count_
1273
- << " ) of Dictionary encoded Strings reached for this column, offset path "
1274
- " for column is "
1275
- << offsets_path_;
1276
- appendToStorage (str);
1277
- string_id_string_dict_hash_table_[bucket] = static_cast <int32_t >(str_count_);
1278
- if (materialize_hashes_) {
1279
- hash_cache_[str_count_] = hash;
1280
- }
1281
- ++str_count_;
1282
- invalidateInvertedIndex ();
1283
- }
1284
- return string_id_string_dict_hash_table_[bucket];
1285
- }
1286
-
1287
1204
std::string StringDictionary::getStringChecked (const int string_id) const noexcept {
1288
1205
const auto str_canary = getStringFromStorage (string_id);
1289
1206
CHECK (!str_canary.canary );
@@ -1569,13 +1486,6 @@ void StringDictionary::invalidateInvertedIndex() noexcept {
1569
1486
// uncheckpointed data be written to disk. Only option is a table truncate, and thats
1570
1487
// assuming not replicated dictionary
1571
1488
bool StringDictionary::checkpoint () noexcept {
1572
- if (isClient ()) {
1573
- try {
1574
- return client_->checkpoint ();
1575
- } catch (...) {
1576
- return false ;
1577
- }
1578
- }
1579
1489
CHECK (!isTemp_);
1580
1490
bool ret = true ;
1581
1491
ret = ret &&
@@ -1587,10 +1497,6 @@ bool StringDictionary::checkpoint() noexcept {
1587
1497
return ret;
1588
1498
}
1589
1499
1590
- bool StringDictionary::isClient () const noexcept {
1591
- return static_cast <bool >(client_);
1592
- }
1593
-
1594
1500
void StringDictionary::buildSortedCache () {
1595
1501
// This method is not thread-safe.
1596
1502
const auto cur_cache_size = sorted_cache.size ();
@@ -1797,15 +1703,6 @@ size_t StringDictionary::buildDictionaryTranslationMap(
1797
1703
return 0 ;
1798
1704
}
1799
1705
1800
- // If here we should should have local dictionaries.
1801
- // Note case of transient source dictionaries that aren't
1802
- // seen as remote (they have no client_no_timeout_) is covered
1803
- // by early bail above on num_source_strings == 0
1804
- if (dest_dict->client_no_timeout_ ) {
1805
- throw std::runtime_error (
1806
- " Cannot translate between a local source and remote destination dictionary." );
1807
- }
1808
-
1809
1706
// Sort this/source dict and dest dict on folder_ so we can enforce
1810
1707
// lock ordering and avoid deadlocks
1811
1708
@@ -1920,15 +1817,3 @@ size_t StringDictionary::buildDictionaryTranslationMap(
1920
1817
}
1921
1818
return total_num_strings_not_translated;
1922
1819
}
1923
-
1924
- void translate_string_ids (std::vector<int32_t >& dest_ids,
1925
- const LeafHostInfo& dict_server_host,
1926
- const DictRef dest_dict_ref,
1927
- const std::vector<int32_t >& source_ids,
1928
- const DictRef source_dict_ref,
1929
- const int32_t dest_generation) {
1930
- DictRef temp_dict_ref (-1 , -1 );
1931
- StringDictionaryClient string_client (dict_server_host, temp_dict_ref, false );
1932
- string_client.translate_string_ids (
1933
- dest_ids, dest_dict_ref, source_ids, source_dict_ref, dest_generation);
1934
- }
0 commit comments