Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 83370e2

Browse files
committed
Remove unused methods and variables from StringDictionary
1 parent a38b9e1 commit 83370e2

File tree

2 files changed

+3
-168
lines changed

2 files changed

+3
-168
lines changed

omniscidb/StringDictionary/StringDictionary.cpp

Lines changed: 3 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -144,26 +144,6 @@ void StringDictionary::eachStringSerially(int64_t const generation,
144144
}
145145
}
146146

147-
void StringDictionary::processDictionaryFutures(
148-
std::vector<std::future<std::vector<std::pair<string_dict_hash_t, unsigned int>>>>&
149-
dictionary_futures) {
150-
for (auto& dictionary_future : dictionary_futures) {
151-
dictionary_future.wait();
152-
const auto hashVec = dictionary_future.get();
153-
for (const auto& hash : hashVec) {
154-
const uint32_t bucket =
155-
computeUniqueBucketWithHash(hash.first, string_id_string_dict_hash_table_);
156-
payload_file_off_ += hash.second;
157-
string_id_string_dict_hash_table_[bucket] = static_cast<int32_t>(str_count_);
158-
if (materialize_hashes_) {
159-
hash_cache_[str_count_] = hash.first;
160-
}
161-
++str_count_;
162-
}
163-
}
164-
dictionary_futures.clear();
165-
}
166-
167147
int32_t StringDictionary::getDbId() const noexcept {
168148
return dict_ref_.dbId;
169149
}
@@ -172,36 +152,6 @@ int32_t StringDictionary::getDictId() const noexcept {
172152
return dict_ref_.dictId;
173153
}
174154

175-
/**
176-
* Method to retrieve number of strings in storage via a binary search for the first
177-
* canary
178-
* @param storage_slots number of storage entries we should search to find the minimum
179-
* canary
180-
* @return number of strings in storage
181-
*/
182-
size_t StringDictionary::getNumStringsFromStorage(
183-
const size_t storage_slots) const noexcept {
184-
if (storage_slots == 0) {
185-
return 0;
186-
}
187-
// Must use signed integers since final binary search step can wrap to max size_t value
188-
// if dictionary is empty
189-
int64_t min_bound = 0;
190-
int64_t max_bound = storage_slots - 1;
191-
int64_t guess{0};
192-
while (min_bound <= max_bound) {
193-
guess = (max_bound + min_bound) / 2;
194-
CHECK_GE(guess, 0);
195-
if (getStringFromStorage(guess).canary) {
196-
max_bound = guess - 1;
197-
} else {
198-
min_bound = guess + 1;
199-
}
200-
}
201-
CHECK_GE(guess + (min_bound > guess ? 1 : 0), 0);
202-
return guess + (min_bound > guess ? 1 : 0);
203-
}
204-
205155
StringDictionary::~StringDictionary() noexcept {
206156
free(CANARY_BUFFER);
207157
if (payload_map_) {
@@ -236,9 +186,7 @@ int32_t StringDictionary::getOrAdd(const std::string_view& str) noexcept {
236186
if (string_id_string_dict_hash_table_[bucket] == INVALID_STR_ID) {
237187
CHECK_LT(str_count_, MAX_STRCOUNT)
238188
<< "Maximum number (" << str_count_
239-
<< ") of Dictionary encoded Strings reached for this column, offset path "
240-
"for column is "
241-
<< offsets_path_;
189+
<< ") of Dictionary encoded Strings reached for this column";
242190
appendToStorage(str);
243191
string_id_string_dict_hash_table_[bucket] = static_cast<int32_t>(str_count_);
244192
if (materialize_hashes_) {
@@ -473,8 +421,7 @@ void StringDictionary::getOrAddBulk(const std::vector<String>& input_strings,
473421
CHECK_LT(str_count_, MAX_STRCOUNT)
474422
<< "Maximum number (" << str_count_
475423
<< ") of Dictionary encoded Strings reached for this column, offset path "
476-
"for column is "
477-
<< offsets_path_;
424+
"for column is";
478425
if (fillRateIsHigh(str_count_)) {
479426
// resize when more than 50% is full
480427
increaseHashTableCapacity();
@@ -557,9 +504,7 @@ void StringDictionary::getOrAddBulkParallel(const std::vector<String>& input_str
557504
CHECK_LT(shadow_str_count, MAX_STRCOUNT)
558505
<< "Maximum number (" << shadow_str_count
559506
<< ") of Dictionary encoded Strings reached for this column, offset path "
560-
"for column is "
561-
<< offsets_path_;
562-
507+
"for column is ";
563508
string_memory_ids.push_back(input_string_idx);
564509
sum_new_string_lengths += input_string.size();
565510
string_id_string_dict_hash_table_[hash_bucket] =
@@ -1354,71 +1299,6 @@ void StringDictionary::mergeSortedCache(std::vector<int32_t>& temp_sorted_cache)
13541299
sorted_cache.swap(updated_cache);
13551300
}
13561301

1357-
void StringDictionary::populate_string_ids(
1358-
std::vector<int32_t>& dest_ids,
1359-
StringDictionary* dest_dict,
1360-
const std::vector<int32_t>& source_ids,
1361-
const StringDictionary* source_dict,
1362-
const std::vector<std::string const*>& transient_string_vec) {
1363-
std::vector<std::string> strings;
1364-
1365-
for (const int32_t source_id : source_ids) {
1366-
if (source_id == std::numeric_limits<int32_t>::min()) {
1367-
strings.emplace_back("");
1368-
} else if (source_id < 0) {
1369-
unsigned const string_index = StringDictionaryProxy::transientIdToIndex(source_id);
1370-
CHECK_LT(string_index, transient_string_vec.size()) << "source_id=" << source_id;
1371-
strings.emplace_back(*transient_string_vec[string_index]);
1372-
} else {
1373-
strings.push_back(source_dict->getString(source_id));
1374-
}
1375-
}
1376-
1377-
dest_ids.resize(strings.size());
1378-
dest_dict->getOrAddBulk(strings, &dest_ids[0]);
1379-
}
1380-
1381-
void StringDictionary::populate_string_array_ids(
1382-
std::vector<std::vector<int32_t>>& dest_array_ids,
1383-
StringDictionary* dest_dict,
1384-
const std::vector<std::vector<int32_t>>& source_array_ids,
1385-
const StringDictionary* source_dict) {
1386-
dest_array_ids.resize(source_array_ids.size());
1387-
1388-
std::atomic<size_t> row_idx{0};
1389-
auto processor = [&row_idx, &dest_array_ids, dest_dict, &source_array_ids, source_dict](
1390-
int thread_id) {
1391-
for (;;) {
1392-
auto row = row_idx.fetch_add(1);
1393-
1394-
if (row >= dest_array_ids.size()) {
1395-
return;
1396-
}
1397-
const auto& source_ids = source_array_ids[row];
1398-
auto& dest_ids = dest_array_ids[row];
1399-
populate_string_ids(dest_ids, dest_dict, source_ids, source_dict);
1400-
}
1401-
};
1402-
1403-
const int num_worker_threads = std::thread::hardware_concurrency();
1404-
1405-
if (source_array_ids.size() / num_worker_threads > 10) {
1406-
std::vector<std::future<void>> worker_threads;
1407-
for (int i = 0; i < num_worker_threads; ++i) {
1408-
worker_threads.push_back(std::async(std::launch::async, processor, i));
1409-
}
1410-
1411-
for (auto& child : worker_threads) {
1412-
child.wait();
1413-
}
1414-
for (auto& child : worker_threads) {
1415-
child.get();
1416-
}
1417-
} else {
1418-
processor(0);
1419-
}
1420-
}
1421-
14221302
std::vector<std::string_view> StringDictionary::getStringViews(
14231303
const size_t generation) const {
14241304
auto timer = DEBUG_TIMER(__func__);

omniscidb/StringDictionary/StringDictionary.h

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,6 @@
3131

3232
extern bool g_enable_stringdict_parallel;
3333

34-
class DictPayloadUnavailable : public std::runtime_error {
35-
public:
36-
DictPayloadUnavailable() : std::runtime_error("DictPayloadUnavailable") {}
37-
38-
DictPayloadUnavailable(const std::string& err) : std::runtime_error(err) {}
39-
};
40-
4134
using string_dict_hash_t = uint32_t;
4235

4336
using StringLookupCallback = std::function<bool(std::string_view, int32_t string_id)>;
@@ -116,36 +109,6 @@ class StringDictionary {
116109
const bool dest_has_transients,
117110
StringLookupCallback const& dest_transient_lookup_callback) const;
118111

119-
/**
120-
* @brief Populates provided \p dest_ids vector with string ids corresponding to given
121-
* source strings
122-
*
123-
* Given a vector of source string ids and corresponding source dictionary, this method
124-
* populates a vector of destination string ids by either returning the string id of
125-
* matching strings in the destination dictionary or creating new entries in the
126-
* dictionary. Source string ids can also be transient if they were created by a
127-
function (e.g., LOWER/UPPER functions). A map of transient string ids to string values
128-
* is provided in order to handle this use case.
129-
*
130-
* @param dest_ids - vector of destination string ids to be populated
131-
* @param dest_dict - destination dictionary
132-
* @param source_ids - vector of source string ids for which destination ids are needed
133-
* @param source_dict - source dictionary
134-
* @param transient_string_vec - ordered vector of string value pointers
135-
*/
136-
static void populate_string_ids(
137-
std::vector<int32_t>& dest_ids,
138-
StringDictionary* dest_dict,
139-
const std::vector<int32_t>& source_ids,
140-
const StringDictionary* source_dict,
141-
const std::vector<std::string const*>& transient_string_vec = {});
142-
143-
static void populate_string_array_ids(
144-
std::vector<std::vector<int32_t>>& dest_array_ids,
145-
StringDictionary* dest_dict,
146-
const std::vector<std::vector<int32_t>>& source_array_ids,
147-
const StringDictionary* source_dict);
148-
149112
static constexpr int32_t INVALID_STR_ID = -1;
150113
static constexpr size_t MAX_STRLEN = (1 << 15) - 1;
151114
static constexpr size_t MAX_STRCOUNT = (1U << 31) - 1;
@@ -171,10 +134,6 @@ class StringDictionary {
171134
bool canary;
172135
};
173136

174-
void processDictionaryFutures(
175-
std::vector<std::future<std::vector<std::pair<string_dict_hash_t, unsigned int>>>>&
176-
dictionary_futures);
177-
size_t getNumStringsFromStorage(const size_t storage_slots) const noexcept;
178137
bool fillRateIsHigh(const size_t num_strings) const noexcept;
179138
void increaseHashTableCapacity() noexcept;
180139
template <class String>
@@ -229,20 +188,16 @@ class StringDictionary {
229188
std::string comp_operator,
230189
size_t generation);
231190
void buildSortedCache();
232-
void insertInSortedCache(std::string str, int32_t str_id);
233191
void sortCache(std::vector<int32_t>& cache);
234192
void mergeSortedCache(std::vector<int32_t>& temp_sorted_cache);
235-
compare_cache_value_t* binary_search_cache(const std::string& pattern) const;
236193

237194
const DictRef dict_ref_;
238195
size_t str_count_;
239196
size_t collisions_;
240197
std::vector<int32_t> string_id_string_dict_hash_table_;
241198
std::vector<string_dict_hash_t> hash_cache_;
242199
std::vector<int32_t> sorted_cache;
243-
bool isTemp_;
244200
bool materialize_hashes_;
245-
std::string offsets_path_;
246201
StringIdxEntry* offset_map_;
247202
char* payload_map_;
248203
size_t offset_file_size_;

0 commit comments

Comments
 (0)