Skip to content

Commit 0ba573c

Browse files
p0-draft1 (#732)
* p0-draft1 * Adding changes * Updating .gitignore * Reverting back to the old changes * Adding util hash instead of primer_hash * Removing custom hash type * Adding suggested changes * Small changes * Changes as suggested * making constant consistent * Redefining constants * Fixing format * capitalize t --------- Co-authored-by: Connor Tsui <[email protected]>
1 parent 7311a5d commit 0ba573c

File tree

5 files changed

+133
-106
lines changed

5 files changed

+133
-106
lines changed

src/include/primer/hyperloglog.h

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,30 @@
11
#pragma once
22

33
#include <bitset>
4-
#include <functional>
54
#include <memory>
65
#include <mutex> // NOLINT
76
#include <string>
7+
#include <utility>
88
#include <vector>
99

10-
#define MAX_BITS 64
11-
#define DEFAULT_CARDINALITY 0
10+
#include "common/util/hash_util.h"
1211

13-
namespace bustub {
14-
15-
/** @brief Hash value type. */
16-
using hash_t = uint64_t;
12+
/** @brief Capacity of the bitset stream. */
13+
#define BITSET_CAPACITY 64
1714

18-
/** @brief Constant for HLL. */
19-
static const double CONSTANT = 0.79402;
15+
namespace bustub {
2016

21-
template <typename T>
17+
template <typename KeyType>
2218
class HyperLogLog {
19+
/** @brief Constant for HLL. */
20+
static constexpr double CONSTANT = 0.79402;
21+
2322
public:
23+
/** @brief Disable default constructor. */
2424
HyperLogLog() = delete;
2525

26-
explicit HyperLogLog(int16_t n_bits) : cardinality_(0) {}
26+
/** @brief Parameterized constructor. */
27+
explicit HyperLogLog(int16_t n_bits);
2728

2829
/**
2930
* @brief Getter value for cardinality.
@@ -37,7 +38,7 @@ class HyperLogLog {
3738
*
3839
* @param[in] val - value that's added into hyperloglog
3940
*/
40-
auto AddElem(T val) -> void;
41+
auto AddElem(KeyType val) -> void;
4142

4243
/**
4344
* @brief Function that computes cardinality.
@@ -51,7 +52,15 @@ class HyperLogLog {
5152
* @param[in] val - value
5253
* @returns hash integer of given input value
5354
*/
54-
inline auto CalculateHash(T val) -> hash_t;
55+
inline auto CalculateHash(KeyType val) -> hash_t {
56+
Value val_obj;
57+
if constexpr (std::is_same<KeyType, std::string>::value) {
58+
val_obj = Value(VARCHAR, val);
59+
} else {
60+
val_obj = Value(BIGINT, val);
61+
}
62+
return bustub::HashUtil::HashValue(&val_obj);
63+
}
5564

5665
/**
5766
* @brief Function that computes binary.
@@ -60,15 +69,15 @@ class HyperLogLog {
6069
* @param[in] hash
6170
* @returns binary of a given hash
6271
*/
63-
auto ComputeBinary(const hash_t &hash) const -> std::bitset<MAX_BITS>;
72+
auto ComputeBinary(const hash_t &hash) const -> std::bitset<BITSET_CAPACITY>;
6473

6574
/**
6675
* @brief Function that computes leading zeros.
6776
*
6877
* @param[in] bset - binary values of a given bitset
6978
* @returns leading zeros of given binary set
7079
*/
71-
auto PositionOfLeftmostOne(const std::bitset<MAX_BITS> &bset) const -> uint64_t;
80+
auto PositionOfLeftmostOne(const std::bitset<BITSET_CAPACITY> &bset) const -> uint64_t;
7281

7382
/** @brief Cardinality value. */
7483
size_t cardinality_;

src/include/primer/hyperloglog_presto.h

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,46 @@
11
#pragma once
22

33
#include <bitset>
4-
#include <functional>
54
#include <memory>
65
#include <mutex> // NOLINT
6+
#include <sstream>
77
#include <string>
88
#include <unordered_map>
9+
#include <utility>
910
#include <vector>
1011

11-
#define BUCKET_SIZE 4UL
12-
#define CONSTANT 0.79402
13-
#define MAX_SIZE 7UL
12+
#include "common/util/hash_util.h"
13+
14+
/** @brief Dense bucket size. */
15+
#define DENSE_BUCKET_SIZE 4
16+
/** @brief Overflow bucket size. */
17+
#define OVERFLOW_BUCKET_SIZE 3
18+
19+
/** @brief Total bucket size. */
20+
#define TOTAL_BUCKET_SIZE (DENSE_BUCKET_SIZE + OVERFLOW_BUCKET_SIZE)
1421

1522
namespace bustub {
1623

17-
template <typename T>
24+
template <typename KeyType>
1825
class HyperLogLogPresto {
1926
/**
2027
* INSTRUCTIONS: Testing framework will use the GetDenseBucket and GetOverflow function,
2128
* hence SHOULD NOT be deleted. It's essential to use the dense_bucket_
2229
* data structure.
2330
*/
2431

25-
/** @brief Hash type. */
26-
using hash_t = uint64_t;
32+
/** @brief Constant for HLL. */
33+
static constexpr double CONSTANT = 0.79402;
2734

2835
public:
2936
/** @brief Disabling default constructor. */
3037
HyperLogLogPresto() = delete;
3138

3239
/** @brief Parameterized constructor. */
33-
explicit HyperLogLogPresto(int16_t n_leading_bits) : cardinality_(0) {}
40+
explicit HyperLogLogPresto(int16_t n_leading_bits);
3441

3542
/** @brief Returns the dense_bucket_ data structure. */
36-
auto GetDenseBucket() const -> std::vector<std::bitset<BUCKET_SIZE>> { return dense_bucket_; }
43+
auto GetDenseBucket() const -> std::vector<std::bitset<DENSE_BUCKET_SIZE>> { return dense_bucket_; }
3744

3845
/** @brief Returns overflow bucket of a specific given index. */
3946
auto GetOverflowBucketofIndex(uint16_t idx) { return overflow_bucket_[idx]; }
@@ -42,7 +49,7 @@ class HyperLogLogPresto {
4249
auto GetCardinality() const -> uint64_t { return cardinality_; }
4350

4451
/** @brief Element is added for HLL calculation. */
45-
auto AddElem(T val) -> void;
52+
auto AddElem(KeyType val) -> void;
4653

4754
/** @brief Function to compute cardinality. */
4855
auto ComputeCardinality() -> void;
@@ -54,13 +61,23 @@ class HyperLogLogPresto {
5461
*
5562
* @returns hash value
5663
*/
57-
inline auto CalculateHash(T val) -> hash_t { return std::hash<T>{}(val); }
64+
inline auto CalculateHash(KeyType val) -> hash_t {
65+
Value val_obj;
66+
if constexpr (std::is_same<KeyType, std::string>::value) {
67+
val_obj = Value(VARCHAR, val);
68+
return bustub::HashUtil::HashValue(&val_obj);
69+
}
70+
if constexpr (std::is_same<KeyType, int64_t>::value) {
71+
return static_cast<hash_t>(val);
72+
}
73+
return 0;
74+
}
5875

5976
/** @brief Structure holding dense buckets (or also known as registers). */
60-
std::vector<std::bitset<BUCKET_SIZE>> dense_bucket_;
77+
std::vector<std::bitset<DENSE_BUCKET_SIZE>> dense_bucket_;
6178

6279
/** @brief Structure holding overflow buckets. */
63-
std::unordered_map<uint16_t, std::bitset<MAX_SIZE - BUCKET_SIZE>> overflow_bucket_;
80+
std::unordered_map<uint16_t, std::bitset<OVERFLOW_BUCKET_SIZE>> overflow_bucket_;
6481

6582
/** @brief Storing cardinality value */
6683
uint64_t cardinality_;

src/primer/hyperloglog.cpp

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,29 @@
22

33
namespace bustub {
44

5-
template <typename T>
6-
auto inline HyperLogLog<T>::CalculateHash(T val) -> hash_t {
7-
/** @todo student - implement the function */
8-
return 0;
9-
}
5+
template <typename KeyType>
6+
HyperLogLog<KeyType>::HyperLogLog(int16_t n_bits) : cardinality_(0) {}
107

11-
template <typename T>
12-
auto HyperLogLog<T>::ComputeBinary(const hash_t &hash) const -> std::bitset<MAX_BITS> {
13-
/** @todo student - implement the function */
8+
template <typename KeyType>
9+
auto HyperLogLog<KeyType>::ComputeBinary(const hash_t &hash) const -> std::bitset<BITSET_CAPACITY> {
10+
/** @TODO(student) Implement this function! */
1411
return {0};
1512
}
1613

17-
template <typename T>
18-
auto HyperLogLog<T>::PositionOfLeftmostOne(const std::bitset<MAX_BITS> &bset) const -> uint64_t {
19-
/** @todo student - implement the function*/
14+
template <typename KeyType>
15+
auto HyperLogLog<KeyType>::PositionOfLeftmostOne(const std::bitset<BITSET_CAPACITY> &bset) const -> uint64_t {
16+
/** @TODO(student) Implement this function! */
2017
return 0;
2118
}
2219

23-
template <typename T>
24-
auto HyperLogLog<T>::AddElem(T val) -> void {
25-
/** @todo implement the function */
20+
template <typename KeyType>
21+
auto HyperLogLog<KeyType>::AddElem(KeyType val) -> void {
22+
/** @TODO(student) Implement this function! */
2623
}
2724

28-
template <typename T>
29-
auto HyperLogLog<T>::ComputeCardinality() -> void {
30-
/** @todo - student implement the function */
25+
template <typename KeyType>
26+
auto HyperLogLog<KeyType>::ComputeCardinality() -> void {
27+
/** @TODO(student) Implement this function! */
3128
}
3229

3330
template class HyperLogLog<int64_t>;

src/primer/hyperloglog_presto.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
22

33
namespace bustub {
44

5-
template <typename T>
6-
auto HyperLogLogPresto<T>::AddElem(T val) -> void {
7-
/** @todo (student) has to fill the function */
5+
template <typename KeyType>
6+
HyperLogLogPresto<KeyType>::HyperLogLogPresto(int16_t n_leading_bits) : cardinality_(0) {}
7+
8+
template <typename KeyType>
9+
auto HyperLogLogPresto<KeyType>::AddElem(KeyType val) -> void {
10+
/** @TODO(student) Implement this function! */
811
}
912

1013
template <typename T>
1114
auto HyperLogLogPresto<T>::ComputeCardinality() -> void {
12-
// TODO(student) - implement the function
15+
/** @TODO(student) Implement this function! */
1316
}
1417

1518
template class HyperLogLogPresto<int64_t>;

0 commit comments

Comments
 (0)