-
Notifications
You must be signed in to change notification settings - Fork 83
feat(clp-s)!: Use core clp parsing and search code in clp-s; Bump archive version to 0.4.0. #1163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
d0637da
a67f996
6fd85dc
b1b2325
069f9d7
c72107f
30d673a
8cd9af2
e8c3e28
53c6933
43052af
26cd059
514ecdd
d594d67
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,11 +4,11 @@ | |
| #include <utility> | ||
| #include <variant> | ||
|
|
||
| #include "../clp/Defs.h" | ||
| #include "DictionaryWriter.hpp" | ||
| #include "FileWriter.hpp" | ||
| #include "ParsedMessage.hpp" | ||
| #include "TimestampDictionaryWriter.hpp" | ||
| #include "VariableEncoder.hpp" | ||
| #include "ZstdCompressor.hpp" | ||
|
|
||
| namespace clp_s { | ||
|
|
@@ -117,6 +117,9 @@ class BooleanColumnWriter : public BaseColumnWriter { | |
|
|
||
| class ClpStringColumnWriter : public BaseColumnWriter { | ||
| public: | ||
| // Types | ||
| using encoded_log_dict_id_t = uint64_t; | ||
|
|
||
|
Comment on lines
+120
to
+122
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Consider hoisting the
🤖 Prompt for AI Agents
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This header is currently already included in all places this type needs to be used.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gibber9809, thank you for the clarification! You're right - if ✏️ Learnings added
🧠 Learnings used |
||
| // Constructor | ||
| ClpStringColumnWriter( | ||
| int32_t id, | ||
|
|
@@ -141,15 +144,15 @@ class ClpStringColumnWriter : public BaseColumnWriter { | |
| * @param encoded_id | ||
| * @return the encoded log dict id | ||
| */ | ||
| static int64_t get_encoded_log_dict_id(uint64_t encoded_id) { | ||
| return (int64_t)encoded_id & cLogDictIdMask; | ||
| static clp::logtype_dictionary_id_t get_encoded_log_dict_id(encoded_log_dict_id_t encoded_id) { | ||
| return static_cast<clp::logtype_dictionary_id_t>(encoded_id & cLogDictIdMask); | ||
| } | ||
|
|
||
| /** | ||
| * @param encoded_id | ||
| * @return The encoded offset | ||
| */ | ||
| static int64_t get_encoded_offset(uint64_t encoded_id) { | ||
| static uint64_t get_encoded_offset(uint64_t encoded_id) { | ||
| return ((int64_t)encoded_id & cOffsetMask) >> cOffsetBitPosition; | ||
| } | ||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
@@ -160,8 +163,9 @@ class ClpStringColumnWriter : public BaseColumnWriter { | |
| * @param offset | ||
| * @return The encoded log dict id | ||
| */ | ||
| static int64_t encode_log_dict_id(uint64_t id, uint64_t offset) { | ||
| return ((int64_t)id) | ((int64_t)offset) << cOffsetBitPosition; | ||
| static encoded_log_dict_id_t | ||
| encode_log_dict_id(clp::logtype_dictionary_id_t id, uint64_t offset) { | ||
| return static_cast<uint64_t>(id) | (offset << cOffsetBitPosition); | ||
haiqi96 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| static constexpr int cOffsetBitPosition = 24; | ||
|
|
@@ -172,8 +176,9 @@ class ClpStringColumnWriter : public BaseColumnWriter { | |
| std::shared_ptr<LogTypeDictionaryWriter> m_log_dict; | ||
| LogTypeDictionaryEntry m_logtype_entry; | ||
|
|
||
| std::vector<int64_t> m_logtypes; | ||
| std::vector<int64_t> m_encoded_vars; | ||
| std::vector<encoded_log_dict_id_t> m_logtypes; | ||
| std::vector<clp::encoded_variable_t> m_encoded_vars; | ||
| std::vector<clp::variable_dictionary_id_t> m_temp_var_dict_ids; | ||
| }; | ||
gibber9809 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| class VariableStringColumnWriter : public BaseColumnWriter { | ||
|
|
@@ -193,7 +198,7 @@ class VariableStringColumnWriter : public BaseColumnWriter { | |
|
|
||
| private: | ||
| std::shared_ptr<VariableDictionaryWriter> m_var_dict; | ||
| std::vector<int64_t> m_variables; | ||
| std::vector<clp::variable_dictionary_id_t> m_var_dict_ids; | ||
| }; | ||
|
|
||
| class DateStringColumnWriter : public BaseColumnWriter { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel that making this to be local is better than creating a class member specifically for a temporary usage purpose.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I prefer class member over local in this instance since it helps avoid a large number of memory allocations in some frequently run code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fwiw, I agree with Haiqi. Unless we have metrics to back up the importance of this it feels very much like a premature optimization.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's fair -- I'm kicking off some compression benchmarks now that I'll check on later today. I'll make the change depending on the results.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah it seems to help a bit with some datasets but probably not enough to justify it. I'll get rid of the class member.