Skip to content

Commit 236ae8d

Browse files
gibber9809 and haiqi96 authored
feat(clp-s)!: Use core clp parsing and search code in clp-s; Bump archive version to 0.4.0. (#1163)
Co-authored-by: haiqi96 <[email protected]>
1 parent 8b4f58a commit 236ae8d

32 files changed

+335 -1937 lines changed

components/core/CMakeLists.txt

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -402,12 +402,6 @@ set(SOURCE_FILES_clp_s_unitTest
402402
src/clp_s/SchemaWriter.hpp
403403
src/clp_s/search/AddTimestampConditions.cpp
404404
src/clp_s/search/AddTimestampConditions.hpp
405-
src/clp_s/search/clp_search/EncodedVariableInterpreter.cpp
406-
src/clp_s/search/clp_search/EncodedVariableInterpreter.hpp
407-
src/clp_s/search/clp_search/Grep.cpp
408-
src/clp_s/search/clp_search/Grep.hpp
409-
src/clp_s/search/clp_search/Query.cpp
410-
src/clp_s/search/clp_search/Query.hpp
411405
src/clp_s/search/EvaluateRangeIndexFilters.cpp
412406
src/clp_s/search/EvaluateRangeIndexFilters.hpp
413407
src/clp_s/search/EvaluateTimestampIndex.cpp
@@ -429,10 +423,6 @@ set(SOURCE_FILES_clp_s_unitTest
429423
src/clp_s/TimestampEntry.hpp
430424
src/clp_s/Utils.cpp
431425
src/clp_s/Utils.hpp
432-
src/clp_s/VariableDecoder.cpp
433-
src/clp_s/VariableDecoder.hpp
434-
src/clp_s/VariableEncoder.cpp
435-
src/clp_s/VariableEncoder.hpp
436426
src/clp_s/ZstdCompressor.cpp
437427
src/clp_s/ZstdCompressor.hpp
438428
src/clp_s/ZstdDecompressor.cpp

components/core/cmake/Options/options.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ function(set_clp_s_clp_dependencies_dependencies)
249249
CLP_NEED_BOOST
250250
CLP_NEED_CURL
251251
CLP_NEED_FMT
252+
CLP_NEED_LOG_SURGEON
252253
CLP_NEED_MSGPACKCXX
253254
CLP_NEED_NLOHMANN_JSON
254255
CLP_NEED_OPENSSL
@@ -311,6 +312,7 @@ endfunction()
311312
function(set_clp_s_search_dependencies)
312313
set_clp_need_flags(
313314
CLP_NEED_ABSL
315+
CLP_NEED_LOG_SURGEON
314316
CLP_NEED_SIMDJSON
315317
CLP_NEED_SPDLOG
316318
)

components/core/src/clp/StringReader.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
#include <cassert>
88
#include <cerrno>
99

10-
#include <boost/filesystem.hpp>
11-
1210
using std::string;
1311

1412
namespace clp {

components/core/src/clp/ir/parsing.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,6 @@ bool is_delim(signed char c) {
2323
|| ('A' <= c && c <= 'Z') || '\\' == c || '_' == c || ('a' <= c && c <= 'z'));
2424
}
2525

26-
bool is_variable_placeholder(char c) {
27-
return (enum_to_underlying_type(VariablePlaceholder::Integer) == c)
28-
|| (enum_to_underlying_type(VariablePlaceholder::Dictionary) == c)
29-
|| (enum_to_underlying_type(VariablePlaceholder::Float) == c);
30-
}
31-
3226
bool is_var(std::string_view value) {
3327
size_t begin_pos = 0;
3428
size_t end_pos = 0;

components/core/src/clp/ir/parsing.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
#include <string>
1414
#include <string_view>
1515

16+
#include "../type_utils.hpp"
17+
#include "types.hpp"
18+
1619
namespace clp::ir {
1720
/**
1821
* Checks if the given character is a delimiter
@@ -23,10 +26,16 @@ namespace clp::ir {
2326
bool is_delim(signed char c);
2427

2528
/**
29+
* NOTE: This method is marked inline for a ~50% performance improvement to
30+
* `append_constant_to_logtype`.
2631
* @param c
2732
* @return Whether the character is a variable placeholder
2833
*/
29-
bool is_variable_placeholder(char c);
34+
inline bool is_variable_placeholder(char c) {
35+
return (enum_to_underlying_type(VariablePlaceholder::Integer) == c)
36+
|| (enum_to_underlying_type(VariablePlaceholder::Dictionary) == c)
37+
|| (enum_to_underlying_type(VariablePlaceholder::Float) == c);
38+
}
3039

3140
/**
3241
* NOTE: This method is marked inline for a 1-2% performance improvement

components/core/src/clp_s/CMakeLists.txt

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,22 @@ set(
1717
../clp/cli_utils.cpp
1818
../clp/cli_utils.hpp
1919
../clp/Defs.h
20+
../clp/EncodedVariableInterpreter.cpp
21+
../clp/EncodedVariableInterpreter.hpp
2022
../clp/ErrorCode.hpp
23+
../clp/ffi/encoding_methods.cpp
24+
../clp/ffi/encoding_methods.hpp
25+
../clp/ffi/encoding_methods.inc
26+
../clp/ffi/ir_stream/byteswap.hpp
2127
../clp/ffi/ir_stream/decoding_methods.cpp
2228
../clp/ffi/ir_stream/decoding_methods.hpp
29+
../clp/ffi/ir_stream/decoding_methods.inc
2330
../clp/ffi/ir_stream/Deserializer.hpp
2431
../clp/ffi/ir_stream/encoding_methods.cpp
2532
../clp/ffi/ir_stream/encoding_methods.hpp
2633
../clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp
2734
../clp/ffi/ir_stream/ir_unit_deserialization_methods.hpp
35+
../clp/ffi/ir_stream/protocol_constants.hpp
2836
../clp/ffi/ir_stream/Serializer.cpp
2937
../clp/ffi/ir_stream/Serializer.hpp
3038
../clp/ffi/ir_stream/search/AstEvaluationResult.hpp
@@ -48,17 +56,28 @@ set(
4856
../clp/FileDescriptor.hpp
4957
../clp/FileReader.cpp
5058
../clp/FileReader.hpp
59+
../clp/GrepCore.cpp
60+
../clp/GrepCore.hpp
5161
../clp/hash_utils.cpp
5262
../clp/hash_utils.hpp
5363
../clp/ir/constants.hpp
5464
../clp/ir/EncodedTextAst.cpp
5565
../clp/ir/EncodedTextAst.hpp
66+
../clp/ir/LogEvent.hpp
5667
../clp/ir/parsing.cpp
5768
../clp/ir/parsing.hpp
69+
../clp/ir/parsing.inc
70+
../clp/ir/types.hpp
71+
../clp/LogSurgeonReader.cpp
72+
../clp/LogSurgeonReader.hpp
5873
../clp/NetworkReader.cpp
5974
../clp/NetworkReader.hpp
6075
../clp/networking/socket_utils.cpp
6176
../clp/networking/socket_utils.hpp
77+
../clp/Query.cpp
78+
../clp/Query.hpp
79+
../clp/QueryToken.cpp
80+
../clp/QueryToken.hpp
6281
../clp/ReaderInterface.cpp
6382
../clp/ReaderInterface.hpp
6483
../clp/ReadOnlyMemoryMappedFile.cpp
@@ -69,10 +88,12 @@ set(
6988
../clp/streaming_archive/Constants.hpp
7089
../clp/streaming_compression/zstd/Decompressor.cpp
7190
../clp/streaming_compression/zstd/Decompressor.hpp
91+
../clp/StringReader.cpp
92+
../clp/StringReader.hpp
7293
../clp/Thread.cpp
7394
../clp/Thread.hpp
74-
../clp/TraceableException.hpp
7595
../clp/time_types.hpp
96+
../clp/TraceableException.hpp
7697
../clp/type_utils.hpp
7798
../clp/utf8_utils.cpp
7899
../clp/utf8_utils.hpp
@@ -92,6 +113,7 @@ if(CLP_BUILD_CLP_S_CLP_DEPENDENCIES)
92113
clp_s_clp_dependencies
93114
PUBLIC
94115
clp::string_utils
116+
log_surgeon::log_surgeon
95117
ystdlib::containers
96118
PRIVATE
97119
Boost::regex
@@ -223,8 +245,6 @@ set(
223245
TraceableException.hpp
224246
Utils.cpp
225247
Utils.hpp
226-
VariableEncoder.cpp
227-
VariableEncoder.hpp
228248
)
229249

230250
if(CLP_BUILD_CLP_S_ARCHIVEWRITER)
@@ -286,8 +306,6 @@ set(
286306
TraceableException.hpp
287307
Utils.cpp
288308
Utils.hpp
289-
VariableDecoder.cpp
290-
VariableDecoder.hpp
291309
)
292310

293311
if(CLP_BUILD_CLP_S_ARCHIVEREADER)

components/core/src/clp_s/ColumnReader.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#include "ColumnReader.hpp"
22

3+
#include "../clp/EncodedVariableInterpreter.hpp"
34
#include "BufferViewReader.hpp"
45
#include "ColumnWriter.hpp"
56
#include "Utils.hpp"
6-
#include "VariableDecoder.hpp"
77

88
namespace clp_s {
99
void Int64ColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
@@ -113,9 +113,14 @@ ClpStringColumnReader::extract_string_value_into_buffer(uint64_t cur_message, st
113113
}
114114

115115
int64_t encoded_vars_offset = ClpStringColumnWriter::get_encoded_offset(value);
116-
auto encoded_vars = m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_vars());
116+
auto encoded_vars = m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_variables());
117117

118-
VariableDecoder::decode_variables_into_message(entry, *m_var_dict, encoded_vars, buffer);
118+
clp::EncodedVariableInterpreter::decode_variables_into_message(
119+
entry,
120+
*m_var_dict,
121+
encoded_vars,
122+
buffer
123+
);
119124
}
120125

121126
void ClpStringColumnReader::extract_escaped_string_value_into_buffer(
@@ -149,7 +154,7 @@ UnalignedMemSpan<int64_t> ClpStringColumnReader::get_encoded_vars(uint64_t cur_m
149154

150155
int64_t encoded_vars_offset = ClpStringColumnWriter::get_encoded_offset(value);
151156

152-
return m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_vars());
157+
return m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_variables());
153158
}
154159

155160
void VariableStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {

components/core/src/clp_s/ColumnWriter.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
#include "ColumnWriter.hpp"
22

3+
#include <cstdint>
4+
#include <variant>
5+
6+
#include "../clp/Defs.h"
7+
#include "../clp/EncodedVariableInterpreter.hpp"
8+
#include "ParsedMessage.hpp"
9+
#include "ZstdCompressor.hpp"
10+
311
namespace clp_s {
412
size_t Int64ColumnWriter::add_value(ParsedMessage::variable_t& value) {
513
m_values.push_back(std::get<int64_t>(value));
@@ -49,15 +57,16 @@ void BooleanColumnWriter::store(ZstdCompressor& compressor) {
4957
}
5058

5159
size_t ClpStringColumnWriter::add_value(ParsedMessage::variable_t& value) {
52-
std::string string_var = std::get<std::string>(value);
53-
uint64_t id;
54-
uint64_t offset = m_encoded_vars.size();
55-
VariableEncoder::encode_and_add_to_dictionary(
56-
string_var,
60+
uint64_t offset{m_encoded_vars.size()};
61+
std::vector<clp::variable_dictionary_id_t> temp_var_dict_ids;
62+
clp::EncodedVariableInterpreter::encode_and_add_to_dictionary(
63+
std::get<std::string>(value),
5764
m_logtype_entry,
5865
*m_var_dict,
59-
m_encoded_vars
66+
m_encoded_vars,
67+
temp_var_dict_ids
6068
);
69+
clp::logtype_dictionary_id_t id{};
6170
m_log_dict->add_entry(m_logtype_entry, id);
6271
auto encoded_id = encode_log_dict_id(id, offset);
6372
m_logtypes.push_back(encoded_id);
@@ -74,16 +83,15 @@ void ClpStringColumnWriter::store(ZstdCompressor& compressor) {
7483
}
7584

7685
size_t VariableStringColumnWriter::add_value(ParsedMessage::variable_t& value) {
77-
std::string string_var = std::get<std::string>(value);
78-
uint64_t id;
79-
m_var_dict->add_entry(string_var, id);
80-
m_variables.push_back(id);
81-
return sizeof(int64_t);
86+
clp::variable_dictionary_id_t id{};
87+
m_var_dict->add_entry(std::get<std::string>(value), id);
88+
m_var_dict_ids.push_back(id);
89+
return sizeof(clp::variable_dictionary_id_t);
8290
}
8391

8492
void VariableStringColumnWriter::store(ZstdCompressor& compressor) {
85-
size_t size = m_variables.size() * sizeof(int64_t);
86-
compressor.write(reinterpret_cast<char const*>(m_variables.data()), size);
93+
auto size{m_var_dict_ids.size() * sizeof(clp::variable_dictionary_id_t)};
94+
compressor.write(reinterpret_cast<char const*>(m_var_dict_ids.data()), size);
8795
}
8896

8997
size_t DateStringColumnWriter::add_value(ParsedMessage::variable_t& value) {

components/core/src/clp_s/ColumnWriter.hpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
#include <utility>
55
#include <variant>
66

7+
#include "../clp/Defs.h"
78
#include "DictionaryWriter.hpp"
89
#include "FileWriter.hpp"
910
#include "ParsedMessage.hpp"
1011
#include "TimestampDictionaryWriter.hpp"
11-
#include "VariableEncoder.hpp"
1212
#include "ZstdCompressor.hpp"
1313

1414
namespace clp_s {
@@ -117,6 +117,9 @@ class BooleanColumnWriter : public BaseColumnWriter {
117117

118118
class ClpStringColumnWriter : public BaseColumnWriter {
119119
public:
120+
// Types
121+
using encoded_log_dict_id_t = uint64_t;
122+
120123
// Constructor
121124
ClpStringColumnWriter(
122125
int32_t id,
@@ -141,16 +144,16 @@ class ClpStringColumnWriter : public BaseColumnWriter {
141144
* @param encoded_id
142145
* @return the encoded log dict id
143146
*/
144-
static int64_t get_encoded_log_dict_id(uint64_t encoded_id) {
145-
return (int64_t)encoded_id & cLogDictIdMask;
147+
static clp::logtype_dictionary_id_t get_encoded_log_dict_id(encoded_log_dict_id_t encoded_id) {
148+
return static_cast<clp::logtype_dictionary_id_t>(encoded_id & cLogDictIdMask);
146149
}
147150

148151
/**
149152
* @param encoded_id
150153
* @return The encoded offset
151154
*/
152-
static int64_t get_encoded_offset(uint64_t encoded_id) {
153-
return ((int64_t)encoded_id & cOffsetMask) >> cOffsetBitPosition;
155+
static uint64_t get_encoded_offset(encoded_log_dict_id_t encoded_id) {
156+
return (encoded_id & cOffsetMask) >> cOffsetBitPosition;
154157
}
155158

156159
private:
@@ -160,20 +163,21 @@ class ClpStringColumnWriter : public BaseColumnWriter {
160163
* @param offset
161164
* @return The encoded log dict id
162165
*/
163-
static int64_t encode_log_dict_id(uint64_t id, uint64_t offset) {
164-
return ((int64_t)id) | ((int64_t)offset) << cOffsetBitPosition;
166+
static encoded_log_dict_id_t
167+
encode_log_dict_id(clp::logtype_dictionary_id_t id, uint64_t offset) {
168+
return static_cast<encoded_log_dict_id_t>(id) | (offset << cOffsetBitPosition);
165169
}
166170

167171
static constexpr int cOffsetBitPosition = 24;
168-
static constexpr int64_t cLogDictIdMask = ~(-1ULL << cOffsetBitPosition);
169-
static constexpr int64_t cOffsetMask = ~cLogDictIdMask;
172+
static constexpr uint64_t cLogDictIdMask = (1ULL << cOffsetBitPosition) - 1;
173+
static constexpr uint64_t cOffsetMask = ~cLogDictIdMask;
170174

171175
std::shared_ptr<VariableDictionaryWriter> m_var_dict;
172176
std::shared_ptr<LogTypeDictionaryWriter> m_log_dict;
173177
LogTypeDictionaryEntry m_logtype_entry;
174178

175-
std::vector<int64_t> m_logtypes;
176-
std::vector<int64_t> m_encoded_vars;
179+
std::vector<encoded_log_dict_id_t> m_logtypes;
180+
std::vector<clp::encoded_variable_t> m_encoded_vars;
177181
};
178182

179183
class VariableStringColumnWriter : public BaseColumnWriter {
@@ -193,7 +197,7 @@ class VariableStringColumnWriter : public BaseColumnWriter {
193197

194198
private:
195199
std::shared_ptr<VariableDictionaryWriter> m_var_dict;
196-
std::vector<int64_t> m_variables;
200+
std::vector<clp::variable_dictionary_id_t> m_var_dict_ids;
197201
};
198202

199203
class DateStringColumnWriter : public BaseColumnWriter {

0 commit comments

Comments
 (0)