Skip to content
4 changes: 4 additions & 0 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -481,9 +481,13 @@ set(SOURCE_FILES_unitTest
src/clp/EncodedVariableInterpreter.cpp
src/clp/EncodedVariableInterpreter.hpp
src/clp/ErrorCode.hpp
src/clp/ffi/EncodedTextAst.hpp
src/clp/ffi/EncodedTextAstError.cpp
src/clp/ffi/EncodedTextAstError.hpp
src/clp/ffi/encoding_methods.cpp
src/clp/ffi/encoding_methods.hpp
src/clp/ffi/encoding_methods.inc
src/clp/ffi/test/test_EncodedTextAst.cpp
src/clp/ffi/test/test_StringBlob.cpp
src/clp/ffi/ir_stream/byteswap.hpp
src/clp/ffi/ir_stream/Deserializer.hpp
Expand Down
264 changes: 264 additions & 0 deletions components/core/src/clp/ffi/EncodedTextAst.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
#ifndef CLP_FFI_ENCODEDTEXTAST_HPP
#define CLP_FFI_ENCODEDTEXTAST_HPP

#include <cstddef>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include <concepts>

#include <ystdlib/error_handling/Result.hpp>

#include "../ir/types.hpp"
#include "EncodedTextAstError.hpp"
#include "encoding_methods.hpp"
#include "StringBlob.hpp"
#include "type_utils.hpp"
#include "../type_utils.hpp"

namespace clp::ffi {
/**
* Method signature requirements for handling constant text segments in an encoded text AST.
* @tparam EncodedTextAstConstantHandlerType
*/
template <typename EncodedTextAstConstantHandlerType>
concept EncodedTextAstConstantHandlerReq
= requires(EncodedTextAstConstantHandlerType handler, std::string_view constant) {
{ handler(constant) } -> std::same_as<void>;
};

/**
* Method signature requirements for handling int variables in an encoded text AST.
* @tparam EncodedTextAstIntVarHandlerType
* @tparam encoded_variable_t
*/
template <typename EncodedTextAstIntVarHandlerType, typename encoded_variable_t>
concept EncodedTextAstIntVarHandlerReq
= requires(EncodedTextAstIntVarHandlerType handler, encoded_variable_t var) {
{ handler(var) } -> std::same_as<void>;
};

/**
* Method signature requirements for handling float variables in an encoded text AST.
* @tparam EncodedTextAstFloatVarHandlerType
* @tparam encoded_variable_t
*/
template <typename EncodedTextAstFloatVarHandlerType, typename encoded_variable_t>
concept EncodedTextAstFloatVarHandlerReq
= requires(EncodedTextAstFloatVarHandlerType handler, encoded_variable_t var) {
{ handler(var) } -> std::same_as<void>;
};

/**
* Method signature requirements for handling dictionary variables in an encoded text AST.
* @tparam EncodedTextAstDictVarHandlerType
*/
template <typename EncodedTextAstDictVarHandlerType>
concept EncodedTextAstDictVarHandlerReq
= requires(EncodedTextAstDictVarHandlerType handler, std::string_view var) {
{ handler(var) } -> std::same_as<void>;
};

/**
* A parsed and encoded unstructured text string.
* @tparam encoded_variable_t The type of encoded variables in the string.
*/
template <ir::EncodedVariableTypeReq encoded_variable_t>
class EncodedTextAst {
public:
// Factory function
/**
* @param encoded_vars
* @param string_blob
* @return A result containing the newly created `EncodedTextAst` instance on success, or an
* error code indicating the failure:
* - EncodedTextAstErrEnum::MissingLogtype: if `string_blob` contains no strings.
*/
[[nodiscard]] static auto
create(std::vector<encoded_variable_t> encoded_vars, StringBlob string_blob)
-> ystdlib::error_handling::Result<EncodedTextAst> {
if (string_blob.get_num_strings() < 1) {
return EncodedTextAstErr{EncodedTextAstErrEnum::MissingLogtype};
}
return EncodedTextAst{std::move(encoded_vars), std::move(string_blob)};
}

// Default copy & move constructors and assignment operators
EncodedTextAst(EncodedTextAst const&) = default;
EncodedTextAst(EncodedTextAst&&) noexcept = default;
auto operator=(EncodedTextAst const&) -> EncodedTextAst& = default;
auto operator=(EncodedTextAst&&) noexcept -> EncodedTextAst& = default;

// Destructor
~EncodedTextAst() = default;

// Methods
[[nodiscard]] auto get_logtype() const -> std::string_view {
return m_string_blob.get_string(m_num_dict_vars).value();
}

/**
* Decodes the encoded text AST into its string form by calling the given handlers for each
* component of the message.
* @tparam unescape_logtype Whether to remove the escape characters from the logtype before
* calling `constant_handler`.
* @param constant_handler
* @param int_var_handler
* @param float_var_handler
* @param dict_var_handler
* @return A void result on success, or an error code indicating the failure:
* - EncodedTextAstErrEnum::MissingEncodedVar if an encoded variable is missing.
* - EncodedTextAstErrEnum::MissingDictVar if a dictionary variable is missing.
* - EncodedTextAstErrEnum::UnexpectedTrailingEscapeCharacter if the logtype ends with an
* unexpected escape character.
*/
template <bool unescape_logtype>
[[nodiscard]] auto decode(
EncodedTextAstConstantHandlerReq auto&& constant_handler,
EncodedTextAstIntVarHandlerReq<encoded_variable_t> auto&& int_var_handler,
EncodedTextAstFloatVarHandlerReq<encoded_variable_t> auto&& float_var_handler,
EncodedTextAstDictVarHandlerReq auto&& dict_var_handler
) const -> ystdlib::error_handling::Result<void>;

/**
* Decodes and un-parses the encoded text AST into its string form.
* @return A result containing the decoded string on success, or an error code indicating the
* failure:
* - Forwards `decode`'s return values on failure.
*/
[[nodiscard]] auto to_string() const -> ystdlib::error_handling::Result<std::string> {
std::string decoded_string;
YSTDLIB_ERROR_HANDLING_TRYV(
decode<true>(
[&](std::string_view constant) { decoded_string.append(constant); },
[&](encoded_variable_t int_var) {
decoded_string.append(decode_integer_var(int_var));
},
[&](encoded_variable_t float_var) {
decoded_string.append(decode_float_var(float_var));
},
[&](std::string_view dict_var) { decoded_string.append(dict_var); }
)
);
return decoded_string;
}

private:
// Constructor
EncodedTextAst(std::vector<encoded_variable_t> encoded_vars, StringBlob string_blob)
: m_encoded_vars{std::move(encoded_vars)},
m_string_blob{std::move(string_blob)},
m_num_dict_vars{m_string_blob.get_num_strings() - 1} {}

// Variables
std::vector<encoded_variable_t> m_encoded_vars;
StringBlob m_string_blob;
size_t m_num_dict_vars;
};

template <ir::EncodedVariableTypeReq encoded_variable_t>
template <bool unescape_logtype>
[[nodiscard]] auto EncodedTextAst<encoded_variable_t>::decode(
EncodedTextAstConstantHandlerReq auto&& constant_handler,
EncodedTextAstIntVarHandlerReq<encoded_variable_t> auto&& int_var_handler,
EncodedTextAstFloatVarHandlerReq<encoded_variable_t> auto&& float_var_handler,
EncodedTextAstDictVarHandlerReq auto&& dict_var_handler
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why rvalue references here? I think passing callbacks by value is more common + it's what the previous implementation does.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will switch back to use passing-by-value. However, do u think it makes more sense to update our code base to use rvalue ref for callbacks? This makes the callback object more flexible: the callback can be not just a lambda but also an object that holds stateful values. Using rvalue allows us to pass the object into the call and use it later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@coderabbitai can u create an issue to track this codebase-level update?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Create a GitHub issue to track the callback parameter passing strategy discussion

gh issue create \
  --title "Evaluate using rvalue references for callback parameters codebase-wide" \
  --body "## Context

During the review of PR #1561, a discussion arose regarding the parameter passing strategy for callbacks in the codebase.

Currently, most of the codebase passes callbacks by value. The PR initially used rvalue references (\`auto&&\`) for callback parameters, which offers potential benefits:

- **Increased flexibility**: Allows callbacks to be not just lambdas, but also objects that hold stateful values
- **Efficiency**: Using rvalue references allows passing objects into the call and using them later without unnecessary copies

## Proposal

Evaluate whether it would be beneficial to update the codebase to consistently use rvalue references for callback parameters instead of pass-by-value.

## References

- Discussion: https://github.com/y-scope/clp/pull/1561#discussion_r2500321748
- Related PR: https://github.com/y-scope/clp/pull/1561

Requested by: @LinZhihao-723" \
  --assignee LinZhihao-723 \
  --repo y-scope/clp

Length of output: 42


@LinZhihao-723, I've created issue #1570 to track the evaluation of using rvalue references for callback parameters codebase-wide. The issue includes the context from this discussion, the rationale for the potential change, and references back to this PR and comment thread.


🧠 Learnings used
Learnt from: junhaoliao
Repo: y-scope/clp PR: 939
File: components/package-template/src/etc/clp-config.yml:64-64
Timestamp: 2025-06-24T20:13:46.758Z
Learning: When users ask CodeRabbit to create an issue after providing suggestions, they want a GitHub issue created with the high-level requirements and context, not specific code implementations.

Learnt from: gibber9809
Repo: y-scope/clp PR: 1163
File: components/core/src/clp_s/ColumnWriter.hpp:120-122
Timestamp: 2025-08-06T23:57:49.762Z
Learning: In the CLP codebase, the `encoded_log_dict_id_t` type alias in `components/core/src/clp_s/ColumnWriter.hpp` is appropriately placed as a class-scoped alias because the ColumnWriter.hpp header is already included in all places where this type is used, eliminating the need to move it to a common header.

Learnt from: LinZhihao-723
Repo: y-scope/clp PR: 549
File: components/core/tests/test-ir_encoding_methods.cpp:1180-1186
Timestamp: 2024-10-08T15:52:50.753Z
Learning: In the context of loop constructs, LinZhihao-723 prefers using `while (true)` loops and does not consider alternative loop constructs necessarily more readable.

Learnt from: LinZhihao-723
Repo: y-scope/clp PR: 549
File: components/core/tests/test-ir_encoding_methods.cpp:1180-1186
Timestamp: 2024-10-01T07:59:11.208Z
Learning: In the context of loop constructs, LinZhihao-723 prefers using `while (true)` loops and does not consider alternative loop constructs necessarily more readable.

) const -> ystdlib::error_handling::Result<void> {
auto const logtype{get_logtype()};
auto const logtype_length = logtype.length();
auto const num_encoded_vars{m_encoded_vars.size()};

size_t next_static_text_begin_pos{0};
size_t dictionary_vars_idx{0};
size_t encoded_vars_idx{0};

for (size_t curr_pos{0}; curr_pos < logtype_length; ++curr_pos) {
auto const c{logtype[curr_pos]};
switch (c) {
case enum_to_underlying_type(ir::VariablePlaceholder::Float): {
constant_handler(logtype.substr(
next_static_text_begin_pos,
curr_pos - next_static_text_begin_pos
));
next_static_text_begin_pos = curr_pos + 1;
if (encoded_vars_idx >= num_encoded_vars) {
return EncodedTextAstErr{EncodedTextAstErrEnum::MissingEncodedVar};
}
float_var_handler(m_encoded_vars[encoded_vars_idx]);
++encoded_vars_idx;
break;
}

case enum_to_underlying_type(ir::VariablePlaceholder::Integer): {
constant_handler(logtype.substr(
next_static_text_begin_pos,
curr_pos - next_static_text_begin_pos
));
next_static_text_begin_pos = curr_pos + 1;
if (encoded_vars_idx >= num_encoded_vars) {
return EncodedTextAstErr{EncodedTextAstErrEnum::MissingEncodedVar};
}
int_var_handler(m_encoded_vars[encoded_vars_idx]);
++encoded_vars_idx;
break;
}

case enum_to_underlying_type(ir::VariablePlaceholder::Dictionary): {
constant_handler(logtype.substr(
next_static_text_begin_pos,
curr_pos - next_static_text_begin_pos
));
next_static_text_begin_pos = curr_pos + 1;
if (dictionary_vars_idx >= m_num_dict_vars) {
return EncodedTextAstErr{EncodedTextAstErrEnum::MissingDictVar};
}
dict_var_handler(m_string_blob.get_string(dictionary_vars_idx).value());
++dictionary_vars_idx;
break;
}

case enum_to_underlying_type(ir::VariablePlaceholder::Escape): {
// Ensure the escape character is followed by a character that's being escaped
if (curr_pos == logtype_length - 1) {
return EncodedTextAstErr{
EncodedTextAstErrEnum::UnexpectedTrailingEscapeCharacter
};
}

if constexpr (unescape_logtype) {
constant_handler(logtype.substr(
next_static_text_begin_pos,
curr_pos - next_static_text_begin_pos
));
// Skip the escape character
next_static_text_begin_pos = curr_pos + 1;
}

// The character after the escape character is static text (regardless of whether it
// is a variable placeholder), so increment cur_pos by 1 to ensure we don't process
// the next character in any of the other cases (instead it will be added to the
// message).
++curr_pos;
break;
}

default:
// Regular characters. Do nothing.
continue;
}
}

// Add remainder
if (next_static_text_begin_pos < logtype_length) {
constant_handler(logtype.substr(
next_static_text_begin_pos,
logtype_length - next_static_text_begin_pos
));
}

return ystdlib::error_handling::success();
}
} // namespace clp::ffi

#endif // CLP_FFI_ENCODEDTEXTAST_HPP
29 changes: 29 additions & 0 deletions components/core/src/clp/ffi/EncodedTextAstError.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "EncodedTextAstError.hpp"

#include <string>

#include <ystdlib/error_handling/ErrorCode.hpp>

using clp::ffi::EncodedTextAstErrEnum;
using EncodedTextAstErrorCategory = ystdlib::error_handling::ErrorCategory<EncodedTextAstErrEnum>;

template <>
auto EncodedTextAstErrorCategory::name() const noexcept -> char const* {
return "clp::ffi::ir_stream::IrErrorCode";
}

template <>
auto EncodedTextAstErrorCategory::message(EncodedTextAstErrEnum error_enum) const -> std::string {
switch (error_enum) {
case EncodedTextAstErrEnum::MissingEncodedVar:
return "An encoded variable is missing from the `EncodedTextAst`";
case EncodedTextAstErrEnum::MissingDictVar:
return "A dictionary variable is missing from the `EncodedTextAst`";
case EncodedTextAstErrEnum::MissingLogtype:
return "The logtype is missing from the `EncodedTextAst`";
case EncodedTextAstErrEnum::UnexpectedTrailingEscapeCharacter:
return "Unexpected escape character without escaped value at the end of the logtype";
Comment on lines 19 to 25
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return "An encoded variable is missing from the `EncodedTextAst`";
case EncodedTextAstErrEnum::MissingDictVar:
return "A dictionary variable is missing from the `EncodedTextAst`";
case EncodedTextAstErrEnum::MissingLogtype:
return "The logtype is missing from the `EncodedTextAst`";
case EncodedTextAstErrEnum::UnexpectedTrailingEscapeCharacter:
return "Unexpected escape character without escaped value at the end of the logtype";
return "An encoded variable is missing from the `EncodedTextAst`.";
case EncodedTextAstErrEnum::MissingDictVar:
return "A dictionary variable is missing from the `EncodedTextAst`.";
case EncodedTextAstErrEnum::MissingLogtype:
return "The logtype is missing from the `EncodedTextAst`.";
case EncodedTextAstErrEnum::UnexpectedTrailingEscapeCharacter:
return "Unexpected escape character without escaped value at the end of the logtype.";

We end the error messages with periods for our other error categories, so editing this for consistency.

Copy link
Member Author

@LinZhihao-723 LinZhihao-723 Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I propose that we try to avoid adding periods unless it's a logging statement. Two benefits of doing so:

  1. In a logging statement we usually do:
  LOG(fmt("{}: {}", err.category().name(), err.message()));

While in other logging statement we do:

  LOG(fmt("{} is not valid.", addr))

In the first statement we're assuming the error message will end with a period. But this becomes hard to manage as some error codes don't end with . while others do (e.g., std::errc doesn't end with .: https://en.cppreference.com/w/cpp/error/errc.html)

  1. Not including the period in the error message allows us to format the message in the middle of a sentence if needed.

default:
return "Unknown error code enum.";
}
}
24 changes: 24 additions & 0 deletions components/core/src/clp/ffi/EncodedTextAstError.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef CLP_FFI_ENCODEDTEXTASTERROR_HPP
#define CLP_FFI_ENCODEDTEXTASTERROR_HPP

#include <cstdint>

#include <ystdlib/error_handling/ErrorCode.hpp>

namespace clp::ffi {
/**
* Error enums for `EncodedTextAst`.
*/
enum class EncodedTextAstErrEnum : uint8_t {
MissingDictVar = 1,
MissingEncodedVar,
MissingLogtype,
UnexpectedTrailingEscapeCharacter,
};

using EncodedTextAstErr = ystdlib::error_handling::ErrorCode<EncodedTextAstErrEnum>;
} // namespace clp::ffi

YSTDLIB_ERROR_HANDLING_MARK_AS_ERROR_CODE_ENUM(clp::ffi::EncodedTextAstErrEnum);

#endif // CLP_FFI_ENCODEDTEXTASTERROR_HPP
Loading
Loading