Skip to content

Commit a8444ff

Browse files
feat(kv-ir): Add support for duplicate columns in projections. (#1245)
Co-authored-by: Lin Zhihao <[email protected]>
1 parent c917e1f commit a8444ff

File tree

8 files changed

+77
-43
lines changed

8 files changed

+77
-43
lines changed

components/core/src/clp/ffi/ir_stream/search/NewProjectedSchemaTreeNodeCallbackReq.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#ifndef CLP_FFI_IR_STREAM_SEARCH_NEWPROJECTEDSCHEMATREENODECALLBACKREQ_HPP
22
#define CLP_FFI_IR_STREAM_SEARCH_NEWPROJECTEDSCHEMATREENODECALLBACKREQ_HPP
33

4+
#include <cstddef>
45
#include <string_view>
56
#include <type_traits>
7+
#include <utility>
68

79
#include <ystdlib/error_handling/Result.hpp>
810

@@ -18,7 +20,7 @@ namespace clp::ffi::ir_stream::search {
1820
* @tparam NewProjectedSchemaTreeNodeCallbackType
1921
* @param arg_0 Whether the schema-tree node is for an auto-generated kv-pair.
2022
* @param arg_1 The ID of the schema-tree node.
21-
* @param arg_2 The projected key path.
23+
* @param arg_2 The projected key path and its original index in the list of projections.
2224
* @return A void result on success or a user-defined error code indicating the failure.
2325
*/
2426
template <typename NewProjectedSchemaTreeNodeCallbackType>
@@ -27,7 +29,7 @@ concept NewProjectedSchemaTreeNodeCallbackReq = std::is_invocable_r_v<
2729
NewProjectedSchemaTreeNodeCallbackType,
2830
bool,
2931
SchemaTree::Node::id_t,
30-
std::string_view>;
32+
std::pair<std::string_view, size_t>>;
3133
} // namespace clp::ffi::ir_stream::search
3234

3335
#endif // CLP_FFI_IR_STREAM_SEARCH_NEWPROJECTEDSCHEMATREENODECALLBACKREQ_HPP

components/core/src/clp/ffi/ir_stream/search/QueryHandler.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ class QueryHandler {
3232
public:
3333
// Factory function
3434
/**
35-
* @param new_projected_schema_tree_node_callback
35+
* @param new_projected_schema_tree_node_callback Callback for newly resolved projections.
36+
* @param query The search query.
37+
* @param projections The columns to project.
38+
* @param case_sensitive_match Whether to use case-sensitive match for string comparison.
39+
* @param allow_duplicate_projected_columns Whether to allow duplicate projected columns.
3640
* @return A result containing the newly constructed `QueryHandler` on success, or an error code
3741
* indicating the failure:
3842
* - Forwards `QueryHandlerImpl::create`'s return values.
@@ -42,15 +46,17 @@ class QueryHandler {
4246
std::shared_ptr<clp_s::search::ast::Expression> query,
4347
std::vector<std::pair<std::string, clp_s::search::ast::literal_type_bitmask_t>> const&
4448
projections,
45-
bool case_sensitive_match
49+
bool case_sensitive_match,
50+
bool allow_duplicate_projected_columns = false
4651
) -> ystdlib::error_handling::Result<QueryHandler> {
4752
return QueryHandler{
4853
new_projected_schema_tree_node_callback,
4954
YSTDLIB_ERROR_HANDLING_TRYX(
5055
QueryHandlerImpl::create(
5156
std::move(query),
5257
projections,
53-
case_sensitive_match
58+
case_sensitive_match,
59+
allow_duplicate_projected_columns
5460
)
5561
)
5662
};

components/core/src/clp/ffi/ir_stream/search/QueryHandlerImpl.cpp

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "QueryHandlerImpl.hpp"
22

3+
#include <cstddef>
34
#include <exception>
45
#include <memory>
56
#include <optional>
@@ -54,6 +55,7 @@ using clp_s::search::ast::literal_type_bitmask_t;
5455
/**
5556
* Creates column descriptors and column-to-original-key map from the given projections.
5657
* @param projections
58+
* @param allow_duplicate_projected_columns Whether to allow duplicate projected columns.
5759
* @return A result containing a pair or an error code indicating the failure:
5860
* - The pair:
5961
* - A vector of projected columns.
@@ -65,7 +67,8 @@ using clp_s::search::ast::literal_type_bitmask_t;
6567
* descriptor for the projected key.
6668
*/
6769
[[nodiscard]] auto create_projected_columns_and_projection_map(
68-
std::vector<std::pair<std::string, literal_type_bitmask_t>> const& projections
70+
std::vector<std::pair<std::string, literal_type_bitmask_t>> const& projections,
71+
bool allow_duplicate_projected_columns
6972
)
7073
-> ystdlib::error_handling::Result<std::pair<
7174
std::vector<std::shared_ptr<ColumnDescriptor>>,
@@ -74,7 +77,7 @@ using clp_s::search::ast::literal_type_bitmask_t;
7477
/**
7578
* Creates initial partial resolutions for the given query and projections.
7679
* @param query
77-
* @param projected_column_to_original_key
80+
* @param projected_column_to_original_key_and_index
7881
* @return A result containing a pair or an error code indicating the failure:
7982
* - The pair:
8083
* - The partial resolution for auto-generated keys.
@@ -86,7 +89,7 @@ using clp_s::search::ast::literal_type_bitmask_t;
8689
*/
8790
[[nodiscard]] auto create_initial_partial_resolutions(
8891
std::shared_ptr<Expression> const& query,
89-
QueryHandlerImpl::ProjectionMap const& projected_column_to_original_key
92+
QueryHandlerImpl::ProjectionMap const& projected_column_to_original_key_and_index
9093
)
9194
-> ystdlib::error_handling::Result<std::pair<
9295
QueryHandlerImpl::PartialResolutionMap,
@@ -182,20 +185,24 @@ auto preprocess_query(std::shared_ptr<Expression> query)
182185
}
183186

184187
auto create_projected_columns_and_projection_map(
185-
std::vector<std::pair<std::string, literal_type_bitmask_t>> const& projections
188+
std::vector<std::pair<std::string, literal_type_bitmask_t>> const& projections,
189+
bool allow_duplicate_projected_columns
186190
)
187191
-> ystdlib::error_handling::Result<std::pair<
188192
std::vector<std::shared_ptr<ColumnDescriptor>>,
189193
QueryHandlerImpl::ProjectionMap>> {
190194
std::unordered_set<std::string_view> unique_projected_columns;
191195
std::vector<std::shared_ptr<ColumnDescriptor>> projected_columns;
192-
QueryHandlerImpl::ProjectionMap projected_column_to_original_key;
196+
QueryHandlerImpl::ProjectionMap projected_column_to_original_key_and_index;
193197

194-
for (auto const& [key, types] : projections) {
195-
if (unique_projected_columns.contains(key)) {
196-
return ErrorCode{ErrorCodeEnum::DuplicateProjectedColumn};
198+
for (size_t index{0}; index < projections.size(); ++index) {
199+
auto const& [key, types] = projections[index];
200+
if (false == allow_duplicate_projected_columns) {
201+
if (unique_projected_columns.contains(key)) {
202+
return ErrorCode{ErrorCodeEnum::DuplicateProjectedColumn};
203+
}
204+
unique_projected_columns.emplace(key);
197205
}
198-
unique_projected_columns.emplace(key);
199206

200207
std::vector<std::string> descriptor_tokens;
201208
std::string descriptor_namespace;
@@ -220,26 +227,29 @@ auto create_projected_columns_and_projection_map(
220227
{
221228
return ErrorCode{ErrorCodeEnum::ProjectionColumnDescriptorCreationFailure};
222229
}
223-
projected_column_to_original_key.emplace(column_descriptor.get(), key);
230+
projected_column_to_original_key_and_index.emplace(
231+
column_descriptor.get(),
232+
std::make_pair(key, index)
233+
);
224234
projected_columns.emplace_back(std::move(column_descriptor));
225235
} catch (std::exception const& e) {
226236
return ErrorCode{ErrorCodeEnum::ProjectionColumnDescriptorCreationFailure};
227237
}
228238
}
229239

230-
return {std::move(projected_columns), std::move(projected_column_to_original_key)};
240+
return {std::move(projected_columns), std::move(projected_column_to_original_key_and_index)};
231241
}
232242

233243
auto create_initial_partial_resolutions(
234244
std::shared_ptr<Expression> const& query,
235-
QueryHandlerImpl::ProjectionMap const& projected_column_to_original_key
245+
QueryHandlerImpl::ProjectionMap const& projected_column_to_original_key_and_index
236246
)
237247
-> ystdlib::error_handling::Result<std::pair<
238248
QueryHandlerImpl::PartialResolutionMap,
239249
QueryHandlerImpl::PartialResolutionMap>> {
240250
QueryHandlerImpl::PartialResolutionMap auto_gen_namespace_partial_resolutions;
241251
QueryHandlerImpl::PartialResolutionMap user_gen_namespace_partial_resolutions;
242-
for (auto const& [col, key] : projected_column_to_original_key) {
252+
for (auto const& [col, _] : projected_column_to_original_key_and_index) {
243253
auto const optional_is_auto_gen{is_auto_generated(col->get_namespace())};
244254
if (false == optional_is_auto_gen.has_value()) {
245255
// Ignore unrecognized namespace
@@ -408,22 +418,27 @@ auto evaluate_wildcard_filter(
408418
auto QueryHandlerImpl::create(
409419
std::shared_ptr<Expression> query,
410420
std::vector<std::pair<std::string, literal_type_bitmask_t>> const& projections,
411-
bool case_sensitive_match
421+
bool case_sensitive_match,
422+
bool allow_duplicate_projected_columns
412423
) -> ystdlib::error_handling::Result<QueryHandlerImpl> {
413424
query = YSTDLIB_ERROR_HANDLING_TRYX(preprocess_query(query));
414-
auto [projected_columns, projected_column_to_original_key]
415-
= YSTDLIB_ERROR_HANDLING_TRYX(create_projected_columns_and_projection_map(projections));
425+
auto [projected_columns, projected_column_to_original_key_and_index]
426+
= YSTDLIB_ERROR_HANDLING_TRYX(create_projected_columns_and_projection_map(
427+
projections,
428+
allow_duplicate_projected_columns
429+
));
416430
auto [auto_gen_namespace_partial_resolutions, user_gen_namespace_partial_resolutions]
417-
= YSTDLIB_ERROR_HANDLING_TRYX(
418-
create_initial_partial_resolutions(query, projected_column_to_original_key)
419-
);
431+
= YSTDLIB_ERROR_HANDLING_TRYX(create_initial_partial_resolutions(
432+
query,
433+
projected_column_to_original_key_and_index
434+
));
420435

421436
return QueryHandlerImpl{
422437
std::move(query),
423438
std::move(auto_gen_namespace_partial_resolutions),
424439
std::move(user_gen_namespace_partial_resolutions),
425440
std::move(projected_columns),
426-
std::move(projected_column_to_original_key),
441+
std::move(projected_column_to_original_key_and_index),
427442
case_sensitive_match
428443
};
429444
}

components/core/src/clp/ffi/ir_stream/search/QueryHandlerImpl.hpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef CLP_FFI_IR_STREAM_SEARCH_QUERYHANDLERIMPL_HPP
22
#define CLP_FFI_IR_STREAM_SEARCH_QUERYHANDLERIMPL_HPP
33

4+
#include <cstddef>
45
#include <memory>
56
#include <optional>
67
#include <string>
@@ -117,7 +118,8 @@ class QueryHandlerImpl {
117118
clp_s::search::ast::DescriptorList::iterator m_next_token_it;
118119
};
119120

120-
using ProjectionMap = std::unordered_map<clp_s::search::ast::ColumnDescriptor*, std::string>;
121+
using ProjectionMap = std::
122+
unordered_map<clp_s::search::ast::ColumnDescriptor*, std::pair<std::string, size_t>>;
121123

122124
using PartialResolutionMap = std::
123125
unordered_map<SchemaTree::Node::id_t, std::vector<ColumnDescriptorTokenIterator>>;
@@ -127,6 +129,7 @@ class QueryHandlerImpl {
127129
* @param query The search query.
128130
* @param projections The columns to project.
129131
* @param case_sensitive_match Whether to use case-sensitive match for string comparison.
132+
* @param allow_duplicate_projected_columns Whether to allow duplicate projected columns.
130133
* @return A result containing the newly constructed `QueryHandler` on success, or an error code
131134
* indicating the failure:
132135
* - Forwards `preprocess_query`'s return values.
@@ -137,7 +140,8 @@ class QueryHandlerImpl {
137140
std::shared_ptr<clp_s::search::ast::Expression> query,
138141
std::vector<std::pair<std::string, clp_s::search::ast::literal_type_bitmask_t>> const&
139142
projections,
140-
bool case_sensitive_match
143+
bool case_sensitive_match,
144+
bool allow_duplicate_projected_columns
141145
) -> ystdlib::error_handling::Result<QueryHandlerImpl>;
142146

143147
// Delete copy constructor and assignment operator
@@ -292,7 +296,7 @@ class QueryHandlerImpl {
292296
PartialResolutionMap auto_gen_namespace_partial_resolutions,
293297
PartialResolutionMap user_gen_namespace_partial_resolutions,
294298
std::vector<std::shared_ptr<clp_s::search::ast::ColumnDescriptor>> projected_columns,
295-
ProjectionMap projected_column_to_original_key,
299+
ProjectionMap projected_column_to_original_key_and_index,
296300
bool case_sensitive_match
297301
)
298302
: m_query{std::move(query)},
@@ -306,7 +310,9 @@ class QueryHandlerImpl {
306310
std::move(user_gen_namespace_partial_resolutions)
307311
},
308312
m_projected_columns{std::move(projected_columns)},
309-
m_projected_column_to_original_key{std::move(projected_column_to_original_key)},
313+
m_projected_column_to_original_key_and_index{
314+
std::move(projected_column_to_original_key_and_index)
315+
},
310316
m_case_sensitive_match{case_sensitive_match} {}
311317

312318
// Methods
@@ -388,7 +394,7 @@ class QueryHandlerImpl {
388394
std::unordered_set<SchemaTree::Node::id_t>>
389395
m_resolved_column_to_schema_tree_node_ids;
390396
std::vector<std::shared_ptr<clp_s::search::ast::ColumnDescriptor>> m_projected_columns;
391-
ProjectionMap m_projected_column_to_original_key;
397+
ProjectionMap m_projected_column_to_original_key_and_index;
392398
bool m_case_sensitive_match;
393399
std::vector<std::pair<AstExprIterator, ast_evaluation_result_bitmask_t>> m_ast_dfs_stack;
394400
};
@@ -492,11 +498,13 @@ auto QueryHandlerImpl::handle_column_resolution_on_new_schema_tree_node(
492498
}
493499

494500
auto* col{token_it.get_column_descriptor()};
495-
if (m_projected_column_to_original_key.contains(col)) {
501+
auto const original_key_and_index_it = m_projected_column_to_original_key_and_index.find(col);
502+
if (m_projected_column_to_original_key_and_index.end() != original_key_and_index_it) {
503+
auto const& [original_key, projected_index] = original_key_and_index_it->second;
496504
YSTDLIB_ERROR_HANDLING_TRYV(new_projected_schema_tree_node_callback(
497505
is_auto_generated,
498506
node_id,
499-
m_projected_column_to_original_key.at(col)
507+
std::make_pair(original_key, projected_index)
500508
));
501509
return ystdlib::error_handling::success();
502510
}

components/core/src/clp/ffi/ir_stream/search/test/test_QueryHandlerImpl.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <cstddef>
12
#include <map>
23
#include <memory>
34
#include <set>
@@ -410,7 +411,7 @@ TEST_CASE(
410411
auto query_stream{std::istringstream{kql_query_str}};
411412
auto query{clp_s::search::kql::parse_kql_expression(query_stream)};
412413

413-
auto query_handler_impl_result{QueryHandlerImpl::create(query, {}, true)};
414+
auto query_handler_impl_result{QueryHandlerImpl::create(query, {}, true, false)};
414415
REQUIRE_FALSE(query_handler_impl_result.has_error());
415416
auto& query_handler_impl{query_handler_impl_result.value()};
416417

@@ -521,19 +522,21 @@ TEST_CASE("query_handler_handle_projection", "[ffi][ir_stream][search][QueryHand
521522
unresolvable_projections_from_unrecognized_namespaces.cend()
522523
);
523524

524-
auto query_handler_impl_result{QueryHandlerImpl::create(null_query, projections, true)};
525+
auto query_handler_impl_result{QueryHandlerImpl::create(null_query, projections, true, false)};
525526
REQUIRE_FALSE(query_handler_impl_result.has_error());
526527
auto& query_handler_impl{query_handler_impl_result.value()};
527528

528529
SchemaTree::Node::id_t node_id{1};
529530
std::map<std::string, std::set<SchemaTree::Node::id_t>> actual_resolved_projections;
530531
auto new_projected_schema_tree_node_callback
531-
= [&](bool is_auto_gen,
532-
SchemaTree::Node::id_t node_id,
533-
std::string_view key) -> ystdlib::error_handling::Result<void> {
532+
= [&](
533+
bool is_auto_gen,
534+
SchemaTree::Node::id_t node_id,
535+
std::pair<std::string_view, size_t> key_and_index
536+
) -> ystdlib::error_handling::Result<void> {
534537
REQUIRE((is_auto_generated == is_auto_gen));
535538
auto [column_it, column_inserted] = actual_resolved_projections.try_emplace(
536-
std::string{key},
539+
std::string{key_and_index.first},
537540
std::set<SchemaTree::Node::id_t>{}
538541
);
539542
auto [node_id_it, node_id_inserted] = column_it->second.emplace(node_id);
@@ -615,7 +618,7 @@ TEST_CASE("query_handler_evaluation_kv_pair_log_event", "[ffi][ir_stream][search
615618
auto query{clp_s::search::kql::parse_kql_expression(query_stream)};
616619
REQUIRE((nullptr != query));
617620

618-
auto query_handler_impl_result{QueryHandlerImpl::create(query, {}, true)};
621+
auto query_handler_impl_result{QueryHandlerImpl::create(query, {}, true, false)};
619622
REQUIRE_FALSE(query_handler_impl_result.has_error());
620623
auto& query_handler_impl{query_handler_impl_result.value()};
621624

components/core/src/clp/ffi/ir_stream/search/test/utils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ auto ColumnQueryPossibleMatches::serialize() const -> std::string {
6767
auto trivial_new_projected_schema_tree_node_callback(
6868
[[maybe_unused]] bool is_auto_generated,
6969
[[maybe_unused]] SchemaTree::Node::id_t node_id,
70-
[[maybe_unused]] std::string_view projected_key_path
70+
[[maybe_unused]] std::pair<std::string_view, size_t> projected_key_path_and_index
7171
) -> ystdlib::error_handling::Result<void> {
7272
return ystdlib::error_handling::success();
7373
}

components/core/src/clp/ffi/ir_stream/search/test/utils.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ class ColumnQueryPossibleMatches {
7474
* doing anything.
7575
* @param is_auto_generated
7676
* @param node_id
77-
* @param projected_key_path
77+
* @param projected_key_path_and_index
7878
* @return A void result.
7979
*/
8080
[[nodiscard]] auto trivial_new_projected_schema_tree_node_callback(
8181
bool is_auto_generated,
8282
SchemaTree::Node::id_t node_id,
83-
std::string_view projected_key_path
83+
std::pair<std::string_view, size_t> projected_key_path_and_index
8484
) -> ystdlib::error_handling::Result<void>;
8585

8686
/**

components/core/src/clp_s/kv_ir_search.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ auto deserialize_and_search_kv_ir_stream(
200200
auto trivial_new_projected_schema_tree_node_callback
201201
= []([[maybe_unused]] bool is_auto_generated,
202202
[[maybe_unused]] SchemaTree::Node::id_t node_id,
203-
[[maybe_unused]] std::string_view projected_key_path)
203+
[[maybe_unused]] std::pair<std::string_view, size_t> projected_key_and_index)
204204
-> ystdlib::error_handling::Result<void> { return ystdlib::error_handling::success(); };
205205
using QueryHandlerType = clp::ffi::ir_stream::search::QueryHandler<
206206
decltype(trivial_new_projected_schema_tree_node_callback)>;

0 commit comments

Comments
 (0)