Skip to content

Commit c60dcb3

Browse files
cql3: implement metadata::calculate_metadata_id()
CQLv5 introduced metadata_id, a checksum computed from column names and types, to track schema changes in prepared statements. This commit introduces calculate_metadata_id() to compute such an id for the given metadata. Please note that calculate_metadata_id() produces different hashes than Cassandra's computeResultMetadataId(): we use SHA256 truncated to 128 bits instead of MD5. There are also two smaller technical differences: calculate_metadata_id() doesn't add unneeded zeros, and it feeds the length of a string to the hasher whenever an sstring is hashed. The difference is intentional: MD5 has known vulnerabilities, and we don't want to introduce any dependency between our metadata_id and Cassandra's. This change: - Adds cql_metadata_id_type - Implements metadata::calculate_metadata_id() - Adds boost tests to confirm the correctness of the function
1 parent 7646e14 commit c60dcb3

File tree

4 files changed

+171
-0
lines changed

4 files changed

+171
-0
lines changed

cql3/result_set.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
99
*/
1010

11+
#include <cstdint>
1112
#include "utils/assert.hh"
13+
#include "utils/hashers.hh"
1214
#include "cql3/result_set.hh"
1315

1416
namespace cql3 {
@@ -71,6 +73,18 @@ lw_shared_ptr<const service::pager::paging_state> metadata::paging_state() const
7173
return _paging_state;
7274
}
7375

76+
// Metadata_id is a checksum computed from given metadata to track schema changes in prepared statements.
77+
// Originally introduced in CQLv5.
78+
cql3::cql_metadata_id_type metadata::calculate_metadata_id() const {
79+
auto h = sha256_hasher();
80+
for (uint32_t i = 0; i < _column_info->_column_count; ++i) {
81+
feed_hash(h, _column_info->_names[i]->name->name());
82+
feed_hash(h, _column_info->_names[i]->type->name());
83+
}
84+
// Return first 16 bytes to have the same length as Cassandra's MD5
85+
return cql_metadata_id_type(h.finalize().substr(0, 16));
86+
}
87+
7488
prepared_metadata::prepared_metadata(const std::vector<lw_shared_ptr<column_specification>>& names,
7589
const std::vector<uint16_t>& partition_key_bind_indices,
7690
bool is_conditional)

cql3/result_set.hh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#pragma once
1212

1313
#include <vector>
14+
#include "cql3/statements/prepared_statement.hh"
1415
#include "utils/chunked_vector.hh"
1516
#include "enum_set.hh"
1617
#include "service/pager/paging_state.hh"
@@ -89,6 +90,8 @@ public:
8990
const std::vector<lw_shared_ptr<column_specification>>& get_names() const {
9091
return _column_info->_names;
9192
}
93+
94+
cql3::cql_metadata_id_type calculate_metadata_id() const;
9295
};
9396

9497
::shared_ptr<const cql3::metadata> make_empty_metadata();

cql3/statements/prepared_statement.hh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,21 @@
1818
#include <vector>
1919

2020
#include "exceptions/exceptions.hh"
21+
#include "types/types.hh"
2122

2223
namespace cql3 {
2324

2425
class prepare_context;
2526
class column_specification;
2627
class cql_statement;
2728

29+
struct cql_metadata_id_type {
30+
explicit cql_metadata_id_type(bytes&& metadata_id) : _metadata_id(std::move(metadata_id)) {}
31+
bytes _metadata_id;
32+
33+
bool operator==(const cql_metadata_id_type& other) const = default;
34+
};
35+
2836
namespace statements {
2937

3038
struct invalidated_prepared_usage_attempt {
@@ -61,3 +69,10 @@ public:
6169
}
6270

6371
}
72+
73+
// fmt support for cql3::cql_metadata_id_type: the id is printed by formatting
// the wrapped `_metadata_id` bytes with "{}".
template <>
struct fmt::formatter<cql3::cql_metadata_id_type> {
    // No format specification is consumed; the parse position is returned unchanged.
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }

    auto format(const cql3::cql_metadata_id_type& id, fmt::format_context& ctx) const {
        auto out = ctx.out();
        return fmt::format_to(out, "{}", id._metadata_id);
    }
};

test/boost/schema_change_test.cc

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77
*/
88

99

10+
#include "bytes.hh"
11+
#include <boost/intrusive/detail/hook_traits.hpp>
1012
#include <iostream>
1113
#include <fmt/ranges.h>
1214
#include <seastar/core/thread.hh>
15+
#include <utility>
1316
#undef SEASTAR_TESTING_MAIN
1417
#include <seastar/testing/test_case.hh>
1518
#include <seastar/util/defer.hh>
@@ -30,6 +33,7 @@
3033
#include "test/lib/exception_utils.hh"
3134
#include "test/lib/log.hh"
3235
#include "cdc/cdc_extension.hh"
36+
#include "test/lib/test_utils.hh"
3337

3438
BOOST_AUTO_TEST_SUITE(schema_change_test)
3539

@@ -1160,4 +1164,139 @@ SEASTAR_TEST_CASE(test_system_schema_version_is_stable) {
11601164
});
11611165
}
11621166

1167+
// The purpose of this check is to make sure that we don't accidentally change the metadata_id.
1168+
// The metadata_id should be stable to avoid ping-pong when driver connects to a mixed cluster.
1169+
void verify_metadata_id_is_stable(cql3::cql_metadata_id_type metadata_id, sstring known_hash) {
1170+
BOOST_REQUIRE_EQUAL(metadata_id._metadata_id, from_hex(known_hash));
1171+
}
1172+
1173+
// Metadata without any columns still yields a 16-byte id with a known, fixed value.
BOOST_AUTO_TEST_CASE(metadata_id_from_empty_metadata) {
    cql3::metadata empty_metadata{std::vector<lw_shared_ptr<cql3::column_specification>>{}};
    auto id = empty_metadata.calculate_metadata_id();

    BOOST_REQUIRE_EQUAL(id._metadata_id.size(), 16);
    verify_metadata_id_is_stable(id, "e3b0c44298fc1c149afbf4c8996fb924");
}
1179+
1180+
// Test helper: builds one column_specification per (name, type) pair in
// `columns`, all under keyspace `ks` and table `cf`, and returns the
// metadata_id calculated for metadata made of exactly those columns.
cql3::cql_metadata_id_type compute_metadata_id(std::vector<std::pair<sstring, shared_ptr<const abstract_type>>> columns, sstring ks = "ks", sstring cf = "cf") {
    std::vector<lw_shared_ptr<cql3::column_specification>> specs;
    for (const auto& [col_name, col_type] : columns) {
        auto identifier = make_shared<cql3::column_identifier>(col_name, false);
        specs.push_back(make_lw_shared(cql3::column_specification(ks, cf, std::move(identifier), col_type)));
    }
    cql3::metadata md{specs};
    return md.calculate_metadata_id();
}
1187+
1188+
// The same column under different keyspace/table names must produce the same id.
BOOST_AUTO_TEST_CASE(metadata_id_with_different_keyspace_and_table) {
    const auto column = std::make_pair("id", uuid_type);
    auto id_first = compute_metadata_id({column}, "ks1", "cf1");
    auto id_second = compute_metadata_id({column}, "ks2", "cf2");

    BOOST_REQUIRE_EQUAL(id_first, id_second);
    verify_metadata_id_is_stable(id_first, "d0c38eb409a57bb14497c35b80dfaaf1");
}
1196+
1197+
// Columns of the same type but with different names must produce different ids.
BOOST_AUTO_TEST_CASE(metadata_id_with_different_column_name) {
    auto id_for_id = compute_metadata_id({{"id", uuid_type}});
    auto id_for_id2 = compute_metadata_id({{"id2", uuid_type}});

    BOOST_REQUIRE_NE(id_for_id, id_for_id2);
    verify_metadata_id_is_stable(id_for_id, "d0c38eb409a57bb14497c35b80dfaaf1");
    verify_metadata_id_is_stable(id_for_id2, "ae0bc2741d0480f0ebf4ee18a9bca7c7");
}
1205+
1206+
// Columns with the same name but different types must produce different ids.
BOOST_AUTO_TEST_CASE(metadata_id_with_different_column_type) {
    const auto name = "id";
    auto id_uuid = compute_metadata_id({{name, uuid_type}});
    auto id_int32 = compute_metadata_id({{name, int32_type}});

    BOOST_REQUIRE_NE(id_uuid, id_int32);
    verify_metadata_id_is_stable(id_uuid, "d0c38eb409a57bb14497c35b80dfaaf1");
    verify_metadata_id_is_stable(id_int32, "b62d95c978e2e2498100ad8d20979868");
}
1215+
1216+
// Appending a column to the column list must change the id.
BOOST_AUTO_TEST_CASE(metadata_id_with_different_column_number) {
    const auto first = std::make_pair("val1", int32_type);
    const auto second = std::make_pair("val2", int32_type);
    auto id_one_column = compute_metadata_id({first});
    auto id_two_columns = compute_metadata_id({first, second});

    BOOST_REQUIRE_NE(id_one_column, id_two_columns);
    verify_metadata_id_is_stable(id_one_column, "f38171ab2b2e4d98e3f76a4640de5b32");
    verify_metadata_id_is_stable(id_two_columns, "31c5cb5d0d41fbc426266248cc37941a");
}
1226+
1227+
// The same columns listed in a different order must produce different ids.
BOOST_AUTO_TEST_CASE(metadata_id_with_different_column_order) {
    const auto first = std::make_pair("val1", int32_type);
    const auto second = std::make_pair("val2", int32_type);
    auto id_forward = compute_metadata_id({first, second});
    auto id_reversed = compute_metadata_id({second, first});

    BOOST_REQUIRE_NE(id_forward, id_reversed);
    verify_metadata_id_is_stable(id_forward, "31c5cb5d0d41fbc426266248cc37941a");
    verify_metadata_id_is_stable(id_reversed, "b52512f2b76d3e0695dcaf7b0a71efac");
}
1237+
1238+
// For a single column "val1" whose type is a user-defined type, checks that the
// id changes when the UDT's field count, field name, field type, UDT name or
// multi_cell flag changes, and that each variant keeps its known id.
BOOST_AUTO_TEST_CASE(metadata_id_with_udt) {

    // Returns the metadata_id of one column "val1" typed as a UDT in keyspace
    // "ks" built from the given field names and types.
    auto id_for_udt = [&](
            const std::vector<bytes>& names,
            const std::vector<data_type>& types,
            const char* udt_name = "udt_name",
            const bool multi_cell = true) {
        BOOST_REQUIRE_EQUAL(names.size(), types.size());
        return compute_metadata_id({{
            "val1",
            user_type_impl::get_instance("ks", udt_name, names, types, multi_cell)}}
        );
    };

    // Baseline: one int32 field "f1", default UDT name, multi_cell enabled.
    auto id_base = id_for_udt({"f1"}, {int32_type});

    // Different field number
    auto id_two_fields = id_for_udt({"f1", "f2"}, {int32_type, int32_type});
    BOOST_REQUIRE_NE(id_base, id_two_fields);

    // Different field name
    auto id_other_field_name = id_for_udt({"f2"}, {int32_type});
    BOOST_REQUIRE_NE(id_base, id_other_field_name);

    // Different field type
    auto id_other_field_type = id_for_udt({"f1"}, {float_type});
    BOOST_REQUIRE_NE(id_base, id_other_field_type);

    // Different UDT name
    auto id_other_udt_name = id_for_udt({"f1"}, {int32_type}, "different_udt_name");
    BOOST_REQUIRE_NE(id_base, id_other_udt_name);

    // False multi_cell mark
    auto id_not_multi_cell = id_for_udt({"f1"}, {int32_type}, "udt_name", false);
    BOOST_REQUIRE_NE(id_base, id_not_multi_cell);

    verify_metadata_id_is_stable(id_base, "9e556a9632191ac829c961c94719073a");
    verify_metadata_id_is_stable(id_two_fields, "f0a58cd95fed3009b67ff6b4bda1fae1");
    verify_metadata_id_is_stable(id_other_field_name, "6a99234baebad33d9b9081cbdef9cd8b");
    verify_metadata_id_is_stable(id_other_field_type, "72780d64c71ec0265bb48194ec5b0f75");
    verify_metadata_id_is_stable(id_other_udt_name, "767b01cdb5a61f90af9d824338de40e9");
    verify_metadata_id_is_stable(id_not_multi_cell, "02f16bdc4b235791a44983fe56618006");
}
1281+
1282+
// Runs `SELECT * FROM <table>;` against the test environment and returns the
// metadata_id calculated from the metadata of the returned result set.
//
// Fails the current test if the response is not a `rows` result message,
// rather than dereferencing the null pointer that the cast would yield.
cql3::cql_metadata_id_type get_metadata_id(cql_test_env& e, sstring const& table) {
    auto msg = e.execute_cql(format("SELECT * FROM {};", table)).get();
    auto rows = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(msg);
    BOOST_REQUIRE(rows);
    return rows->rs().get_metadata().calculate_metadata_id();
}
1287+
1288+
// Adding a column changes the metadata_id of `SELECT *` on the table;
// dropping that column again restores the initial id.
SEASTAR_TEST_CASE(metadata_id_unchanged) {
    return do_with_cql_env_thread([] (cql_test_env& env) {
        cquery_nofail(env, "CREATE TABLE t(p int PRIMARY KEY, c1 int)");
        cquery_nofail(env, "INSERT INTO t(p, c1) VALUES (0, 0)");
        const auto baseline_id = get_metadata_id(env, "t");

        cquery_nofail(env, "ALTER TABLE t ADD (c2 int)");
        const auto id_with_c2 = get_metadata_id(env, "t");
        BOOST_REQUIRE_NE(baseline_id, id_with_c2);

        cquery_nofail(env, "ALTER TABLE t DROP c2");
        const auto id_after_drop = get_metadata_id(env, "t");
        BOOST_REQUIRE_EQUAL(baseline_id, id_after_drop);
    });
}
1301+
11631302
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)