Skip to content

Commit 7603af9

Browse files
feat: Support MSGPACK_OBJECT_ARRAY for hashes (#4167)
Signed-off-by: Julien Jerphanion <git@jjerphan.xyz> Co-authored-by: Johan Mabille <johan.mabille@gmail.com>
1 parent 19af326 commit 7603af9

File tree

4 files changed

+1447
-26
lines changed

4 files changed

+1447
-26
lines changed

libmamba/src/core/shards.cpp

Lines changed: 72 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,15 @@ namespace mamba
9090

9191
/**
9292
* Extract a hash value (sha256 or md5) from a msgpack object.
93-
* Handles string, binary, and extension types, converting bytes to hex strings.
93+
* Handles string, binary, extension, and array types, converting bytes to hex strings.
94+
*
95+
* @param obj The msgpack object containing the hash
96+
* @param field_name The name of the field (for error messages)
97+
* @return The hash as a hex string, or empty string if parsing fails
9498
*/
95-
auto msgpack_object_to_hash_string(const msgpack_object& obj) -> std::string
99+
auto
100+
msgpack_object_to_hash_string(const msgpack_object& obj, const std::string& field_name = "")
101+
-> std::string
96102
{
97103
if (obj.type == MSGPACK_OBJECT_STR)
98104
{
@@ -114,16 +120,56 @@ namespace mamba
114120
reinterpret_cast<const std::byte*>(obj.via.ext.ptr + obj.via.ext.size)
115121
);
116122
}
123+
else if (obj.type == MSGPACK_OBJECT_ARRAY)
124+
{
125+
// Handle array type - array of positive integers (bytes)
126+
if (obj.via.array.size == 0)
127+
{
128+
return std::string();
129+
}
130+
131+
// Array of bytes (positive integers) - convert to hex string
132+
std::vector<std::byte> bytes;
133+
bytes.reserve(obj.via.array.size);
134+
for (std::uint32_t i = 0; i < obj.via.array.size; ++i)
135+
{
136+
const msgpack_object& elem = obj.via.array.ptr[i];
137+
if (elem.type == MSGPACK_OBJECT_POSITIVE_INTEGER)
138+
{
139+
bytes.push_back(static_cast<std::byte>(elem.via.u64 & 0xFF));
140+
}
141+
else
142+
{
143+
LOG_WARNING
144+
<< "Array element " << i << " in " << field_name
145+
<< " is not a positive integer (type: " << static_cast<int>(elem.type)
146+
<< "), cannot convert to bytes";
147+
return std::string();
148+
}
149+
}
150+
return util::bytes_to_hex_str(bytes.data(), bytes.data() + bytes.size());
151+
}
152+
else if (obj.type == MSGPACK_OBJECT_NIL)
153+
{
154+
// Nil is allowed for optional fields, return empty string
155+
return std::string();
156+
}
117157
else
118158
{
119-
// Try to convert as string
159+
// Try to convert as string for other types (e.g., positive/negative integers)
120160
try
121161
{
122162
return msgpack_object_to_string(obj);
123163
}
124-
catch (...)
164+
catch (const std::exception& e)
125165
{
126-
// Return empty string if conversion fails
166+
std::string error_msg = "Failed to parse "
167+
+ (field_name.empty() ? "hash" : field_name)
168+
+ " from msgpack: unexpected type "
169+
+ std::to_string(static_cast<int>(obj.type));
170+
LOG_ERROR << error_msg << ": " << e.what();
171+
// Return empty string - validation will check that at least one checksum is
172+
// present
127173
return std::string();
128174
}
129175
}
@@ -191,11 +237,19 @@ namespace mamba
191237
}
192238
else if (key == "sha256")
193239
{
194-
record.sha256 = msgpack_object_to_hash_string(val_obj);
240+
std::string hash = msgpack_object_to_hash_string(val_obj, "sha256");
241+
if (!hash.empty())
242+
{
243+
record.sha256 = hash;
244+
}
195245
}
196246
else if (key == "md5")
197247
{
198-
record.md5 = msgpack_object_to_hash_string(val_obj);
248+
std::string hash = msgpack_object_to_hash_string(val_obj, "md5");
249+
if (!hash.empty())
250+
{
251+
record.md5 = hash;
252+
}
199253
}
200254
else if (key == "depends")
201255
{
@@ -263,6 +317,17 @@ namespace mamba
263317
}
264318
}
265319

320+
// Validate that at least one checksum (md5 or sha256) is present
321+
// Shards must have checksums for package verification
322+
if (!record.sha256.has_value() && !record.md5.has_value())
323+
{
324+
throw std::runtime_error(
325+
"Shard package record for '" + record.name
326+
+ "' is missing both md5 and sha256 checksums. "
327+
+ "At least one checksum is required."
328+
);
329+
}
330+
266331
return record;
267332
}
268333
}

libmamba/tests/include/test_shard_utils.hpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,18 +128,30 @@ namespace mambatests
128128
const std::map<std::string, std::vector<std::uint8_t>>& shards
129129
) -> std::vector<std::uint8_t>;
130130

131+
/**
132+
* Hash format enum for specifying how to encode md5/sha256 in msgpack.
133+
*/
134+
enum class HashFormat
135+
{
136+
String, ///< As a string
137+
Bytes, ///< As binary data (BIN type)
138+
ArrayBytes ///< As an array of positive integers (bytes)
139+
};
140+
131141
/**
132142
* Create a shard package record msgpack.
133143
*
134144
* @param name Package name
135145
* @param version Package version
136146
* @param build Build string
137147
* @param build_number Build number
138-
* @param sha256 SHA256 hash (optional, can be string or bytes)
139-
* @param md5 MD5 hash (optional, can be string or bytes)
148+
* @param sha256 SHA256 hash (optional, can be string, bytes, or array)
149+
* @param md5 MD5 hash (optional, can be string, bytes, or array)
140150
* @param depends Dependencies
141151
* @param constrains Constraints
142152
* @param noarch Noarch type
153+
* @param sha256_format Format for sha256 encoding
154+
* @param md5_format Format for md5 encoding
143155
* @return Serialized msgpack data
144156
*/
145157
auto create_shard_package_record_msgpack(
@@ -152,8 +164,8 @@ namespace mambatests
152164
const std::vector<std::string>& depends = {},
153165
const std::vector<std::string>& constrains = {},
154166
const std::optional<std::string>& noarch = std::nullopt,
155-
bool sha256_as_bytes = false,
156-
bool md5_as_bytes = false
167+
HashFormat sha256_format = HashFormat::String,
168+
HashFormat md5_format = HashFormat::String
157169
) -> std::vector<std::uint8_t>;
158170
}
159171
}

libmamba/tests/src/core/test_shard_utils.cpp

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,8 @@ namespace mambatests
323323
const std::vector<std::string>& depends,
324324
const std::vector<std::string>& constrains,
325325
const std::optional<std::string>& noarch,
326-
bool sha256_as_bytes,
327-
bool md5_as_bytes
326+
HashFormat sha256_format,
327+
HashFormat md5_format
328328
) -> std::vector<std::uint8_t>
329329
{
330330
msgpack_sbuffer sbuf;
@@ -383,11 +383,12 @@ namespace mambatests
383383
// sha256 (optional)
384384
if (sha256.has_value())
385385
{
386-
msgpack_pack_str(&pk, 5);
387-
msgpack_pack_str_body(&pk, "sha256", 5);
388-
if (sha256_as_bytes)
386+
msgpack_pack_str(&pk, 6);
387+
msgpack_pack_str_body(&pk, "sha256", 6);
388+
389+
if (sha256_format == HashFormat::Bytes)
389390
{
390-
// Convert hex string to bytes
391+
// Convert hex string to bytes (BIN type)
391392
std::vector<std::uint8_t> hash_bytes;
392393
hash_bytes.reserve(sha256->size() / 2);
393394
for (size_t i = 0; i < sha256->size(); i += 2)
@@ -403,7 +404,29 @@ namespace mambatests
403404
msgpack_pack_bin(&pk, hash_bytes.size());
404405
msgpack_pack_bin_body(&pk, hash_bytes.data(), hash_bytes.size());
405406
}
406-
else
407+
else if (sha256_format == HashFormat::ArrayBytes)
408+
{
409+
// Convert hex string to array of integers (bytes)
410+
std::vector<std::uint8_t> hash_bytes;
411+
hash_bytes.reserve(sha256->size() / 2);
412+
for (size_t i = 0; i < sha256->size(); i += 2)
413+
{
414+
if (i + 1 < sha256->size())
415+
{
416+
std::string byte_str = sha256->substr(i, 2);
417+
hash_bytes.push_back(
418+
static_cast<std::uint8_t>(std::stoul(byte_str, nullptr, 16))
419+
);
420+
}
421+
}
422+
msgpack_pack_array(&pk, hash_bytes.size());
423+
for (const auto& byte : hash_bytes)
424+
{
425+
// Pack as unsigned int (msgpack will encode it efficiently)
426+
msgpack_pack_unsigned_int(&pk, static_cast<unsigned int>(byte));
427+
}
428+
}
429+
else // HashFormat::String (default)
407430
{
408431
msgpack_pack_str(&pk, sha256->size());
409432
msgpack_pack_str_body(&pk, sha256->c_str(), sha256->size());
@@ -415,9 +438,10 @@ namespace mambatests
415438
{
416439
msgpack_pack_str(&pk, 3);
417440
msgpack_pack_str_body(&pk, "md5", 3);
418-
if (md5_as_bytes)
441+
442+
if (md5_format == HashFormat::Bytes)
419443
{
420-
// Convert hex string to bytes
444+
// Convert hex string to bytes (BIN type)
421445
std::vector<std::uint8_t> hash_bytes;
422446
hash_bytes.reserve(md5->size() / 2);
423447
for (size_t i = 0; i < md5->size(); i += 2)
@@ -433,7 +457,29 @@ namespace mambatests
433457
msgpack_pack_bin(&pk, hash_bytes.size());
434458
msgpack_pack_bin_body(&pk, hash_bytes.data(), hash_bytes.size());
435459
}
436-
else
460+
else if (md5_format == HashFormat::ArrayBytes)
461+
{
462+
// Convert hex string to array of integers (bytes)
463+
std::vector<std::uint8_t> hash_bytes;
464+
hash_bytes.reserve(md5->size() / 2);
465+
for (size_t i = 0; i < md5->size(); i += 2)
466+
{
467+
if (i + 1 < md5->size())
468+
{
469+
std::string byte_str = md5->substr(i, 2);
470+
hash_bytes.push_back(
471+
static_cast<std::uint8_t>(std::stoul(byte_str, nullptr, 16))
472+
);
473+
}
474+
}
475+
msgpack_pack_array(&pk, hash_bytes.size());
476+
for (const auto& byte : hash_bytes)
477+
{
478+
// Pack as unsigned int (msgpack will encode it efficiently)
479+
msgpack_pack_unsigned_int(&pk, static_cast<unsigned int>(byte));
480+
}
481+
}
482+
else // HashFormat::String (default)
437483
{
438484
msgpack_pack_str(&pk, md5->size());
439485
msgpack_pack_str_body(&pk, md5->c_str(), md5->size());

0 commit comments

Comments
 (0)