Skip to content

Commit d0348dc

Browse files
Fix (u)int8 implicit char conversion in stringstream (#5445)
This PR fixes the implicit character conversion that occurs when writing a `(u)int8` value to a `stringstream` in the helper function `std::string to_str(const void* value, Datatype type)`. Casting the value to `(u)int32` before streaming it avoids this conversion.

This issue was discovered after encountering a `UnicodeDecodeError` when using `operator<<` in the TileDB-Py API on a TileDB Array schema that contained a `(u)int8` attribute. Both a minimal TileDB-Py reproduction and the original issue involving TileDB-SOMA now work as expected.

[sc-61915]

---
TYPE: NO_HISTORY | BUG
DESC: Fix implicit character conversion in `to_str` by casting (u)int8 to (u)int32.

---------

Co-authored-by: Theodore Tsirpanis <[email protected]>
1 parent 74ef793 commit d0348dc

File tree

3 files changed

+69
-2
lines changed

3 files changed

+69
-2
lines changed

tiledb/sm/misc/parse_argument.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,12 @@ std::string to_str(const void* value, Datatype type) {
241241
std::stringstream ss;
242242
switch (type) {
243243
case Datatype::INT8:
244-
ss << *(const int8_t*)value;
244+
// Cast to int32 so the stream formats the value as a number; operator<< would otherwise insert an int8_t as a single character.
245+
ss << static_cast<int32_t>(*(const int8_t*)value);
245246
break;
246247
case Datatype::UINT8:
247-
ss << *(const uint8_t*)value;
248+
// Cast to uint32 so the stream formats the value as a number; operator<< would otherwise insert a uint8_t as a single character.
249+
ss << static_cast<uint32_t>(*(const uint8_t*)value);
248250
break;
249251
case Datatype::INT16:
250252
ss << *(const int16_t*)value;

tiledb/sm/misc/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ commence(unit_test misc)
3636
unit_hilbert.cc
3737
unit_integral_type_casts.cc
3838
unit_math.cc
39+
unit_parse_argument.cc
3940
)
4041
conclude(unit_test)
4142

tiledb/sm/misc/test/unit_parse_argument.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/**
2+
* @file unit_parse_argument.cc
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2025 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
* Tests for the `to_str` function declared in parse_argument.h.
31+
*/
32+
33+
#include "catch.hpp"
34+
#include "tiledb/sm/enums/datatype.h"
35+
#include "tiledb/sm/misc/parse_argument.h"
36+
37+
using namespace tiledb::sm::utils::parse;
38+
using namespace tiledb::sm;
39+
40+
TEST_CASE("Test to_str function for integers", "[to_str][integer]") {
41+
int8_t int8_value = -10;
42+
uint8_t uint8_value = 10;
43+
44+
REQUIRE(to_str(&int8_value, Datatype::INT8) == "-10");
45+
REQUIRE(to_str(&uint8_value, Datatype::UINT8) == "10");
46+
47+
int16_t int16_value = -10;
48+
uint16_t uint16_value = 10;
49+
50+
REQUIRE(to_str(&int16_value, Datatype::INT16) == "-10");
51+
REQUIRE(to_str(&uint16_value, Datatype::UINT16) == "10");
52+
53+
int32_t int32_value = -10;
54+
uint32_t uint32_value = 10;
55+
56+
REQUIRE(to_str(&int32_value, Datatype::INT32) == "-10");
57+
REQUIRE(to_str(&uint32_value, Datatype::UINT32) == "10");
58+
59+
int64_t int64_value = -10;
60+
uint64_t uint64_value = 10;
61+
62+
REQUIRE(to_str(&int64_value, Datatype::INT64) == "-10");
63+
REQUIRE(to_str(&uint64_value, Datatype::UINT64) == "10");
64+
}

0 commit comments

Comments
 (0)