Skip to content

Commit 3afccf2

Browse files
authored
Issue 666: fix for writing utf8 strings (#669)
* add utf8 attribute tests and relax blosc tests
* update actions
* update ubuntu runner
* add utf8 dataset tests
* add fix for writing utf8 strings
* clean the patch
1 parent 8f8a962 commit 3afccf2

File tree

6 files changed: +93 -18 lines changed

.github/workflows/cmake-build.yml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ jobs:
3232
# image: ${{ matrix.image }}
3333
# options: '--user=root'
3434
steps:
35-
- uses: actions/checkout@v2
35+
- uses: actions/checkout@v4
3636
- name: Set environment variables
3737
run: |
3838
bash .github/workflows/set_env_vars.sh \
@@ -58,7 +58,7 @@ jobs:
5858
conan lock create conanfile.py ${CONAN_ARGS} --lockfile-packages --lockfile-out base.lock
5959
conan lock create conanfile.py ${CONAN_ARGS} --build missing
6060
- name: cache conan dependencies
61-
uses: actions/cache@v2
61+
uses: actions/cache@v4
6262
with:
6363
path: ~/.conan/data
6464
key: conan-${{ matrix.profile }}-${{ hashfiles('base.lock') }}-${{ hashFiles('conan.lock') }}
@@ -105,7 +105,7 @@ jobs:
105105
]
106106
runs-on: windows-2022
107107
steps:
108-
- uses: actions/checkout@v2
108+
- uses: actions/checkout@v4
109109
- name: Add MSVC to PATH
110110
uses: ilammy/msvc-dev-cmd@v1
111111
- name: Set environment variables
@@ -126,7 +126,7 @@ jobs:
126126
conan lock create conanfile.py ${CONAN_ARGS} --lockfile-packages --lockfile-out base.lock
127127
conan lock create conanfile.py ${CONAN_ARGS} --build missing
128128
- name: cache conan dependencies
129-
uses: actions/cache@v2
129+
uses: actions/cache@v4
130130
with:
131131
path: ~/.conan/data
132132
key: conan-vs2022-${{ hashfiles('base.lock') }}-${{ hashFiles('conan.lock') }}
@@ -173,8 +173,8 @@ jobs:
173173
]
174174
runs-on: macos-13
175175
steps:
176-
- uses: actions/checkout@v2
177-
- uses: actions/setup-python@v2
176+
- uses: actions/checkout@v4
177+
- uses: actions/setup-python@v4
178178
with:
179179
python-version: '3.x'
180180
- name: Set environment variables
@@ -206,7 +206,7 @@ jobs:
206206
conan lock create conanfile.py ${CONAN_ARGS} --lockfile-packages --lockfile-out base.lock
207207
conan lock create conanfile.py ${CONAN_ARGS} --build missing
208208
- name: cache conan dependencies
209-
uses: actions/cache@v2
209+
uses: actions/cache@v4
210210
with:
211211
path: ~/.conan/data
212212
key: conan-apple-clang12-${{ hashfiles('base.lock') }}-${{ hashFiles('conan.lock') }}
@@ -258,11 +258,11 @@ jobs:
258258
mpi,
259259
serial
260260
]
261-
runs-on: ubuntu-20.04
261+
runs-on: ubuntu-24.04
262262
container:
263263
image: debian:bookworm
264264
steps:
265-
- uses: actions/checkout@v2
265+
- uses: actions/checkout@v4
266266
- name: Set environment variables
267267
run: |
268268
bash .github/workflows/set_env_vars.sh \

src/h5cpp/attribute/attribute.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,9 +350,16 @@ void Attribute::write(const T &data,const datatype::Datatype &mem_type) const
350350
template<typename T>
351351
void Attribute::write(const T &data) const
352352
{
353-
hdf5::datatype::DatatypeHolder mem_type_holder;
354-
355-
write(data,mem_type_holder.get<T>());
353+
auto file_type = datatype();
354+
if(file_type.get_class() == datatype::Class::String)
355+
{
356+
write(data,file_type);
357+
}
358+
else
359+
{
360+
hdf5::datatype::DatatypeHolder mem_type_holder;
361+
write(data,mem_type_holder.get<T>());
362+
}
356363
}
357364

358365
template<typename T>

src/h5cpp/node/dataset.hpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -987,17 +987,31 @@ std::uint32_t Dataset::read_chunk(T &data,
987987
template<typename T>
988988
void Dataset::write(const T &data,const property::DatasetTransferList &dtpl)
989989
{
990-
hdf5::datatype::DatatypeHolder mem_type_holder;
991990
hdf5::dataspace::DataspaceHolder mem_space_holder(space_pool);
992-
write_reshape(data, mem_type_holder.get(data), mem_space_holder.get(data), dtpl);
991+
if(file_type_.get_class() == datatype::Class::String)
992+
{
993+
write_reshape(data, file_type_, mem_space_holder.get(data), dtpl);
994+
}
995+
else
996+
{
997+
hdf5::datatype::DatatypeHolder mem_type_holder;
998+
write_reshape(data, mem_type_holder.get(data), mem_space_holder.get(data), dtpl);
999+
}
9931000
}
9941001

9951002
template<typename T>
9961003
void Dataset::write(const T &data,const property::DatasetTransferList &dtpl) const
9971004
{
998-
hdf5::datatype::DatatypeHolder mem_type_holder;
9991005
hdf5::dataspace::DataspaceHolder mem_space_holder;
1000-
write_reshape(data, mem_type_holder.get(data), mem_space_holder.get(data), dtpl);
1006+
if(file_type_.get_class() == datatype::Class::String)
1007+
{
1008+
write_reshape(data, file_type_, mem_space_holder.get(data), dtpl);
1009+
}
1010+
else
1011+
{
1012+
hdf5::datatype::DatatypeHolder mem_type_holder;
1013+
write_reshape(data, mem_type_holder.get(data), mem_space_holder.get(data), dtpl);
1014+
}
10011015
}
10021016

10031017
template<typename T>

test/attribute/attribute_variable_string_io.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,37 @@ SCENARIO("variable string attribute IO") {
4545
auto simple_space = dataspace::Simple{{6}};
4646
auto scalar_space = dataspace::Scalar();
4747
auto string_type = datatype::create<std::string>();
48+
auto utf8_type = datatype::create<std::string>();
49+
utf8_type.encoding(datatype::CharacterEncoding::UTF8);
4850

51+
GIVEN("a utf8 scalar attribute") {
52+
auto space = dataspace::Scalar();
53+
auto attr = root_group.attributes.create("utf8_scalar", utf8_type, space);
54+
AND_GIVEN("a string of arbitrary length") {
55+
std::string write = "hello";
56+
THEN("we can write the string to the attribute") {
57+
REQUIRE_NOTHROW(attr.write(write));
58+
std::string read;
59+
AND_THEN("read the attribute using the default datatype") {
60+
REQUIRE_NOTHROW(attr.read(read));
61+
REQUIRE(write == read);
62+
}
63+
AND_THEN("read the attribute using the attributes datatype") {
64+
REQUIRE_NOTHROW(attr.read(read, attr.datatype()));
65+
REQUIRE(write == read);
66+
}
67+
}
68+
}
69+
THEN("we can write a const char string to the attribute") {
70+
REQUIRE_NOTHROW(attr.write("A short notice"));
71+
AND_THEN("read it again") {
72+
std::string expect = "A short notice";
73+
std::string read;
74+
REQUIRE_NOTHROW(attr.read(read));
75+
REQUIRE_THAT(expect, Catch::Matchers::Equals(read));
76+
}
77+
}
78+
}
4979
GIVEN("a scalar attribute") {
5080
auto space = dataspace::Scalar();
5181
auto attr = root_group.attributes.create("scalar", string_type, space);

test/filter/external_filter_test.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,9 @@ SCENARIO("External filter Blosc LZ4") {
184184
REQUIRE(flags[0] == filter::Availability::Mandatory);
185185
REQUIRE_THAT(filters[0].cd_values(), Equals(params));
186186
REQUIRE(filters[0].id() == static_cast<int>(FILTER_BLOSC));
187-
REQUIRE(filters[0].name() ==
188-
"HDF5 blosc filter; see http://www.hdfgroup.org/services/contributions.html");
187+
if (filters[0].name() != "blosc")
188+
REQUIRE(filters[0].name() ==
189+
"HDF5 blosc filter; see http://www.hdfgroup.org/services/contributions.html");
189190
}
190191
}
191192
}

test/node/dataset_variable_string_io.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,31 @@ SCENARIO("testing variable length string IO") {
3939
auto string_type = hdf5::datatype::create<std::string>();
4040
hdf5::dataspace::Scalar scalar_space;
4141
hdf5::dataspace::Simple simple_space({7});
42+
auto utf8_type = datatype::create<std::string>();
43+
utf8_type.encoding(datatype::CharacterEncoding::UTF8);
4244
hdf5::property::DatasetTransferList dtpl;
4345

46+
GIVEN("a scalar dataset") {
47+
node::Dataset dataset(f.root(), "utf_scalar", utf8_type, scalar_space);
48+
THEN("we can write a single string value to it") {
49+
std::string value = "hello";
50+
dataset.write(value);
51+
AND_THEN("read it back") {
52+
std::string readback;
53+
dataset.read(readback);
54+
REQUIRE(readback == value);
55+
}
56+
}
57+
THEN("we can write a string from a char pointer") {
58+
dataset.write("this is a test");
59+
AND_THEN("read this back") {
60+
std::string readback;
61+
dataset.read(readback);
62+
REQUIRE(readback == "this is a test");
63+
}
64+
}
65+
}
66+
4467
GIVEN("a scalar dataset") {
4568
node::Dataset dataset(f.root(), "scalar", string_type, scalar_space);
4669
THEN("we can write a single string value to it") {

0 commit comments

Comments
 (0)