From 314643e85e828398bd793582d70804eac83a032b Mon Sep 17 00:00:00 2001 From: FlyingSamson Date: Fri, 20 Mar 2026 10:43:15 +0000 Subject: [PATCH 1/8] Create test file with fixed-size string attributes --- test/create_h5py_test_string_attributes.py | 62 +++++++++++++++++++++ test/h5py_test_string_attributes.h5 | Bin 0 -> 1720 bytes 2 files changed, 62 insertions(+) create mode 100644 test/create_h5py_test_string_attributes.py create mode 100644 test/h5py_test_string_attributes.h5 diff --git a/test/create_h5py_test_string_attributes.py b/test/create_h5py_test_string_attributes.py new file mode 100644 index 000000000..2d3c2c160 --- /dev/null +++ b/test/create_h5py_test_string_attributes.py @@ -0,0 +1,62 @@ +import h5py +import numpy as np + +f = h5py.File("h5py_test_string_attributes.h5", "w") + + +fix_size_nullterm_tid = h5py.h5t.C_S1.copy() +fix_size_nullterm_tid.set_size(4) +fix_size_nullterm_tid.set_strpad(h5py.h5t.STR_NULLTERM) +f.attrs.create( + "fixed_size_string_nullterm_full", "123", dtype=h5py.Datatype(fix_size_nullterm_tid) +) +f.attrs.create( + "fixed_size_string_nullterm_trunc", + "1234", + dtype=h5py.Datatype(fix_size_nullterm_tid), +) +f.attrs.create( + "fixed_size_string_nullterm_part", "1", dtype=h5py.Datatype(fix_size_nullterm_tid) +) +f.attrs.create( + "fixed_size_string_nullterm_empty", "", dtype=h5py.Datatype(fix_size_nullterm_tid) +) + + +fix_size_nullpad_tid = h5py.h5t.C_S1.copy() +fix_size_nullpad_tid.set_size(4) +fix_size_nullpad_tid.set_strpad(h5py.h5t.STR_NULLPAD) +f.attrs.create( + "fixed_size_string_nullpad_full", "1234", dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.attrs.create( + "fixed_size_string_nullpad_trunc", + "12345", + dtype=h5py.Datatype(fix_size_nullpad_tid), +) +f.attrs.create( + "fixed_size_string_nullpad_part", "1", dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.attrs.create( + "fixed_size_string_nullpad_empty", "", dtype=h5py.Datatype(fix_size_nullpad_tid) +) + +fix_size_spacepad_tid = h5py.h5t.C_S1.copy() +fix_size_spacepad_tid.set_size(4) +fix_size_spacepad_tid.set_strpad(h5py.h5t.STR_SPACEPAD) +f.attrs.create( + "fixed_size_string_spacepad_full", + "1234", + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.attrs.create( + "fixed_size_string_spacepad_trunc", + "12345", + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.attrs.create( + "fixed_size_string_spacepad_part", "1", dtype=h5py.Datatype(fix_size_spacepad_tid) +) +f.attrs.create( + "fixed_size_string_spacepad_empty", "", dtype=h5py.Datatype(fix_size_spacepad_tid) +) diff --git a/test/h5py_test_string_attributes.h5 b/test/h5py_test_string_attributes.h5 new file mode 100644 index 0000000000000000000000000000000000000000..aa08db40f2127a9340176b7dd649993e5de9b8d3 GIT binary patch literal 1720 zcmeD5aB<`1lHy_j0S*oZ76t(@6Gr@pf*ouS5f~pPp8#brLg@}Dy@CnCU}WH90Le)} z#RZ_|D=p zFi(i13of&Js z9!M~uy9YCTaQUmaATc==9zeptbdNoJ2&E^uIjHFgr+E%wC*n@$aPuJP3Edn81qF}+ Sgwqq;9MtrLW**Quuz3JA$ Date: Fri, 20 Mar 2026 11:05:59 +0000 Subject: [PATCH 2/8] Implement tests describing expected behavior --- test/CMakeLists.txt | 1 + test/core/CMakeLists.txt | 3 +- test/core/fixed_length_string_test.cpp | 181 +++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 test/core/fixed_length_string_test.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1ca1f920d..7093bbda8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,6 +19,7 @@ include_directories(${PROJECT_SOURCE_DIR}/src configure_file(h5py_test_data.h5 h5py_test_data.h5 COPYONLY) configure_file(h5py_test_boolattr.h5 h5py_test_boolattr.h5 COPYONLY) +configure_file(h5py_test_string_attributes.h5 h5py_test_string_attributes.h5 COPYONLY) configure_file(pniio_test_boolattr.h5 pniio_test_boolattr.h5 COPYONLY) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt index 110c3d9fa..f1d94b8db 100644 --- a/test/core/CMakeLists.txt +++ b/test/core/CMakeLists.txt @@ -1,5 +1,5 @@ -set(test_sources +set(test_sources ObjectHandleDefault.cpp object_handle_test.cpp file_object_handle_test.cpp @@ -10,6 +10,7 @@ set(test_sources attribute_object_handle_test.cpp property_objects_handle_test.cpp error_objects_handle_test.cpp + fixed_length_string_test.cpp test_environment.cpp iteration_index_test.cpp iteration_order_test.cpp diff --git a/test/core/fixed_length_string_test.cpp b/test/core/fixed_length_string_test.cpp new file mode 100644 index 000000000..971c45383 --- /dev/null +++ b/test/core/fixed_length_string_test.cpp @@ -0,0 +1,181 @@ + +#ifdef H5CPP_CATCH2_V2 +#include +#else +#include +#endif +#include + +using namespace hdf5; + +SCENARIO("testing reading of fixed-size string attributes") +{ + auto h5py_file = file::open("../h5py_test_string_attributes.h5", file::AccessFlags::ReadOnly); + auto root_group = h5py_file.root(); + + SECTION("null-terminated string attribute of size 4") + { + SECTION("written string was empty") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_empty"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should be empty (i.e., contain \"\", without trailing \\0)") + { + REQUIRE(value == ""); + } + } + } + SECTION("written string was \"1\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_part"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the string '1' (without trailing \\0)") + { + REQUIRE(value == "1"); + } + } + } + SECTION("written string was \"123\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_full"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the entire string '123' (without trailing \\0)") + { + REQUIRE(value == "123"); + } + } + } + SECTION("written string was \"1234\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_trunc"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the truncated string '123' (without trailing \\0)") + { + REQUIRE(value == "123"); + } + } + } + } + SECTION("null-padded string attribute of size 4") + { + SECTION("written string was empty") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_empty"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should be empty (i.e., contain \"\", without any \\0-padding)") + { + REQUIRE(value == ""); + } + } + } + SECTION("written string was \"1\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_part"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the string '1' (without any \\0-padding)") + { + REQUIRE(value == "1"); + } + } + } + SECTION("written string was \"1234\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_full"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the entire string '1234' (without any \\0-padding)") + { + REQUIRE(value == "1234"); + } + } + } + SECTION("written string was \"12345\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_trunc"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the truncated string '1234' (without any \\0-padding)") + { + REQUIRE(value == "1234"); + } + } + } + } + SECTION("space-padded string attribute of size 4") + { + SECTION("written string was empty") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_empty"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should be empty (i.e., contain \"\", without any space-padding)") + { + REQUIRE(value == ""); + } + } + } + SECTION("written string was \"1\"") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_part"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the string '1' (without any space-padding)") + { + REQUIRE(value == "1"); + } + } + } + SECTION("written string was \"1234\"") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_full"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the entire string '1234' (without any space-padding)") + { + REQUIRE(value == "1234"); + } + } + } + SECTION("written string was \"12345\"") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_trunc"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the truncated string '1234' (without any space-padding)") + { + REQUIRE(value == "1234"); + } + } + } + } +} \ No newline at end of file From fe32ffaa5a141930bac3fa4ee3829e520fc0b3dc Mon Sep 17 00:00:00 2001 From: FlyingSamson Date: Fri, 20 Mar 2026 12:54:24 +0000 Subject: [PATCH 3/8] Read only non-padding non-nullterminator chars into std::strings --- src/h5cpp/core/fixed_length_string.hpp | 27 ++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/h5cpp/core/fixed_length_string.hpp b/src/h5cpp/core/fixed_length_string.hpp index de2346c14..7b6b9d833 100644 --- a/src/h5cpp/core/fixed_length_string.hpp +++ b/src/h5cpp/core/fixed_length_string.hpp @@ -25,6 +25,7 @@ // #pragma once +#include #include #include #include @@ -117,10 +118,32 @@ struct FixedLengthStringTrait //! @param buffer reference to the IO buffer //! @return new data instance static DataType from_buffer(const BufferType &buffer, - const datatype::String &, + const datatype::String &memory_type, const dataspace::Dataspace &) { - return DataType(buffer.begin(),buffer.end()); + const auto padding = memory_type.padding(); + auto end_it = buffer.end(); + switch (padding) + { + case datatype::StringPad::NullTerm: + end_it = std::find(buffer.begin(), buffer.end(), '\0'); + break; + case datatype::StringPad::NullPad: + // get iterator to last padding \0 character + end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const auto &c) + { return c == '\0'; }) + .base(); + break; + case datatype::StringPad::SpacePad: + // get iterator to last padding space character + end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const auto &c) + { return c == ' '; }) + .base(); + break; + default: + break; + } + return DataType(buffer.begin(), end_it); } }; From bd9ae044d646f785a60cc9238541ec66dcde0498 Mon Sep 17 00:00:00 2001 From: FlyingSamson Date: Fri, 20 Mar 2026 14:29:43 +0000 Subject: [PATCH 4/8] Fix other test depending on old behavior --- test/attribute/attribute_fixed_string_io.cpp | 53 ++++++++----------- test/node/dataset_fixed_string_io.cpp | 4 +- test/node/dataset_h5py_string_compat_test.cpp | 2 +- 3 files changed, 25 insertions(+), 34 deletions(-) diff --git a/test/attribute/attribute_fixed_string_io.cpp b/test/attribute/attribute_fixed_string_io.cpp index 5a6655d54..7bddc9159 100644 --- a/test/attribute/attribute_fixed_string_io.cpp +++ b/test/attribute/attribute_fixed_string_io.cpp @@ -66,9 +66,6 @@ SCENARIO("Working with attributes and fixed UTF8 strings") { THEN("we can read the data using this type") { std::string read; REQUIRE_NOTHROW(a.read(read, read_type)); - // need to remove trailing \0 from the string - read.erase(std::remove(read.begin(), read.end(), '\0'), read.end()); - REQUIRE(write == read); } } @@ -77,20 +74,17 @@ SCENARIO("Working with attributes and fixed UTF8 strings") { AND_GIVEN("an instance of a smaller string") { std::string write = "hell"; - THEN("we have to resize the string to the appropriate size") { - write.resize(5); - AND_THEN("we can write it to the attribute") { - REQUIRE_NOTHROW(a.write(write, string_type)); - AND_THEN("read it with the attribte datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read, a.datatype())); - REQUIRE_THAT(write, Catch::Matchers::Equals(read)); - } - AND_THEN("read it with the default datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read)); - REQUIRE_THAT(write, Catch::Matchers::Equals(read)); - } + AND_THEN("we can write it to the attribute") { + REQUIRE_NOTHROW(a.write(write, string_type)); + AND_THEN("read it with the attribte datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read, a.datatype())); + REQUIRE_THAT(write, Catch::Matchers::Equals(read)); + } + AND_THEN("read it with the default datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read)); + REQUIRE_THAT(write, Catch::Matchers::Equals(read)); } } } @@ -117,20 +111,17 @@ SCENARIO("Working with attributes and fixed UTF8 strings") { dataspace::Simple({1})); AND_GIVEN("a string of size 4") { std::string write = "hell"; - THEN("we have to resize the string") { - write.resize(5); - AND_THEN("write it to the attribute") { - REQUIRE_NOTHROW(a.write(write, string_type)); - AND_THEN("we can read it back using the attribute datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read, a.datatype())); - REQUIRE(write == read); - } - AND_THEN("read the data with the default datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read)); - REQUIRE(write == read); - } + AND_THEN("write it to the attribute") { + REQUIRE_NOTHROW(a.write(write, string_type)); + AND_THEN("we can read it back using the attribute datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read, a.datatype())); + REQUIRE(write == read); + } + AND_THEN("read the data with the default datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read)); + REQUIRE(write == read); } } } diff --git a/test/node/dataset_fixed_string_io.cpp b/test/node/dataset_fixed_string_io.cpp index 0b5a603bd..ab498c651 100644 --- a/test/node/dataset_fixed_string_io.cpp +++ b/test/node/dataset_fixed_string_io.cpp @@ -88,12 +88,12 @@ SCENARIO("testing fixed string IO") { THEN("we can read the first element with a hyperslab") { dataset.read(buffer, read_type, scalar_space, dataspace::Hyperslab{{0, 0}, {1, 1}}); - REQUIRE(buffer == "AAAAA "); + REQUIRE(buffer == "AAAAA"); } THEN("we can read the last element with a hyperslab") { dataset.read(buffer, read_type, scalar_space, dataspace::Hyperslab{{1, 2}, {1, 1}}); - REQUIRE(buffer == "FFFFF "); + REQUIRE(buffer == "FFFFF"); } } GIVEN("a 6 character fixed length string type") { diff --git a/test/node/dataset_h5py_string_compat_test.cpp b/test/node/dataset_h5py_string_compat_test.cpp index 0bbc17923..ccb61d34d 100644 --- a/test/node/dataset_h5py_string_compat_test.cpp +++ b/test/node/dataset_h5py_string_compat_test.cpp @@ -46,7 +46,7 @@ SCENARIO("testing h5py compatible string IO") { THEN("we can read the content from the dataset to a buffer") { std::string buffer; dataset.read(buffer, memory_type, memory_space, dataset.dataspace()); - REQUIRE(buffer == "hello from h5py "); + REQUIRE(buffer == "hello from h5py"); } } } From 8d4749493cf22e21d14cd31112f3a5a02c14d04a Mon Sep 17 00:00:00 2001 From: FlyingSamson Date: Fri, 20 Mar 2026 14:40:29 +0000 Subject: [PATCH 5/8] Make it c++11 compatible again --- src/h5cpp/core/fixed_length_string.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/h5cpp/core/fixed_length_string.hpp b/src/h5cpp/core/fixed_length_string.hpp index 7b6b9d833..6ceb85df3 100644 --- a/src/h5cpp/core/fixed_length_string.hpp +++ b/src/h5cpp/core/fixed_length_string.hpp @@ -130,13 +130,13 @@ struct FixedLengthStringTrait break; case datatype::StringPad::NullPad: // get iterator to last padding \0 character - end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const auto &c) + end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const BufferType::value_type &c) { return c == '\0'; }) .base(); break; case datatype::StringPad::SpacePad: // get iterator to last padding space character - end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const auto &c) + end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const BufferType::value_type &c) { return c == ' '; }) .base(); break; From dc61ff688f3020e7331445b85b428e7b7793938f Mon Sep 17 00:00:00 2001 From: FlyingSamson Date: Fri, 27 Mar 2026 13:58:28 +0000 Subject: [PATCH 6/8] Introduce function to switch on or off trimming --- src/h5cpp/attribute/attribute.cpp | 46 +++++++++++++++++ src/h5cpp/attribute/attribute.hpp | 33 +++++++++++- src/h5cpp/core/fixed_length_string.hpp | 15 +++++- test/core/fixed_length_string_test.cpp | 69 +++++++++++++++++++++----- 4 files changed, 148 insertions(+), 15 deletions(-) diff --git a/src/h5cpp/attribute/attribute.cpp b/src/h5cpp/attribute/attribute.cpp index 6987ba984..810a4c221 100644 --- a/src/h5cpp/attribute/attribute.cpp +++ b/src/h5cpp/attribute/attribute.cpp @@ -128,6 +128,52 @@ void Attribute::close() } +// special overload for reading attributes into a std::string +void Attribute::read(std::string &data, bool trim) const +{ + auto file_type = datatype(); + if (file_type.get_class() == datatype::Class::String) + { + read(data, file_type, trim); + } + else + { + hdf5::datatype::DatatypeHolder mem_type_holder; + read(data, mem_type_holder.get(), file_type, trim); + } +} + + +void Attribute::read(std::string &data, const datatype::Datatype &mem_type, bool trim) const +{ + datatype::Datatype file_type = datatype(); + read(data, mem_type, file_type, trim); +} + + +void Attribute::read(std::string &data, const datatype::Datatype &mem_type, const datatype::Datatype &file_type, bool trim) const +{ + check_size(dataspace::create(data), dataspace(), "read"); + + if (file_type.get_class() == datatype::Class::String) + { + datatype::String string_type(file_type); + + if (string_type.is_variable_length()) + { + read_variable_length_string(data, mem_type); + } + else + { + read_fixed_length_string(data, mem_type, trim); + } + } + else + { + read_contiguous_data(data, mem_type); + } +} + } // namespace attribute } // namespace hdf5 diff --git a/src/h5cpp/attribute/attribute.hpp b/src/h5cpp/attribute/attribute.hpp index 1b5a5e92b..980d81384 100644 --- a/src/h5cpp/attribute/attribute.hpp +++ b/src/h5cpp/attribute/attribute.hpp @@ -202,6 +202,10 @@ class DLL_EXPORT Attribute template void read(T &data,const datatype::Datatype &mem_type) const; + void read(std::string &data, bool trim = false) const; + + void read(std::string &data, const datatype::Datatype &mem_type, bool trim = false) const; + private: node::Link parent_link_; ObjectHandle handle_; @@ -209,6 +213,8 @@ class DLL_EXPORT Attribute template void read(T &data,const datatype::Datatype &mem_type, const datatype::Datatype &file_type) const; + void read(std::string &data, const datatype::Datatype &mem_type, const datatype::Datatype &file_type, bool trim = false) const; + template void write_fixed_length_string(const T &data, const datatype::Datatype &mem_type) const @@ -273,7 +279,32 @@ class DLL_EXPORT Attribute } - template + void read_fixed_length_string(std::string &data, + const datatype::Datatype &mem_type, bool trim = false) const + { + using Trait = FixedLengthStringTrait; + using SpaceTrait = hdf5::dataspace::TypeTrait; + + auto buffer = Trait::BufferType::create(mem_type, SpaceTrait::create(data)); + + if (H5Aread(static_cast(handle_), + static_cast(mem_type), + buffer.data()) < 0) + { + error::Singleton::instance().throw_with_stack("Failure to read data from attribute!"); + } + + if (trim) + { + data = Trait::from_buffer_trimmed(buffer, mem_type, SpaceTrait::create(data)); + } + else + { + data = Trait::from_buffer(buffer, mem_type, SpaceTrait::create(data)); + } + } + + template void read_variable_length_string(T &data, const datatype::Datatype &mem_type) const { diff --git a/src/h5cpp/core/fixed_length_string.hpp b/src/h5cpp/core/fixed_length_string.hpp index 6ceb85df3..1633bd9ad 100644 --- a/src/h5cpp/core/fixed_length_string.hpp +++ b/src/h5cpp/core/fixed_length_string.hpp @@ -118,8 +118,21 @@ struct FixedLengthStringTrait //! @param buffer reference to the IO buffer //! @return new data instance static DataType from_buffer(const BufferType &buffer, - const datatype::String &memory_type, + const datatype::String &, const dataspace::Dataspace &) + { + return DataType(buffer.begin(), buffer.end()); + } + + //! + //! @brief store data from buffer in target memory and trim any tailing null-terminating characters + //! + //! @param buffer reference to the IO buffer + //! @param memory_type the datatype describing how the string is stored in the file + //! @return new data instance + static DataType from_buffer_trimmed(const BufferType &buffer, + const datatype::String &memory_type, + const dataspace::Dataspace &) { const auto padding = memory_type.padding(); auto end_it = buffer.end(); diff --git a/test/core/fixed_length_string_test.cpp b/test/core/fixed_length_string_test.cpp index 971c45383..d473a2144 100644 --- a/test/core/fixed_length_string_test.cpp +++ b/test/core/fixed_length_string_test.cpp @@ -18,10 +18,20 @@ SCENARIO("testing reading of fixed-size string attributes") SECTION("written string was empty") { auto attribute = root_group.attributes["fixed_size_string_nullterm_empty"]; - WHEN("reading back the value into a std::string") + WHEN("reading back the value into a std::string with trimming disabled") { std::string value; attribute.read(value); + THEN("the std::string should be of length 4 and start with \\0") + { + REQUIRE(value.size() == 4); + REQUIRE(value[0] == '\0'); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); THEN("the std::string should be empty (i.e., contain \"\", without trailing \\0)") { REQUIRE(value == ""); @@ -31,11 +41,22 @@ SCENARIO("testing reading of fixed-size string attributes") SECTION("written string was \"1\"") { auto attribute = root_group.attributes["fixed_size_string_nullterm_part"]; - WHEN("reading back the value into a std::string") + WHEN("reading back the value into a std::string with trimming disabled") { std::string value; attribute.read(value); - THEN("the std::string should contain the string '1' (without trailing \\0)") + THEN("the std::string should be of length 4 and start with \"1\\0\"") + { + REQUIRE(value.size() == 4); + REQUIRE(value[0] == '1'); + REQUIRE(value[1] == '\0'); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); + THEN("the std::string should contain the string \"1\" (without trailing \\0)") { REQUIRE(value == "1"); } @@ -44,10 +65,20 @@ SCENARIO("testing reading of fixed-size string attributes") SECTION("written string was \"123\"") { auto attribute = root_group.attributes["fixed_size_string_nullterm_full"]; - WHEN("reading back the value into a std::string") + WHEN("reading back the value into a std::string with trimming disabled") { std::string value; attribute.read(value); + THEN("the std::string should of length 4 and contain \"123\0\"") + { + std::string expected("123\0", 4); + REQUIRE(value == expected); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); THEN("the std::string should contain the entire string '123' (without trailing \\0)") { REQUIRE(value == "123"); @@ -57,11 +88,21 @@ SCENARIO("testing reading of fixed-size string attributes") SECTION("written string was \"1234\"") { auto attribute = root_group.attributes["fixed_size_string_nullterm_trunc"]; - WHEN("reading back the value into a std::string") + WHEN("reading back the value into a std::string with trimming disabled") { std::string value; attribute.read(value); - THEN("the std::string should contain the truncated string '123' (without trailing \\0)") + THEN("the std::string should be of length 4 and contain the truncated string \"123\0\"") + { + std::string expected("123\0",4); + REQUIRE(value == expected); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); + THEN("the std::string should be of length 4 and contain the truncated string '123' (without trailing \\0)") { REQUIRE(value == "123"); } @@ -77,9 +118,10 @@ SCENARIO("testing reading of fixed-size string attributes") { std::string value; attribute.read(value); - THEN("the std::string should be empty (i.e., contain \"\", without any \\0-padding)") + THEN("the std::string should consist only \\0 characters") { - REQUIRE(value == ""); + std::string expected("\0\0\0\0", 4); + REQUIRE(value == expected); } } } @@ -92,7 +134,8 @@ SCENARIO("testing reading of fixed-size string attributes") attribute.read(value); THEN("the std::string should contain the string '1' (without any \\0-padding)") { - REQUIRE(value == "1"); + std::string expected("1\0\0\0", 4); + REQUIRE(value == expected); } } } @@ -132,9 +175,9 @@ SCENARIO("testing reading of fixed-size string attributes") { std::string value; attribute.read(value); - THEN("the std::string should be empty (i.e., contain \"\", without any space-padding)") + THEN("the std::string should contain only spaces") { - REQUIRE(value == ""); + REQUIRE(value == " "); } } } @@ -145,9 +188,9 @@ SCENARIO("testing reading of fixed-size string attributes") { std::string value; attribute.read(value); - THEN("the std::string should contain the string '1' (without any space-padding)") + THEN("the std::string should contain the string '1 ' ('1' with three padding spaces)") { - REQUIRE(value == "1"); + REQUIRE(value == "1 "); } } } From 187d480aa92857a6a8603af618768f292f288060 Mon Sep 17 00:00:00 2001 From: FlyingSamson Date: Fri, 27 Mar 2026 13:58:37 +0000 Subject: [PATCH 7/8] Revert "Fix other test depending on old behavior" This reverts commit bd9ae044d646f785a60cc9238541ec66dcde0498. --- test/attribute/attribute_fixed_string_io.cpp | 53 +++++++++++-------- test/node/dataset_fixed_string_io.cpp | 4 +- test/node/dataset_h5py_string_compat_test.cpp | 2 +- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/test/attribute/attribute_fixed_string_io.cpp b/test/attribute/attribute_fixed_string_io.cpp index 7bddc9159..5a6655d54 100644 --- a/test/attribute/attribute_fixed_string_io.cpp +++ b/test/attribute/attribute_fixed_string_io.cpp @@ -66,6 +66,9 @@ SCENARIO("Working with attributes and fixed UTF8 strings") { THEN("we can read the data using this type") { std::string read; REQUIRE_NOTHROW(a.read(read, read_type)); + // need to remove trailing \0 from the string + read.erase(std::remove(read.begin(), read.end(), '\0'), read.end()); + REQUIRE(write == read); } } @@ -74,17 +77,20 @@ SCENARIO("Working with attributes and fixed UTF8 strings") { AND_GIVEN("an instance of a smaller string") { std::string write = "hell"; - AND_THEN("we can write it to the attribute") { - REQUIRE_NOTHROW(a.write(write, string_type)); - AND_THEN("read it with the attribte datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read, a.datatype())); - REQUIRE_THAT(write, Catch::Matchers::Equals(read)); - } - AND_THEN("read it with the default datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read)); - REQUIRE_THAT(write, Catch::Matchers::Equals(read)); + THEN("we have to resize the string to the appropriate size") { + write.resize(5); + AND_THEN("we can write it to the attribute") { + REQUIRE_NOTHROW(a.write(write, string_type)); + AND_THEN("read it with the attribte datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read, a.datatype())); + REQUIRE_THAT(write, Catch::Matchers::Equals(read)); + } + AND_THEN("read it with the default datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read)); + REQUIRE_THAT(write, Catch::Matchers::Equals(read)); + } } } } @@ -111,17 +117,20 @@ SCENARIO("Working with attributes and fixed UTF8 strings") { dataspace::Simple({1})); AND_GIVEN("a string of size 4") { std::string write = "hell"; - AND_THEN("write it to the attribute") { - REQUIRE_NOTHROW(a.write(write, string_type)); - AND_THEN("we can read it back using the attribute datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read, a.datatype())); - REQUIRE(write == read); - } - AND_THEN("read the data with the default datatype") { - std::string read; - REQUIRE_NOTHROW(a.read(read)); - REQUIRE(write == read); + THEN("we have to resize the string") { + write.resize(5); + AND_THEN("write it to the attribute") { + REQUIRE_NOTHROW(a.write(write, string_type)); + AND_THEN("we can read it back using the attribute datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read, a.datatype())); + REQUIRE(write == read); + } + AND_THEN("read the data with the default datatype") { + std::string read; + REQUIRE_NOTHROW(a.read(read)); + REQUIRE(write == read); + } } } } diff --git a/test/node/dataset_fixed_string_io.cpp b/test/node/dataset_fixed_string_io.cpp index ab498c651..0b5a603bd 100644 --- a/test/node/dataset_fixed_string_io.cpp +++ b/test/node/dataset_fixed_string_io.cpp @@ -88,12 +88,12 @@ SCENARIO("testing fixed string IO") { THEN("we can read the first element with a hyperslab") { dataset.read(buffer, read_type, scalar_space, dataspace::Hyperslab{{0, 0}, {1, 1}}); - REQUIRE(buffer == "AAAAA"); + REQUIRE(buffer == "AAAAA "); } THEN("we can read the last element with a hyperslab") { dataset.read(buffer, read_type, scalar_space, dataspace::Hyperslab{{1, 2}, {1, 1}}); - REQUIRE(buffer == "FFFFF"); + REQUIRE(buffer == "FFFFF "); } } GIVEN("a 6 character fixed length string type") { diff --git a/test/node/dataset_h5py_string_compat_test.cpp b/test/node/dataset_h5py_string_compat_test.cpp index ccb61d34d..0bbc17923 100644 --- a/test/node/dataset_h5py_string_compat_test.cpp +++ b/test/node/dataset_h5py_string_compat_test.cpp @@ -46,7 +46,7 @@ SCENARIO("testing h5py compatible string IO") { THEN("we can read the content from the dataset to a buffer") { std::string buffer; dataset.read(buffer, memory_type, memory_space, dataset.dataspace()); - REQUIRE(buffer == "hello from h5py"); + REQUIRE(buffer == "hello from h5py "); } } } From f16bb357ecf21add4652af63b523e859fc18a8e8 Mon Sep 17 00:00:00 2001 From: FlyingSamson Date: Fri, 27 Mar 2026 14:03:35 +0000 Subject: [PATCH 8/8] Prepare test file with datasets of different fixed size string types --- ...ate_h5py_test_fixed_size_string_dataset.py | 92 ++++++++++++++++++ test/h5py_test_fixed_size_string_datasets.h5 | Bin 0 -> 7576 bytes 2 files changed, 92 insertions(+) create mode 100644 test/create_h5py_test_fixed_size_string_dataset.py create mode 100644 test/h5py_test_fixed_size_string_datasets.h5 diff --git a/test/create_h5py_test_fixed_size_string_dataset.py b/test/create_h5py_test_fixed_size_string_dataset.py new file mode 100644 index 000000000..c24bd0bfc --- /dev/null +++ b/test/create_h5py_test_fixed_size_string_dataset.py @@ -0,0 +1,92 @@ +import h5py +import numpy + +f = h5py.File("h5py_test_fixed_size_string_datasets.h5","w") + +fix_size_nullterm_tid = h5py.h5t.C_S1.copy() +fix_size_nullterm_tid.set_size(4) +fix_size_nullterm_tid.set_strpad(h5py.h5t.STR_NULLTERM) + +f.create_dataset( + "fixed_size_string_nullterm_full", + shape=(1,), + data=["123"], + dtype=h5py.Datatype(fix_size_nullterm_tid)) + +f.create_dataset( + "fixed_size_string_nullterm_trunc", + shape=(1,), + data=["1234"], + dtype=h5py.Datatype(fix_size_nullterm_tid), +) + +f.create_dataset( + "fixed_size_string_nullterm_part", + shape=(1,), + data=["1"], + dtype=h5py.Datatype(fix_size_nullterm_tid) +) + +f.create_dataset( + "fixed_size_string_nullterm_empty", + shape=(1,), + data=[""], + dtype=h5py.Datatype(fix_size_nullterm_tid) +) + + +fix_size_nullpad_tid = h5py.h5t.C_S1.copy() +fix_size_nullpad_tid.set_size(4) +fix_size_nullpad_tid.set_strpad(h5py.h5t.STR_NULLPAD) +f.create_dataset( + "fixed_size_string_nullpad_full", + shape=(1,), + data=["1234"], + dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.create_dataset( + "fixed_size_string_nullpad_trunc", + shape=(1,), + data=["12345"], + dtype=h5py.Datatype(fix_size_nullpad_tid), +) +f.create_dataset( + "fixed_size_string_nullpad_part", + shape=(1,), + data=["1"], + dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.create_dataset( + "fixed_size_string_nullpad_empty", + shape=(1,), + data=[""], + dtype=h5py.Datatype(fix_size_nullpad_tid) +) + +fix_size_spacepad_tid = h5py.h5t.C_S1.copy() +fix_size_spacepad_tid.set_size(4) +fix_size_spacepad_tid.set_strpad(h5py.h5t.STR_SPACEPAD) +f.create_dataset( + "fixed_size_string_spacepad_full", + shape=(1,), + data=["1234"], + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.create_dataset( + "fixed_size_string_spacepad_trunc", + shape=(1,), + data=["12345"], + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.create_dataset( + "fixed_size_string_spacepad_part", + shape=(1,), + data=["1"], + dtype=h5py.Datatype(fix_size_spacepad_tid) +) +f.create_dataset( + "fixed_size_string_spacepad_empty", + shape=(1,), + data=[""], + dtype=h5py.Datatype(fix_size_spacepad_tid) +) diff --git a/test/h5py_test_fixed_size_string_datasets.h5 b/test/h5py_test_fixed_size_string_datasets.h5 new file mode 100644 index 0000000000000000000000000000000000000000..cb9c005a27cd9b630f7fd8006e58089e47c6c9d8 GIT binary patch literal 7576 zcmeHL%}&BV5S|t=4bcP?6^SMsJaF)UKL?MRfZ?P>y%z$-kN|0*Cg{l{z5$QGBXIQS zBk(BRbh|r?S!x$UQetJ7bZ2&Eb~^hp-FD}-RJhuRXX4;C!+ypeAfd~3J(E~w8BHcV}G^qn0v0zh7u1}KQq9l??!+` z5*Z(Yi-c^%#{k8&IO_6Z`Q9@rhmwBGqY?x!SkN4-31ZJ6jdqnCk4g|!(C&Rt5PJr^ zom2W8~^|S literal 0 HcmV?d00001