diff --git a/src/h5cpp/attribute/attribute.cpp b/src/h5cpp/attribute/attribute.cpp index 6987ba984e..810a4c2217 100644 --- a/src/h5cpp/attribute/attribute.cpp +++ b/src/h5cpp/attribute/attribute.cpp @@ -128,6 +128,52 @@ void Attribute::close() } +// special overload for reading attributes into a std::string +void Attribute::read(std::string &data, bool trim) const +{ + auto file_type = datatype(); + if (file_type.get_class() == datatype::Class::String) + { + read(data, file_type, trim); + } + else + { + hdf5::datatype::DatatypeHolder mem_type_holder; + read(data, mem_type_holder.get(), file_type, trim); + } +} + + +void Attribute::read(std::string &data, const datatype::Datatype &mem_type, bool trim) const +{ + datatype::Datatype file_type = datatype(); + read(data, mem_type, file_type, trim); +} + + +void Attribute::read(std::string &data, const datatype::Datatype &mem_type, const datatype::Datatype &file_type, bool trim) const +{ + check_size(dataspace::create(data), dataspace(), "read"); + + if (file_type.get_class() == datatype::Class::String) + { + datatype::String string_type(file_type); + + if (string_type.is_variable_length()) + { + read_variable_length_string(data, mem_type); + } + else + { + read_fixed_length_string(data, mem_type, trim); + } + } + else + { + read_contiguous_data(data, mem_type); + } +} + } // namespace attribute } // namespace hdf5 diff --git a/src/h5cpp/attribute/attribute.hpp b/src/h5cpp/attribute/attribute.hpp index 1b5a5e92b3..980d81384f 100644 --- a/src/h5cpp/attribute/attribute.hpp +++ b/src/h5cpp/attribute/attribute.hpp @@ -202,6 +202,10 @@ class DLL_EXPORT Attribute template void read(T &data,const datatype::Datatype &mem_type) const; + void read(std::string &data, bool trim = false) const; + + void read(std::string &data, const datatype::Datatype &mem_type, bool trim = false) const; + private: node::Link parent_link_; ObjectHandle handle_; @@ -209,6 +213,8 @@ class DLL_EXPORT Attribute template void read(T &data,const datatype::Datatype &mem_type, const datatype::Datatype &file_type) const; + void read(std::string &data, const datatype::Datatype &mem_type, const datatype::Datatype &file_type, bool trim = false) const; + template void write_fixed_length_string(const T &data, const datatype::Datatype &mem_type) const @@ -273,7 +279,32 @@ class DLL_EXPORT Attribute } - template + void read_fixed_length_string(std::string &data, + const datatype::Datatype &mem_type, bool trim = false) const + { + using Trait = FixedLengthStringTrait; + using SpaceTrait = hdf5::dataspace::TypeTrait; + + auto buffer = Trait::BufferType::create(mem_type, SpaceTrait::create(data)); + + if (H5Aread(static_cast(handle_), + static_cast(mem_type), + buffer.data()) < 0) + { + error::Singleton::instance().throw_with_stack("Failure to read data from attribute!"); + } + + if (trim) + { + data = Trait::from_buffer_trimmed(buffer, mem_type, SpaceTrait::create(data)); + } + else + { + data = Trait::from_buffer(buffer, mem_type, SpaceTrait::create(data)); + } + } + + template void read_variable_length_string(T &data, const datatype::Datatype &mem_type) const { diff --git a/src/h5cpp/core/fixed_length_string.hpp b/src/h5cpp/core/fixed_length_string.hpp index de2346c149..1633bd9adb 100644 --- a/src/h5cpp/core/fixed_length_string.hpp +++ b/src/h5cpp/core/fixed_length_string.hpp @@ -25,6 +25,7 @@ // #pragma once +#include #include #include #include @@ -120,7 +121,42 @@ struct FixedLengthStringTrait const datatype::String &, const dataspace::Dataspace &) { - return DataType(buffer.begin(),buffer.end()); + return DataType(buffer.begin(), buffer.end()); + } + + //! + //! @brief store data from buffer in target memory and trim any tailing null-terminating characters + //! + //! @param buffer reference to the IO buffer + //! @param memory_type the datatype describing how the string is stored in the file + //! @return new data instance + static DataType from_buffer_trimmed(const BufferType &buffer, + const datatype::String &memory_type, + const dataspace::Dataspace &) + { + const auto padding = memory_type.padding(); + auto end_it = buffer.end(); + switch (padding) + { + case datatype::StringPad::NullTerm: + end_it = std::find(buffer.begin(), buffer.end(), '\0'); + break; + case datatype::StringPad::NullPad: + // get iterator to last padding \0 character + end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const BufferType::value_type &c) + { return c == '\0'; }) + .base(); + break; + case datatype::StringPad::SpacePad: + // get iterator to last padding space character + end_it = std::find_if_not(buffer.rbegin(), buffer.rend(), [](const BufferType::value_type &c) + { return c == ' '; }) + .base(); + break; + default: + break; + } + return DataType(buffer.begin(), end_it); } }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1ca1f920d6..7093bbda80 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,6 +19,7 @@ include_directories(${PROJECT_SOURCE_DIR}/src configure_file(h5py_test_data.h5 h5py_test_data.h5 COPYONLY) configure_file(h5py_test_boolattr.h5 h5py_test_boolattr.h5 COPYONLY) +configure_file(h5py_test_string_attributes.h5 h5py_test_string_attributes.h5 COPYONLY) configure_file(pniio_test_boolattr.h5 pniio_test_boolattr.h5 COPYONLY) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt index 110c3d9fa6..f1d94b8db3 100644 --- a/test/core/CMakeLists.txt +++ b/test/core/CMakeLists.txt @@ -1,5 +1,5 @@ -set(test_sources +set(test_sources ObjectHandleDefault.cpp object_handle_test.cpp file_object_handle_test.cpp @@ -10,6 +10,7 @@ set(test_sources attribute_object_handle_test.cpp property_objects_handle_test.cpp error_objects_handle_test.cpp + fixed_length_string_test.cpp test_environment.cpp iteration_index_test.cpp iteration_order_test.cpp diff --git a/test/core/fixed_length_string_test.cpp b/test/core/fixed_length_string_test.cpp new file mode 100644 index 0000000000..d473a21441 --- /dev/null +++ b/test/core/fixed_length_string_test.cpp @@ -0,0 +1,224 @@ + +#ifdef H5CPP_CATCH2_V2 +#include +#else +#include +#endif +#include + +using namespace hdf5; + +SCENARIO("testing reading of fixed-size string attributes") +{ + auto h5py_file = file::open("../h5py_test_string_attributes.h5", file::AccessFlags::ReadOnly); + auto root_group = h5py_file.root(); + + SECTION("null-terminated string attribute of size 4") + { + SECTION("written string was empty") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_empty"]; + WHEN("reading back the value into a std::string with trimming disabled") + { + std::string value; + attribute.read(value); + THEN("the std::string should be of length 4 and start with \\0") + { + REQUIRE(value.size() == 4); + REQUIRE(value[0] == '\0'); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); + THEN("the std::string should be empty (i.e., contain \"\", without trailing \\0)") + { + REQUIRE(value == ""); + } + } + } + SECTION("written string was \"1\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_part"]; + WHEN("reading back the value into a std::string with trimming disabled") + { + std::string value; + attribute.read(value); + THEN("the std::string should be of length 4 and start with \"1\\0\"") + { + REQUIRE(value.size() == 4); + REQUIRE(value[0] == '1'); + REQUIRE(value[1] == '\0'); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); + THEN("the std::string should contain the string \"1\" (without trailing \\0)") + { + REQUIRE(value == "1"); + } + } + } + SECTION("written string was \"123\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_full"]; + WHEN("reading back the value into a std::string with trimming disabled") + { + std::string value; + attribute.read(value); + THEN("the std::string should of length 4 and contain \"123\0\"") + { + std::string expected("123\0", 4); + REQUIRE(value == expected); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); + THEN("the std::string should contain the entire string '123' (without trailing \\0)") + { + REQUIRE(value == "123"); + } + } + } + SECTION("written string was \"1234\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullterm_trunc"]; + WHEN("reading back the value into a std::string with trimming disabled") + { + std::string value; + attribute.read(value); + THEN("the std::string should be of length 4 and contain the truncated string \"123\0\"") + { + std::string expected("123\0",4); + REQUIRE(value == expected); + } + } + WHEN("reading back the value into a std::string with trimming enabled") + { + std::string value; + attribute.read(value, true); + THEN("the std::string should be of length 4 and contain the truncated string '123' (without trailing \\0)") + { + REQUIRE(value == "123"); + } + } + } + } + SECTION("null-padded string attribute of size 4") + { + SECTION("written string was empty") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_empty"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should consist only \\0 characters") + { + std::string expected("\0\0\0\0", 4); + REQUIRE(value == expected); + } + } + } + SECTION("written string was \"1\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_part"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the string '1' (without any \\0-padding)") + { + std::string expected("1\0\0\0", 4); + REQUIRE(value == expected); + } + } + } + SECTION("written string was \"1234\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_full"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the entire string '1234' (without any \\0-padding)") + { + REQUIRE(value == "1234"); + } + } + } + SECTION("written string was \"12345\"") + { + auto attribute = root_group.attributes["fixed_size_string_nullpad_trunc"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the truncated string '1234' (without any \\0-padding)") + { + REQUIRE(value == "1234"); + } + } + } + } + SECTION("space-padded string attribute of size 4") + { + SECTION("written string was empty") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_empty"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain only spaces") + { + REQUIRE(value == " "); + } + } + } + SECTION("written string was \"1\"") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_part"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the string '1 ' ('1' with three padding spaces)") + { + REQUIRE(value == "1 "); + } + } + } + SECTION("written string was \"1234\"") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_full"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the entire string '1234' (without any space-padding)") + { + REQUIRE(value == "1234"); + } + } + } + SECTION("written string was \"12345\"") + { + auto attribute = root_group.attributes["fixed_size_string_spacepad_trunc"]; + WHEN("reading back the value into a std::string") + { + std::string value; + attribute.read(value); + THEN("the std::string should contain the truncated string '1234' (without any space-padding)") + { + REQUIRE(value == "1234"); + } + } + } + } +} \ No newline at end of file diff --git a/test/create_h5py_test_fixed_size_string_dataset.py b/test/create_h5py_test_fixed_size_string_dataset.py new file mode 100644 index 0000000000..c24bd0bfc2 --- /dev/null +++ b/test/create_h5py_test_fixed_size_string_dataset.py @@ -0,0 +1,92 @@ +import h5py +import numpy + +f = h5py.File("h5py_test_fixed_size_string_datasets.h5","w") + +fix_size_nullterm_tid = h5py.h5t.C_S1.copy() +fix_size_nullterm_tid.set_size(4) +fix_size_nullterm_tid.set_strpad(h5py.h5t.STR_NULLTERM) + +f.create_dataset( + "fixed_size_string_nullterm_full", + shape=(1,), + data=["123"], + dtype=h5py.Datatype(fix_size_nullterm_tid)) + +f.create_dataset( + "fixed_size_string_nullterm_trunc", + shape=(1,), + data=["1234"], + dtype=h5py.Datatype(fix_size_nullterm_tid), +) + +f.create_dataset( + "fixed_size_string_nullterm_part", + shape=(1,), + data=["1"], + dtype=h5py.Datatype(fix_size_nullterm_tid) +) + +f.create_dataset( + "fixed_size_string_nullterm_empty", + shape=(1,), + data=[""], + dtype=h5py.Datatype(fix_size_nullterm_tid) +) + + +fix_size_nullpad_tid = h5py.h5t.C_S1.copy() +fix_size_nullpad_tid.set_size(4) +fix_size_nullpad_tid.set_strpad(h5py.h5t.STR_NULLPAD) +f.create_dataset( + "fixed_size_string_nullpad_full", + shape=(1,), + data=["1234"], + dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.create_dataset( + "fixed_size_string_nullpad_trunc", + shape=(1,), + data=["12345"], + dtype=h5py.Datatype(fix_size_nullpad_tid), +) +f.create_dataset( + "fixed_size_string_nullpad_part", + shape=(1,), + data=["1"], + dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.create_dataset( + "fixed_size_string_nullpad_empty", + shape=(1,), + data=[""], + dtype=h5py.Datatype(fix_size_nullpad_tid) +) + +fix_size_spacepad_tid = h5py.h5t.C_S1.copy() +fix_size_spacepad_tid.set_size(4) +fix_size_spacepad_tid.set_strpad(h5py.h5t.STR_SPACEPAD) +f.create_dataset( + "fixed_size_string_spacepad_full", + shape=(1,), + data=["1234"], + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.create_dataset( + "fixed_size_string_spacepad_trunc", + shape=(1,), + data=["12345"], + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.create_dataset( + "fixed_size_string_spacepad_part", + shape=(1,), + data=["1"], + dtype=h5py.Datatype(fix_size_spacepad_tid) +) +f.create_dataset( + "fixed_size_string_spacepad_empty", + shape=(1,), + data=[""], + dtype=h5py.Datatype(fix_size_spacepad_tid) +) diff --git a/test/create_h5py_test_string_attributes.py b/test/create_h5py_test_string_attributes.py new file mode 100644 index 0000000000..2d3c2c1601 --- /dev/null +++ b/test/create_h5py_test_string_attributes.py @@ -0,0 +1,62 @@ +import h5py +import numpy as np + +f = h5py.File("h5py_test_string_attributes.h5", "w") + + +fix_size_nullterm_tid = h5py.h5t.C_S1.copy() +fix_size_nullterm_tid.set_size(4) +fix_size_nullterm_tid.set_strpad(h5py.h5t.STR_NULLTERM) +f.attrs.create( + "fixed_size_string_nullterm_full", "123", dtype=h5py.Datatype(fix_size_nullterm_tid) +) +f.attrs.create( + "fixed_size_string_nullterm_trunc", + "1234", + dtype=h5py.Datatype(fix_size_nullterm_tid), +) +f.attrs.create( + "fixed_size_string_nullterm_part", "1", dtype=h5py.Datatype(fix_size_nullterm_tid) +) +f.attrs.create( + "fixed_size_string_nullterm_empty", "", dtype=h5py.Datatype(fix_size_nullterm_tid) +) + + +fix_size_nullpad_tid = h5py.h5t.C_S1.copy() +fix_size_nullpad_tid.set_size(4) +fix_size_nullpad_tid.set_strpad(h5py.h5t.STR_NULLPAD) +f.attrs.create( + "fixed_size_string_nullpad_full", "1234", dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.attrs.create( + "fixed_size_string_nullpad_trunc", + "12345", + dtype=h5py.Datatype(fix_size_nullpad_tid), +) +f.attrs.create( + "fixed_size_string_nullpad_part", "1", dtype=h5py.Datatype(fix_size_nullpad_tid) +) +f.attrs.create( + "fixed_size_string_nullpad_empty", "", dtype=h5py.Datatype(fix_size_nullpad_tid) +) + +fix_size_spacepad_tid = h5py.h5t.C_S1.copy() +fix_size_spacepad_tid.set_size(4) +fix_size_spacepad_tid.set_strpad(h5py.h5t.STR_SPACEPAD) +f.attrs.create( + "fixed_size_string_spacepad_full", + "1234", + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.attrs.create( + "fixed_size_string_spacepad_trunc", + "12345", + dtype=h5py.Datatype(fix_size_spacepad_tid), +) +f.attrs.create( + "fixed_size_string_spacepad_part", "1", dtype=h5py.Datatype(fix_size_spacepad_tid) +) +f.attrs.create( + "fixed_size_string_spacepad_empty", "", dtype=h5py.Datatype(fix_size_spacepad_tid) +) diff --git a/test/h5py_test_fixed_size_string_datasets.h5 b/test/h5py_test_fixed_size_string_datasets.h5 new file mode 100644 index 0000000000..cb9c005a27 Binary files /dev/null and b/test/h5py_test_fixed_size_string_datasets.h5 differ diff --git a/test/h5py_test_string_attributes.h5 b/test/h5py_test_string_attributes.h5 new file mode 100644 index 0000000000..aa08db40f2 Binary files /dev/null and b/test/h5py_test_string_attributes.h5 differ