diff --git a/include/iris/x4/ast/position_tagged.hpp b/include/iris/x4/ast/position_tagged.hpp deleted file mode 100644 index b7e05be71..000000000 --- a/include/iris/x4/ast/position_tagged.hpp +++ /dev/null @@ -1,104 +0,0 @@ -#ifndef IRIS_ZZ_X4_AST_POSITION_TAGGED_HPP -#define IRIS_ZZ_X4_AST_POSITION_TAGGED_HPP - -/*============================================================================= - Copyright (c) 2014 Joel de Guzman - Copyright (c) 2025 Nana Sakisaka - Copyright (c) 2026 The Iris Project Contributors - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -==============================================================================*/ - -#include - -#include -#include - -namespace iris::x4 { - -namespace ast { - -struct position_tagged -{ - // Use this to annotate an AST with the iterator position. - // These ids are used as a key to the position_cache (below) - // and marks the start and end of an AST node. - int id_first = -1; - int id_last = -1; -}; - -template -class position_cache -{ -public: - using iterator_type = typename Container::value_type; - - position_cache(iterator_type first, iterator_type last) - : first_(first) - , last_(last) - {} - - template - requires std::derived_from - [[nodiscard]] std::ranges::subrange - position_of(Attr const& attr) const - { - return std::ranges::subrange{ - positions_.at(attr.id_first), - positions_.at(attr.id_last) - }; - } - - template - requires (!std::derived_from) - [[nodiscard]] std::ranges::subrange - position_of(Attr const&) const - { - // returns an empty position - return std::ranges::subrange{}; - } - - // This will catch all nodes except those inheriting from position_tagged - template - requires (!std::derived_from) - static void annotate(Attr&, iterator_type const&, iterator_type const&) - { - // (no-op) no need for tags - } - - template - requires std::derived_from - void annotate(Attr& attr, iterator_type first, iterator_type last) - { - attr.id_first = static_cast(positions_.size()); - positions_.push_back(std::move(first)); - attr.id_last = static_cast(positions_.size()); - positions_.push_back(std::move(last)); - } - - [[nodiscard]] Container const& - get_positions() const noexcept - { - return positions_; - } - - iterator_type first() const { return first_; } - iterator_type last() const { return last_; } - -private: - Container positions_; - iterator_type first_; - iterator_type last_; -}; - -} // ast - -using position_tagged [[deprecated("Use `ast::`")]] = ast::position_tagged; - -template -using position_cache [[deprecated("Use `ast::`")]] = ast::position_cache; - -} // iris::x4 - -#endif diff --git a/include/iris/x4/char/char_set.hpp b/include/iris/x4/char/char_set.hpp index 04efac39c..5756c534d 100644 --- a/include/iris/x4/char/char_set.hpp +++ b/include/iris/x4/char/char_set.hpp @@ -13,9 +13,11 @@ #include #include #include -#include #include +#include + +#include #include #include @@ -47,6 +49,17 @@ struct char_range : char_parser> } char_type from, to; + + [[nodiscard]] std::string get_x4_info() const + { + // TODO: make more user-friendly && make the format consistent with above + // TODO: escape + return std::format( + "char_range \"{}-{}\"", + iris::unicode::transcode(typename Encoding::string_type(1, this->from)), + iris::unicode::transcode(typename Encoding::string_type(1, this->to)) + ); + } }; // Parser for a character set @@ -103,29 +116,14 @@ struct char_set : char_parser> } detail::basic_chset chset; -}; -template -struct get_info> -{ - using result_type = std::string; - [[nodiscard]] constexpr std::string operator()(char_set const& /* p */) const + [[nodiscard]] std::string get_x4_info() const { + // TODO: escape return "char-set"; // TODO: make more user-friendly } }; -template -struct get_info> -{ - using result_type = std::string; - [[nodiscard]] constexpr std::string operator()(char_range const& p) const - { - // TODO: make more user-friendly && make the format consistent with above - return "char_range \"" + x4::to_utf8(Encoding::toucs4(p.from)) + '-' + x4::to_utf8(Encoding::toucs4(p.to))+ '"'; - } -}; - } // iris::x4 #endif diff --git a/include/iris/x4/char/literal_char.hpp b/include/iris/x4/char/literal_char.hpp index 4eba2d4b0..3950a05c8 100644 --- a/include/iris/x4/char/literal_char.hpp +++ b/include/iris/x4/char/literal_char.hpp @@ -11,9 +11,11 @@ ==============================================================================*/ #include -#include #include +#include + +#include #include #include @@ -51,18 +53,17 @@ struct literal_char : char_parser> [[nodiscard]] constexpr classify_type classify_ch() const noexcept { return classify_ch_; } -private: - classify_type classify_ch_{}; -}; - -template -struct get_info> -{ - using result_type = std::string; - [[nodiscard]] std::string operator()(literal_char const& p) const + [[nodiscard]] std::string get_x4_info() const { - return '\'' + x4::to_utf8(Encoding::toucs4(p.classify_ch())) + '\''; + // TODO: escape quote + return std::format( + "'{}'", + iris::unicode::transcode(typename Encoding::string_type(1, this->classify_ch_)) + ); } + +private: + classify_type classify_ch_{}; }; } // iris::x4 diff --git a/include/iris/x4/char_encoding/standard.hpp b/include/iris/x4/char_encoding/standard.hpp index c2f0c03d6..eb2370e65 100644 --- a/include/iris/x4/char_encoding/standard.hpp +++ b/include/iris/x4/char_encoding/standard.hpp @@ -11,6 +11,8 @@ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) =============================================================================*/ +#include + #include #include #include @@ -22,6 +24,7 @@ namespace iris::x4::char_encoding { struct standard { using char_type = char; + using string_type = std::string; using classify_type = unsigned char; [[nodiscard]] static constexpr bool diff --git a/include/iris/x4/char_encoding/standard_wide.hpp b/include/iris/x4/char_encoding/standard_wide.hpp index c8ed3e1d2..cf09f7087 100644 --- a/include/iris/x4/char_encoding/standard_wide.hpp +++ b/include/iris/x4/char_encoding/standard_wide.hpp @@ -24,6 +24,7 @@ namespace iris::x4::char_encoding { struct standard_wide { using char_type = wchar_t; + using string_type = std::wstring; using classify_type = wchar_t; template diff --git a/include/iris/x4/char_encoding/unicode.hpp b/include/iris/x4/char_encoding/unicode.hpp index daa5b5743..42df691cd 100644 --- a/include/iris/x4/char_encoding/unicode.hpp +++ b/include/iris/x4/char_encoding/unicode.hpp @@ -11,7 +11,11 @@ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) =============================================================================*/ -#include +#include +#include +#include + +#include #include @@ -20,6 +24,7 @@ namespace iris::x4::char_encoding { struct unicode { using char_type = char32_t; + using string_type = std::u32string; using classify_type = x4::unicode::classify_type; [[nodiscard]] static constexpr bool diff --git a/include/iris/x4/char_encoding/unicode/classification.hpp b/include/iris/x4/char_encoding/unicode/category.hpp similarity index 59% rename from include/iris/x4/char_encoding/unicode/classification.hpp rename to include/iris/x4/char_encoding/unicode/category.hpp index 68833be82..4da07006d 100644 --- a/include/iris/x4/char_encoding/unicode/classification.hpp +++ b/include/iris/x4/char_encoding/unicode/category.hpp @@ -1,5 +1,5 @@ -#ifndef IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFICATION_HPP -#define IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFICATION_HPP +#ifndef IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CATEGORY_HPP +#define IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CATEGORY_HPP /*============================================================================= Copyright (c) 2001-2011 Joel de Guzman @@ -13,11 +13,6 @@ table builder) (c) Peter Kankowski, 2008 ==============================================================================*/ -#include -#include -#include -#include - #include namespace iris::x4::unicode { @@ -266,120 +261,6 @@ enum script } // properties -[[nodiscard]] constexpr properties::category get_category(classify_type ch) noexcept -{ - return static_cast(detail::category_lookup(ch) & 0x3F); -} - -[[nodiscard]] constexpr properties::major_category get_major_category(classify_type ch) noexcept -{ - return static_cast(unicode::get_category(ch) >> 3); -} - -[[nodiscard]] constexpr bool is_punctuation(classify_type ch) noexcept -{ - return unicode::get_major_category(ch) == properties::punctuation; -} - -[[nodiscard]] constexpr bool is_decimal_number(classify_type ch) noexcept -{ - return unicode::get_category(ch) == properties::decimal_number; -} - -[[nodiscard]] constexpr bool is_hex_digit(classify_type ch) noexcept -{ - return (detail::category_lookup(ch) & properties::hex_digit) != 0; -} - -[[nodiscard]] constexpr bool is_control(classify_type ch) noexcept -{ - return unicode::get_category(ch) == properties::control; -} - -[[nodiscard]] constexpr bool is_alphabetic(classify_type ch) noexcept -{ - return (detail::category_lookup(ch) & properties::alphabetic) != 0; -} - -[[nodiscard]] constexpr bool is_alphanumeric(classify_type ch) noexcept -{ - return unicode::is_decimal_number(ch) || unicode::is_alphabetic(ch); -} - -[[nodiscard]] constexpr bool is_uppercase(classify_type ch) noexcept -{ - return (detail::category_lookup(ch) & properties::uppercase) != 0; -} - -[[nodiscard]] constexpr bool is_lowercase(classify_type ch) noexcept -{ - return (detail::category_lookup(ch) & properties::lowercase) != 0; -} - -[[nodiscard]] constexpr bool is_white_space(classify_type ch) noexcept -{ - return (detail::category_lookup(ch) & properties::white_space) != 0; -} - -[[nodiscard]] constexpr bool is_blank(classify_type ch) noexcept -{ - switch (ch) - { - case '\n': case '\v': case '\f': case '\r': - return false; - default: - return unicode::is_white_space(ch) && - !( - unicode::get_category(ch) == properties::line_separator || - unicode::get_category(ch) == properties::paragraph_separator - ); - } -} - -[[nodiscard]] constexpr bool is_graph(classify_type ch) noexcept -{ - return !( - unicode::is_white_space(ch) || - unicode::get_category(ch) == properties::control || - unicode::get_category(ch) == properties::surrogate || - unicode::get_category(ch) == properties::unassigned - ); -} - -[[nodiscard]] constexpr bool is_print(classify_type ch) noexcept -{ - return (unicode::is_graph(ch) || unicode::is_blank(ch)) && !unicode::is_control(ch); -} - -[[nodiscard]] constexpr bool is_noncharacter_code_point(classify_type ch) noexcept -{ - return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0; -} - -[[nodiscard]] constexpr bool is_default_ignorable_code_point(classify_type ch) noexcept -{ - return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0; -} - -[[nodiscard]] constexpr properties::script get_script(classify_type ch) noexcept -{ - return static_cast(detail::script_lookup(ch)); -} - -[[nodiscard]] constexpr classify_type to_lowercase(classify_type ch) noexcept -{ - // The table returns 0 to signal that this code maps to itself - classify_type const r = detail::lowercase_lookup(ch); - return r == 0 ? ch : r; -} - -[[nodiscard]] constexpr classify_type to_uppercase(classify_type ch) noexcept -{ - // The table returns 0 to signal that this code maps to itself - classify_type const r = detail::uppercase_lookup(ch); - return r == 0 ? ch : r; -} - } // iris::x4::unicode #endif diff --git a/include/iris/x4/char_encoding/unicode/classify_case.hpp b/include/iris/x4/char_encoding/unicode/classify_case.hpp new file mode 100644 index 000000000..777a3f12e --- /dev/null +++ b/include/iris/x4/char_encoding/unicode/classify_case.hpp @@ -0,0 +1,35 @@ +#ifndef IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFY_CASE_HPP +#define IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFY_CASE_HPP + +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + Copyright (c) 2025 Nana Sakisaka + Copyright (c) 2026 The Iris Project Contributors + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ + +#include +#include +#include + +namespace iris::x4::unicode { + +[[nodiscard]] constexpr classify_type to_lowercase(classify_type ch) noexcept +{ + // The table returns 0 to signal that this code maps to itself + classify_type const r = detail::lowercase_lookup(ch); + return r == 0 ? ch : r; +} + +[[nodiscard]] constexpr classify_type to_uppercase(classify_type ch) noexcept +{ + // The table returns 0 to signal that this code maps to itself + classify_type const r = detail::uppercase_lookup(ch); + return r == 0 ? ch : r; +} + +} // iris::x4::unicode + +#endif diff --git a/include/iris/x4/char_encoding/unicode/classify_category.hpp b/include/iris/x4/char_encoding/unicode/classify_category.hpp new file mode 100644 index 000000000..812df1ffd --- /dev/null +++ b/include/iris/x4/char_encoding/unicode/classify_category.hpp @@ -0,0 +1,115 @@ +#ifndef IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFY_CATEGORY_HPP +#define IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFY_CATEGORY_HPP + +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + Copyright (c) 2025 Nana Sakisaka + Copyright (c) 2026 The Iris Project Contributors + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ + +#include +#include + +namespace iris::x4::unicode { + +[[nodiscard]] constexpr properties::category get_category(classify_type ch) noexcept +{ + return static_cast(detail::category_lookup(ch) & 0x3F); +} + +[[nodiscard]] constexpr properties::major_category get_major_category(classify_type ch) noexcept +{ + return static_cast(unicode::get_category(ch) >> 3); +} + +[[nodiscard]] constexpr bool is_punctuation(classify_type ch) noexcept +{ + return unicode::get_major_category(ch) == properties::punctuation; +} + +[[nodiscard]] constexpr bool is_decimal_number(classify_type ch) noexcept +{ + return unicode::get_category(ch) == properties::decimal_number; +} + +[[nodiscard]] constexpr bool is_hex_digit(classify_type ch) noexcept +{ + return (detail::category_lookup(ch) & properties::hex_digit) != 0; +} + +[[nodiscard]] constexpr bool is_control(classify_type ch) noexcept +{ + return unicode::get_category(ch) == properties::control; +} + +[[nodiscard]] constexpr bool is_alphabetic(classify_type ch) noexcept +{ + return (detail::category_lookup(ch) & properties::alphabetic) != 0; +} + +[[nodiscard]] constexpr bool is_alphanumeric(classify_type ch) noexcept +{ + return unicode::is_decimal_number(ch) || unicode::is_alphabetic(ch); +} + +[[nodiscard]] constexpr bool is_uppercase(classify_type ch) noexcept +{ + return (detail::category_lookup(ch) & properties::uppercase) != 0; +} + +[[nodiscard]] constexpr bool is_lowercase(classify_type ch) noexcept +{ + return (detail::category_lookup(ch) & properties::lowercase) != 0; +} + +[[nodiscard]] constexpr bool is_white_space(classify_type ch) noexcept +{ + return (detail::category_lookup(ch) & properties::white_space) != 0; +} + +[[nodiscard]] constexpr bool is_blank(classify_type ch) noexcept +{ + switch (ch) + { + case '\n': case '\v': case '\f': case '\r': + return false; + default: + return unicode::is_white_space(ch) && + !( + unicode::get_category(ch) == properties::line_separator || + unicode::get_category(ch) == properties::paragraph_separator + ); + } +} + +[[nodiscard]] constexpr bool is_graph(classify_type ch) noexcept +{ + return !( + unicode::is_white_space(ch) || + unicode::get_category(ch) == properties::control || + unicode::get_category(ch) == properties::surrogate || + unicode::get_category(ch) == properties::unassigned + ); +} + +[[nodiscard]] constexpr bool is_print(classify_type ch) noexcept +{ + return (unicode::is_graph(ch) || unicode::is_blank(ch)) && !unicode::is_control(ch); +} + +[[nodiscard]] constexpr bool is_noncharacter_code_point(classify_type ch) noexcept +{ + return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0; +} + +[[nodiscard]] constexpr bool is_default_ignorable_code_point(classify_type ch) noexcept +{ + return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0; +} + +} // iris::x4::unicode + +#endif diff --git a/include/iris/x4/char_encoding/unicode/classify_script.hpp b/include/iris/x4/char_encoding/unicode/classify_script.hpp new file mode 100644 index 000000000..9e5fdf026 --- /dev/null +++ b/include/iris/x4/char_encoding/unicode/classify_script.hpp @@ -0,0 +1,25 @@ +#ifndef IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFY_SCRIPT_HPP +#define IRIS_ZZ_X4_CHAR_ENCODING_UNICODE_CLASSIFY_SCRIPT_HPP + +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + Copyright (c) 2025 Nana Sakisaka + Copyright (c) 2026 The Iris Project Contributors + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ + +#include +#include + +namespace iris::x4::unicode { + +[[nodiscard]] constexpr properties::script get_script(classify_type ch) noexcept +{ + return static_cast(detail::script_lookup(ch)); +} + +} // iris::x4::unicode + +#endif diff --git a/include/iris/x4/core/action.hpp b/include/iris/x4/core/action.hpp index d0a3195f9..e3ad26b34 100644 --- a/include/iris/x4/core/action.hpp +++ b/include/iris/x4/core/action.hpp @@ -17,11 +17,12 @@ #include #include -#include +#include // subrange #include #include #include #include +#include namespace iris::x4 { @@ -107,6 +108,11 @@ struct action : proxy_parser> constexpr void operator[](auto const&) const = delete; // You can't add semantic action for semantic action + [[nodiscard]] constexpr std::string get_x4_info() const + { + return std::format("{}[f]", get_info{}(this->subject)); + } + private: // Semantic action with no parameter: `p[([] { /* ... */ })]` template diff --git a/include/iris/x4/core/skip_over.hpp b/include/iris/x4/core/skip_over.hpp index 51a9f6eeb..83910a2d0 100644 --- a/include/iris/x4/core/skip_over.hpp +++ b/include/iris/x4/core/skip_over.hpp @@ -42,6 +42,25 @@ enum struct builtin_skipper_kind : char space, }; +namespace detail { + +template +struct builtin_skipper_traits; + +template<> +struct builtin_skipper_traits +{ + static constexpr char const* name = "blank"; +}; + +template<> +struct builtin_skipper_traits +{ + static constexpr char const* name = "space"; +}; + +} // detail + template Se, class Context> requires X4Subject> constexpr void skip_over(It& first, Se const& last, Context const& ctx) diff --git a/include/iris/x4/debug/annotate.hpp b/include/iris/x4/debug/annotate.hpp index bdd000e4d..515223961 100644 --- a/include/iris/x4/debug/annotate.hpp +++ b/include/iris/x4/debug/annotate.hpp @@ -29,6 +29,7 @@ struct error_handler; enum struct annotated_rule_kind : unsigned { + annotate_none = 0, annotate_success = 1 << 0, // suitable for any child parsers annotate_expectation_failure = 1 << 1, // suitable for root parser annotate_trace = 1 << 2, // suitable for any child parsers (except for the primitive ones) @@ -52,7 +53,7 @@ template struct expectation_failure; template -struct annotated_rule +struct annotated_rule : detail::annotated_rule_base { template Se, class Context, X4Attribute Attr> requires diff --git a/include/iris/x4/debug/default_error_handler.hpp b/include/iris/x4/debug/default_error_handler.hpp index efe194e0c..de251c2bd 100644 --- a/include/iris/x4/debug/default_error_handler.hpp +++ b/include/iris/x4/debug/default_error_handler.hpp @@ -13,58 +13,82 @@ #include #include -#include #include -#include -#include +#include +#include #include +#include +#include #include #include #include -#include namespace iris::x4 { -template +template Se = It> class default_error_handler { - static constexpr int IndentSpaces = 2; - static constexpr int CharsToPrint = 20; - public: using iterator_type = It; + using sentinel_type = Se; - default_error_handler( - It first, It last, - std::ostream& err_out, - std::string file = "", - int tabs = 4 - ) - : err_out_(err_out) - , file_(file) - , tabs_(tabs) - , pos_cache_(first, last) - {} - - template Se, class Context, X4Attribute Attr> - void on_success(It const& first, Se const& last, Context const& /*ctx*/, Attr& attr) + static constexpr int indent_space_width = 2; + static constexpr int code_points_to_print = 20; + static constexpr int highlight_chars = 2; + + inline static auto const colorize_cfg = iris::ansi_colorize::colorizer<>::make_config({ + {"$tag", "fg:rgb(140,140,140)"}, + {"$text", "fg:rgb(249,190,182)"}, + {"$key", "fg:rgb(118,118,118)|bold"}, + {"$attr", "fg:rgb(145,220,254)"}, + {"$fail", "fg:rgb(141,44,43)|bold"}, + + // source highlight for expectation failure + {"$expect_left", "bg:rgb(40,40,140)"}, + {"$expect_right", "bg:rgb(120,0,0)"}, + }); + + [[nodiscard]] static bool is_internal_rule(std::string_view rule_name) noexcept { - pos_cache_.annotate(attr, first, last); + if (rule_name.empty()) return true; + if (rule_name.starts_with(std::string_view{"__"})) return true; + return false; } - template Se, class Context> - void on_expectation_failure(It const&, Se const&, Context const& /*ctx*/, expectation_failure const& failure) + default_error_handler(It source_first, Se source_last, std::ostream* error_out, std::ostream* trace_out, std::filesystem::path file_path = {}) + : source_first_(std::move(source_first)) + , source_last_(std::move(source_last)) + , error_out_(error_out) + , trace_out_(trace_out) + , file_path_(std::move(file_path)) { - (*this)(failure.where(), "Error! Expecting: " + failure.which() + " here:"); + } + + [[nodiscard]] It source_first() const { return source_first_; } + [[nodiscard]] Se source_last() const { return source_last_; } + + [[nodiscard]] std::ostream* error_out() const noexcept { return error_out_; } + [[nodiscard]] std::ostream* trace_out() const noexcept { return trace_out_; } + [[nodiscard]] std::filesystem::path const& file_path() const noexcept { return file_path_; } + + //template + //void on_success(It const first, std::sentinel_for auto const last, Context const& /*ctx*/, Attr& attr) + //{ + //} + + template + void on_expectation_failure(It const, std::sentinel_for auto const, Context const& /*ctx*/, expectation_failure const& failure) + { + this->print_expectation(failure.where(), "error: expecting `" + failure.which() + "` here:"); } template void on_trace( It first, - std::sentinel_for auto last, - Context const& /* ctx */, + std::sentinel_for auto const last, + Context const& /*ctx*/, Attr const& attr, std::string_view rule_name, tracer_state const state @@ -72,204 +96,161 @@ class default_error_handler { using enum tracer_state; + if (!this->trace_out()) return; + switch (state) { case pre_parse: - default_error_handler::print_indent(trace_indent_++); - err_out_ << '<' << rule_name << '>' << std::endl; - default_error_handler::print_some("try", first, last); + if (default_error_handler::is_internal_rule(rule_name)) ++tracer_internal_rule_stack_; + if (tracer_internal_rule_stack_ > 0) break; + + this->print_indent(tracer_indent_++); + this->print_trace("[$tag]<{}>[/$tag]\n", rule_name); + this->print_some("try ", first, last); break; case parse_succeeded: - default_error_handler::print_some("success", first, last); + if (default_error_handler::is_internal_rule(rule_name)) { + --tracer_internal_rule_stack_; + if (tracer_internal_rule_stack_ >= 0) break; + + } else { + if (tracer_internal_rule_stack_ > 0) break; + } + this->print_some("ok ", first, last); + if constexpr (!std::same_as) { - default_error_handler::print_indent(trace_indent_); - err_out_ << ""; - traits::print_attribute(err_out_, attr); - err_out_ << ""; - err_out_ << std::endl; + this->print_indent(tracer_indent_); + + this->print_trace("attr "); + this->print_trace("[$attr]"); + x4::print_attribute(*this->trace_out(), attr); + this->print_trace("[/$attr]\n"); } - default_error_handler::print_indent(--trace_indent_); - err_out_ << "' << std::endl; + this->print_indent(--tracer_indent_); + + this->print_trace("[$tag][/$tag]\n", rule_name); break; case parse_failed: - default_error_handler::print_indent(trace_indent_); - err_out_ << "" << std::endl; - default_error_handler::print_indent(--trace_indent_); - err_out_ << "' << std::endl; + if (default_error_handler::is_internal_rule(rule_name)) { + --tracer_internal_rule_stack_; + if (tracer_internal_rule_stack_ >= 0) break; + + } else { + if (tracer_internal_rule_stack_ > 0) break; + } + + this->print_indent(tracer_indent_); + this->print_trace("[$fail]fail[/$fail]\n"); + this->print_indent(--tracer_indent_); + + this->print_trace("[$tag][/$tag]\n", rule_name); + break; + + default: break; } } -private: - void operator()(It err_pos, std::string const& error_message) const; - void operator()(It err_first, It err_last, std::string const& error_message) const; - - void operator()(ast::position_tagged const& pos, std::string const& message) const + void print_line_highlight(std::ranges::subrange const line, It const err_pos) const { - auto where = pos_cache_.position_of(pos); - (*this)(where.begin(), where.end(), message); + if (!error_out_) return; + + using char_type = std::iterator_traits::value_type; + using string_view_type = std::basic_string_view; + + auto const [left_it, left_count] = iris::unicode::bounded_prev(line.begin(), err_pos, highlight_chars); + auto const [right_it, right_count] = iris::unicode::bounded_next(err_pos, line.end(), highlight_chars); + + if (left_count > 0) { + this->print_error( + "{}[$expect_left]{}[/$expect_left]", + iris::unicode::transcode(string_view_type{line.begin(), left_it}), + iris::unicode::transcode(string_view_type{left_it, err_pos}) + ); + } + if (right_count > 0) { + this->print_error( + "[$expect_right]{}[/$expect_right]{}", + iris::unicode::transcode(string_view_type{err_pos, right_it}), + iris::unicode::transcode(string_view_type{right_it, line.end()}) + ); + } + *error_out_ << "\n"; } - [[nodiscard]] std::ranges::subrange - position_of(ast::position_tagged const& pos) const +private: + template + void print_error(std::string_view fmt_str, Args&&... args) const { - return pos_cache_.position_of(pos); + iris::colorize_format_to(*error_out_, colorize_cfg, fmt_str, std::forward(args)...); } - [[nodiscard]] ast::position_cache> const& - get_position_cache() const noexcept + template + void print_trace(std::string_view fmt_str, Args&&... args) const { - return pos_cache_; + iris::colorize_format_to(*trace_out_, colorize_cfg, fmt_str, std::forward(args)...); } - // tracer related void print_indent(int n) const { - n *= IndentSpaces; + n *= indent_space_width; for (int i = 0; i != n; ++i) { - err_out_ << ' '; + *trace_out_ << ' '; } } - void print_some(std::string_view tag, It first, It last) const + void print_some(char const* tag, It first, It const last) const { - default_error_handler::print_indent(trace_indent_); + this->print_indent(tracer_indent_); - err_out_ << '<' << tag << '>'; - - for (int i = 0; first != last && i != CharsToPrint && *first; ++i, ++first) { - traits::print_token(err_out_, *first); + if (first == last) { + this->print_trace("{}[$key]eoi[/$key]\n", tag); + return; } - err_out_ << "' << std::endl; - - // TODO: convert invalid xml characters (e.g. '<') to valid character entities - } - void print_file_line(std::size_t line) const; - void print_line(It line_start, It last) const; - void print_indicator(It& line_start, It last, char ind) const; - It get_line_start(It first, It pos) const; - std::size_t position(It i) const; + this->print_trace("{}[$key]|[/$key]", tag); - std::ostream& err_out_; - std::string file_; - int tabs_; - ast::position_cache> pos_cache_; + this->print_trace("[$text]"); + x4::print_chars(*trace_out_, first, last, code_points_to_print); + this->print_trace("[/$text][$key]|[/$key]\n"); + } - int trace_indent_ = 0; -}; + void print_expectation(It err_pos, std::string_view error_message) const + { + if (!error_out_) return; -template -void default_error_handler::print_file_line(std::size_t line) const -{ - if (file_ != "") { - err_out_ << "In file " << file_ << ", "; - } else { - err_out_ << "In "; - } + x4::skip_whitespace_for_print(err_pos, source_last_); - err_out_ << "line " << line << ':' << '\n'; -} + this->print_file_line(x4::calc_line_number(source_first_, err_pos)); + *error_out_ << error_message << '\n'; -template -void default_error_handler::print_line(It start, It last) const -{ - auto end = start; - while (end != last) { - auto c = *end; - if (c == '\r' || c == '\n') break; - ++end; + std::ranges::subrange const line{ + x4::fetch_line_start(source_first_, err_pos), + x4::fetch_line_last(err_pos, source_last_) + }; + this->print_line_highlight(line, err_pos); } - using char_type = typename std::iterator_traits::value_type; - std::basic_string line{start, end}; - err_out_ << x4::to_utf8(line) << '\n'; -} -template -void default_error_handler::print_indicator(It& start, It last, char ind) const -{ - for (; start != last; ++start) { - auto c = *start; - if (c == '\r' || c == '\n') break; - if (c == '\t') { - for (int i = 0; i < tabs_; ++i) { - err_out_ << ind; - } - } else { - err_out_ << ind; - } - } -} + void print_file_line(int line) const + { + if (!error_out_) return; -template -It default_error_handler::get_line_start(It first, It pos) const -{ - It latest = first; - for (It i = first; i != pos;) { - if (*i == '\r' || *i == '\n') { - latest = ++i; + if (file_path_.empty()) { + std::print(*error_out_, "[in-memory source]({}): ", line); } else { - ++i; - } - } - return latest; -} - -template -std::size_t default_error_handler::position(It i) const -{ - std::size_t line {1}; - typename std::iterator_traits::value_type prev {0}; - - for (It pos = pos_cache_.first(); pos != i; ++pos) { - auto c = *pos; - switch (c) { - case '\n': - if (prev != '\r') ++line; - break; - case '\r': - ++line; - break; - default: - break; + std::print(*error_out_, "{}({}): ", file_path_.string(), line); } - prev = c; } - return line; -} - -template -void default_error_handler::operator()(It err_pos, std::string const& error_message) const -{ - It first = pos_cache_.first(); - It last = pos_cache_.last(); - - print_file_line(position(err_pos)); - err_out_ << error_message << '\n'; - - It start = get_line_start(first, err_pos); - print_line(start, last); - print_indicator(start, err_pos, '_'); - err_out_ << "^_" << '\n'; -} + It source_first_; + Se source_last_; + std::ostream* error_out_ = nullptr, *trace_out_ = nullptr; + std::filesystem::path file_path_; -template -void default_error_handler::operator()(It err_first, It err_last, std::string const& error_message) const -{ - It first = pos_cache_.first(); - It last = pos_cache_.last(); - - print_file_line(position(err_first)); - err_out_ << error_message << '\n'; - - It start = get_line_start(first, err_first); - print_line(start, last); - print_indicator(start, err_first, ' '); - print_indicator(start, err_last, '~'); - err_out_ << " <<-- Here" << '\n'; -} + int tracer_internal_rule_stack_ = 0; + int tracer_indent_ = 0; +}; } // iris::x4 diff --git a/include/iris/x4/debug/error_handler.hpp b/include/iris/x4/debug/error_handler.hpp index 1c1f5830a..f18d7e1cb 100644 --- a/include/iris/x4/debug/error_handler.hpp +++ b/include/iris/x4/debug/error_handler.hpp @@ -29,7 +29,6 @@ struct error_handler using error_handler_tag [[deprecated("Use `x4::contexts::error_handler`")]] = contexts::error_handler; - enum class tracer_state : char { pre_parse, @@ -37,6 +36,12 @@ enum class tracer_state : char parse_failed, }; +namespace detail { + +struct annotated_rule_base {}; + +} // detail + // `T` is `RuleID` or some custom error handler type template Se, class Context> @@ -51,7 +56,7 @@ struct has_on_expectation_failure : std::false_type std::declval const&>() ); }, - "`on_error` is deprecated due to its confusing name; use `on_expectation_failure` instead." + "`on_error` is obsolete due to its confusing name; use `on_expectation_failure` instead." ); }; @@ -129,8 +134,21 @@ struct [[nodiscard]] scoped_tracer {} }; +template +constexpr bool is_rule_id_derived_from_annotated_rule = requires (RuleID const& maybe_incomplete_rule_id) { + // Note: `std::is_base_of` cannot be used for incomplete type + static_cast(maybe_incomplete_rule_id); +}; + template Se, class Context, X4Attribute Attr> - requires has_on_trace::value + requires + has_on_trace::value || + + // If `RuleID` is not derived from `x4::annotated_rule<...>`, always enable tracing. + // This is required because not doing so would make simple `rule<...> r;` declarations + // never emit any sort of useful information even when invoked from `x4::parse_debug(...)`. + (!is_rule_id_derived_from_annotated_rule) + struct [[nodiscard]] scoped_tracer { constexpr scoped_tracer( @@ -147,18 +165,34 @@ struct [[nodiscard]] scoped_tracer , rule_name_(rule_name) , parse_ok_(parse_ok) { - RuleID{}.on_trace(first, last, ctx, attr_, rule_name, tracer_state::pre_parse); + if constexpr (is_rule_id_derived_from_annotated_rule) { + if constexpr (has_on_trace::value) { + RuleID{}.on_trace(first, last, ctx, attr_, rule_name, tracer_state::pre_parse); + } + + } else if constexpr (has_on_trace, It, Se, Context, Attr>::value) { + auto&& error_handler = x4::get(ctx); + error_handler.on_trace(first, last, ctx, attr_, rule_name, tracer_state::pre_parse); + } } constexpr ~scoped_tracer() { - RuleID{}.on_trace( - first_, last_, - ctx_, - attr_, - rule_name_, - *parse_ok_ ? tracer_state::parse_succeeded : tracer_state::parse_failed - ); + if constexpr (is_rule_id_derived_from_annotated_rule) { + if constexpr (has_on_trace::value) { + RuleID{}.on_trace( + first_, last_, ctx_, attr_, rule_name_, + *parse_ok_ ? tracer_state::parse_succeeded : tracer_state::parse_failed + ); + } + + } else if constexpr (has_on_trace, It, Se, Context, Attr>::value) { + auto&& error_handler = x4::get(ctx_); + error_handler.on_trace( + first_, last_, ctx_, attr_, rule_name_, + *parse_ok_ ? tracer_state::parse_succeeded : tracer_state::parse_failed + ); + } } private: diff --git a/include/iris/x4/debug/print_attribute.hpp b/include/iris/x4/debug/print_attribute.hpp index 650641993..cd5507cdc 100644 --- a/include/iris/x4/debug/print_attribute.hpp +++ b/include/iris/x4/debug/print_attribute.hpp @@ -12,20 +12,149 @@ ================================================_==============================*/ #include + +#include + #include -#include +#include #include - #include -#ifdef IRIS_X4_UNICODE -# include -#endif +#include + +#include +#include + +#include + +namespace iris::x4 { + +template +[[nodiscard]] int calc_line_number(It const source_first, It const current_pos) +{ + int line = 1; + char32_t prev_ch = U'\0'; + + iris::unicode::code_point_iterator code_point_it{source_first, source_first, current_pos}; + + for (; code_point_it.base() != current_pos; ++code_point_it) { + char32_t const ch = *code_point_it; + switch (ch) { + case U'\n': + if (prev_ch != U'\r') ++line; + break; + case U'\r': + ++line; + break; + default: + break; + } + prev_ch = ch; + } + return line; +} -#include +template +[[nodiscard]] It fetch_line_start(It const source_first, It const current_pos) +{ + if (current_pos == source_first) { + return current_pos; + } -namespace iris::x4::traits { + iris::unicode::code_point_iterator code_point_it{current_pos, source_first, current_pos}; + + auto last_it = code_point_it--; + for (;; --code_point_it) { + switch (*code_point_it) { + case U'\n': + case U'\r': + return last_it.base(); + default: + break; + } + last_it = code_point_it; + if (code_point_it.base() == source_first) break; + } + return code_point_it.base(); +} + +template Se> +[[nodiscard]] It fetch_line_last(It const current_pos, Se const source_last) +{ + iris::unicode::code_point_iterator code_point_it{current_pos, current_pos, source_last}; + + for (; code_point_it.base() != source_last; ++code_point_it) { + switch (*code_point_it) { + case U'\n': + case U'\r': + return code_point_it.base(); + default: + break; + } + } + return code_point_it.base(); +} + +template Se> +void skip_whitespace_for_print(It& it, Se const source_last) +{ + iris::unicode::code_point_iterator code_point_it{it, it, source_last}; + + for (; code_point_it.base() != source_last; ++code_point_it) { + switch (*code_point_it) { + case U'\r': + case U'\n': + case U'\t': + case U' ': + continue; + default: + break; + } + break; + } + it = code_point_it.base(); +} + +inline void print_chars(std::ostream& os, char32_t const ch) +{ + // https://en.cppreference.com/w/cpp/utility/format/spec.html#Formatting_escaped_characters_and_strings + + switch (ch) { + case U'\t': os << "\\t"; return; + case U'\n': os << "\\n"; return; + case U'\r': os << "\\r"; return; + case U'\\': os << "\\"; return; + case U' ': os << ' '; return; + // fullwidth CJK space; we don't want this to be printed like `\u{3000}` + case U' ': os << " "; return; + default: break; + } + + auto const major_cat = unicode::get_major_category(ch); + if ( + major_cat == unicode::properties::separator || major_cat == unicode::properties::other + // TODO: handle Grapheme_Extend=Yes + ) { + std::print(os, "\\u{{{:x}}}", static_cast(ch)); + return; + } + + iris::unicode::append8(ch, std::ostreambuf_iterator(os)); +} + +template Se> +void print_chars(std::ostream& os, It const it, Se const se, std::size_t const max_code_points) +{ + iris::unicode::code_point_iterator code_point_it{it, it, se}; + iris::unicode::code_point_iterator const code_point_se{se, it, se}; + + for (std::size_t printed_code_points = 0; printed_code_points < max_code_points && code_point_it != code_point_se; ++printed_code_points, ++code_point_it) { + x4::print_chars(os, *code_point_it); + } +} + +// -------------------------------------- template void print_attribute(std::ostream& os, Attr const& attr_); @@ -50,14 +179,13 @@ struct print_tuple_like } else { out << ", "; } - traits::print_attribute(out, val); + x4::print_attribute(out, val); } Out& out; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) mutable bool is_first; }; -// print elements in a variant template struct print_visitor { @@ -68,7 +196,7 @@ struct print_visitor template void operator()(T const& val) const { - traits::print_attribute(out, val); + x4::print_attribute(out, val); } Out& out; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) @@ -89,74 +217,58 @@ struct print_attribute_debug out << "unused_container"; } - static void call(std::ostream& out, CategorizedAttr auto const& val) + static void call(std::ostream& out, traits::CategorizedAttr auto const& val) { if constexpr (std::formattable) { std::format_to(std::ostreambuf_iterator{out}, "{}", val); } else { + // TODO: https://github.com/iris-cpp/iris/issues/51 + //static_assert(iris::req::ADL_ostreamable_v); out << val; } } -#ifdef IRIS_X4_UNICODE - static void call(std::ostream& out, char_encoding::unicode::char_type const& val) - { - if (val >= 0 && val < 127) { - if (iscntrl(val)) { // TODO - out << "\\" << std::oct << int(val) << std::dec; - } else if (isprint(val)) { - out << char(val); - } else { - out << "\\x" << std::hex << int(val) << std::dec; - } - - } else { - out << "\\x" << std::hex << int(val) << std::dec; - } - } - - static void call(std::ostream& out, char const& val) - { - print_attribute_debug::call(out, static_cast(val)); - } -#endif - // for tuple-likes - static void call(std::ostream& out, CategorizedAttr auto const& val) + static void call(std::ostream& out, traits::CategorizedAttr auto const& val) { out << '['; alloy::for_each(val, detail::print_tuple_like(out)); out << ']'; } - template T_> + template T_> requires (!std::is_same_v) static void call(std::ostream& out, T_ const& val) { - out << '['; - bool is_first = true; - auto last = traits::end(val); - for (auto it = traits::begin(val); it != last; ++it) { - if (is_first) { - is_first = false; - } else { - out << ", "; + if constexpr (iris::StringLike) { + out << std::basic_string_view{val}; + + } else { + out << '['; + bool is_first = true; + auto last = traits::end(val); + for (auto it = traits::begin(val); it != last; ++it) { + if (is_first) { + is_first = false; + } else { + out << ", "; + } + x4::print_attribute(out, *it); } - traits::print_attribute(out, *it); + out << ']'; } - out << ']'; } // for variant types - static void call(std::ostream& out, CategorizedAttr auto const& val) + static void call(std::ostream& out, traits::CategorizedAttr auto const& val) { - iris::visit(detail::print_visitor{out}, val); + iris::visit(detail::print_visitor{out}, val); } - static void call(std::ostream& out, CategorizedAttr auto const& val) + static void call(std::ostream& out, traits::CategorizedAttr auto const& val) { if (val) { - traits::print_attribute(out, *val); + x4::print_attribute(out, *val); } else { out << "[empty]"; } @@ -169,6 +281,6 @@ void print_attribute(std::ostream& os, Attr const& attr_) print_attribute_debug::call(os, attr_); } -} // iris::x4::traits +} // iris::x4 #endif diff --git a/include/iris/x4/debug/print_token.hpp b/include/iris/x4/debug/print_token.hpp deleted file mode 100644 index 16816de05..000000000 --- a/include/iris/x4/debug/print_token.hpp +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef IRIS_ZZ_X4_DEBUG_PRINT_TOKEN_HPP -#define IRIS_ZZ_X4_DEBUG_PRINT_TOKEN_HPP - -/*============================================================================= - Copyright (c) 2001-2014 Joel de Guzman - Copyright (c) 2001-2011 Hartmut Kaiser - Copyright (c) 2025 Nana Sakisaka - Copyright (c) 2026 The Iris Project Contributors - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -================================================_==============================*/ - -#include -#include - -#include - -namespace iris::x4::traits { - -namespace detail { - -// generate debug output for lookahead token (character) stream -struct token_printer_debug_for_chars -{ - template - static void print(std::ostream& os, Char c) - { - using namespace std; // allow for ADL to find the proper iscntrl - - switch (c) { - case '\a': os << "\\a"; break; - case '\b': os << "\\b"; break; - case '\f': os << "\\f"; break; - case '\n': os << "\\n"; break; - case '\r': os << "\\r"; break; - case '\t': os << "\\t"; break; - case '\v': os << "\\v"; break; - default: - if (c >= 0 && c < 127) { - if (iscntrl(c)) { - os << "\\" << std::oct << int(c); - } else if (isprint(c)) { - os << char(c); - } else { - os << "\\x" << std::hex << int(c); - } - - } else { - os << "\\x" << std::hex << int(c); - } - break; - } - } -}; - -// for token types where the comparison with char constants wouldn't work -struct token_printer_debug -{ - template - static void print(std::ostream& os, T const& val) - { - os << val; - } -}; - -} // detail - -template -struct token_printer_debug - : std::conditional_t< - std::is_convertible_v && std::is_convertible_v, - detail::token_printer_debug_for_chars, - detail::token_printer_debug - > -{}; - -template -void print_token(std::ostream& os, T const& val) -{ - token_printer_debug::print(os, val); -} - -} // iris::x4::traits - -#endif diff --git a/include/iris/x4/directive/lexeme.hpp b/include/iris/x4/directive/lexeme.hpp index f14118091..d12dfc20f 100644 --- a/include/iris/x4/directive/lexeme.hpp +++ b/include/iris/x4/directive/lexeme.hpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -43,6 +44,11 @@ struct lexeme_directive : proxy_parser> attr ); } + + [[nodiscard]] constexpr std::string get_x4_info() const + { + return std::format("lexeme[{}]", get_info{}(this->subject)); + } }; namespace detail { diff --git a/include/iris/x4/directive/skip.hpp b/include/iris/x4/directive/skip.hpp index 2b788f5c6..83e10bf90 100644 --- a/include/iris/x4/directive/skip.hpp +++ b/include/iris/x4/directive/skip.hpp @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -49,6 +50,15 @@ struct skip_directive : proxy_parser> return this->subject.parse(first, last, x4::replace_first_context(ctx, skipper_), attr); } + [[nodiscard]] constexpr std::string get_x4_info() const + { + return std::format( + "skip({})[{}]", + get_info{}(this->skipper_), + get_info{}(this->subject) + ); + } + private: template using context_t = std::remove_cvref_tsubject.parse(first, last, x4::replace_first_context(ctx, skipper_kind), attr); } + + [[nodiscard]] constexpr std::string get_x4_info() const + { + return std::format( + "skip({})[{}]", + detail::builtin_skipper_traits::name, + get_info{}(this->subject) + ); + } }; diff --git a/include/iris/x4/directive/with.hpp b/include/iris/x4/directive/with.hpp index faf2364f0..b0ca2b5ac 100644 --- a/include/iris/x4/directive/with.hpp +++ b/include/iris/x4/directive/with.hpp @@ -12,6 +12,7 @@ #include +#include #include #include #include @@ -131,6 +132,11 @@ struct with_directive : detail::with_directive_impl ); } + [[nodiscard]] std::string get_x4_info() const + { + return std::format("with<...>[{}]", get_info{}(this->subject)); + } + private: using base_type::val_; }; diff --git a/include/iris/x4/numeric/bool.hpp b/include/iris/x4/numeric/bool.hpp index 42703f1c9..c170d8acf 100644 --- a/include/iris/x4/numeric/bool.hpp +++ b/include/iris/x4/numeric/bool.hpp @@ -122,6 +122,11 @@ struct bool_parser : parser> } return false; } + + [[nodiscard]] static std::string get_x4_info() + { + return "`bool`"; + } }; template> @@ -185,8 +190,13 @@ struct literal_bool_parser : parser> return false; } + [[nodiscard]] std::string get_x4_info() const + { + return expected_bool_ ? "`true`" : "`false`"; + } + private: - T expected_bool_; + T expected_bool_; // TODO: remove this runtime param; make this CTP }; namespace standard { diff --git a/include/iris/x4/numeric/int.hpp b/include/iris/x4/numeric/int.hpp index 1cb5fc297..ff9d1b2f5 100644 --- a/include/iris/x4/numeric/int.hpp +++ b/include/iris/x4/numeric/int.hpp @@ -49,6 +49,27 @@ struct int_parser : parser> x4::skip_over(first, last, ctx); return numeric::extract_int::call(first, last, attr); } + + [[nodiscard]] static std::string get_x4_info() + { + if constexpr (Radix == 10 && MinDigits == 1 && MaxDigits == -1) { + if constexpr (sizeof(T) == 1) { + return "`int8`"; + } else if constexpr (sizeof(T) == 2) { + return "`int16`"; + } else if constexpr (sizeof(T) == 4) { + return "`int32`"; + } else if constexpr (sizeof(T) == 8) { + return "`int64`"; + } else { + static_assert(false, "sorry; unimplemented"); + return {}; + } + } else { + static_assert(false, "sorry; unimplemented"); + return {}; + } + } }; namespace parsers { diff --git a/include/iris/x4/numeric/real.hpp b/include/iris/x4/numeric/real.hpp index efcab1012..20b785ca7 100644 --- a/include/iris/x4/numeric/real.hpp +++ b/include/iris/x4/numeric/real.hpp @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -251,6 +252,25 @@ struct real_parser : parser> } return false; } + + [[nodiscard]] static std::string get_x4_info() + { + if constexpr (std::same_as>) { + if constexpr (std::same_as) { + return "`float`"; + } else if constexpr (std::same_as) { + return "`double`"; + } else if constexpr (std::same_as) { + return "`long double`"; + } else { + static_assert(false, "sorry; unimplemented"); + return {}; + } + } else { + static_assert(false, "sorry; unimplemented"); + return {}; + } + } }; namespace parsers { diff --git a/include/iris/x4/numeric/uint.hpp b/include/iris/x4/numeric/uint.hpp index 648cb9f1e..366902131 100644 --- a/include/iris/x4/numeric/uint.hpp +++ b/include/iris/x4/numeric/uint.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -48,6 +49,39 @@ struct uint_parser : parser> x4::skip_over(first, last, ctx); return numeric::extract_uint::call(first, last, attr); } + + [[nodiscard]] static std::string get_x4_info() + { + if constexpr (MinDigits == 1 && MaxDigits == -1) { + if constexpr (Radix == 10) { + if constexpr (sizeof(T) == 1) { + return "`uint8`"; + } else if constexpr (sizeof(T) == 2) { + return "`uint16`"; + } else if constexpr (sizeof(T) == 4) { + return "`uint32`"; + } else if constexpr (sizeof(T) == 8) { + return "`uint64`"; + } else { + static_assert(false, "sorry; unimplemented"); + return {}; + } + } else if constexpr (Radix == 2) { + return "`bin`"; + } else if constexpr (Radix == 8) { + return "`oct`"; + } else if constexpr (Radix == 16) { + return "`hex`"; + } else { + static_assert(false, "sorry; unimplemented"); + return {}; + } + + } else { + static_assert(false, "sorry; unimplemented"); + return {}; + } + } }; namespace parsers { diff --git a/include/iris/x4/operator/alternative.hpp b/include/iris/x4/operator/alternative.hpp index 2746a15e5..ca187c5c7 100644 --- a/include/iris/x4/operator/alternative.hpp +++ b/include/iris/x4/operator/alternative.hpp @@ -22,6 +22,7 @@ #include +#include #include #include #include @@ -211,6 +212,15 @@ struct alternative : binary_parser> } return false; // `attr` is untouched } + + [[nodiscard]] constexpr std::string get_x4_info() const + { + return std::format( + "{} | {}", + get_info{}(this->left), + get_info{}(this->right) + ); + } }; template diff --git a/include/iris/x4/operator/list.hpp b/include/iris/x4/operator/list.hpp index 465f527dd..18258b92a 100644 --- a/include/iris/x4/operator/list.hpp +++ b/include/iris/x4/operator/list.hpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -95,6 +96,15 @@ struct list : binary_parser> return true; } } + + [[nodiscard]] constexpr std::string get_x4_info() const + { + return std::format( + "({} % {})", + get_info{}(this->left), + get_info{}(this->right) + ); + } }; template diff --git a/include/iris/x4/operator/sequence.hpp b/include/iris/x4/operator/sequence.hpp index ac4ce5786..21d3e942f 100644 --- a/include/iris/x4/operator/sequence.hpp +++ b/include/iris/x4/operator/sequence.hpp @@ -22,7 +22,9 @@ #include #include +#include +#include #include #include #include @@ -127,6 +129,23 @@ struct sequence : binary_parser> { return detail::parse_sequence(*this, first, last, ctx, attr); } + + [[nodiscard]] constexpr std::string get_x4_info() const + { + if constexpr (iris::is_ttp_specialization_of_v) { + return std::format( + "{} > {}", + get_info{}(this->left), + get_info{}(this->right.subject) + ); + } else { + return std::format( + "{} >> {}", + get_info{}(this->left), + get_info{}(this->right) + ); + } + } }; template diff --git a/include/iris/x4/parse.hpp b/include/iris/x4/parse.hpp index 0b41c91f7..2d9180170 100644 --- a/include/iris/x4/parse.hpp +++ b/include/iris/x4/parse.hpp @@ -158,25 +158,26 @@ concept X4RangeParseSkipper = typename range_parse_parser_impl::sentinel_type >; -struct parse_fn_main +template + requires (!traits::CharArray) +[[nodiscard]] static constexpr decltype(auto) as_parse_range(R const& range) noexcept { -private: - template - requires (!traits::CharArray) - [[nodiscard]] static constexpr decltype(auto) as_parse_range(R const& range) noexcept - { - return range; - } + return range; +} - template - requires traits::CharArray - [[nodiscard]] static constexpr auto as_parse_range(R const& str) - noexcept(noexcept(std::basic_string_view{str})) - { - return std::basic_string_view{str}; - } +template + requires traits::CharArray +[[nodiscard]] static constexpr auto as_parse_range(R const& str) + noexcept(noexcept(std::basic_string_view{str})) +{ + return std::basic_string_view{str}; +} + +template +using as_parse_range_t = decltype(detail::as_parse_range(std::declval())); -public: +struct parse_fn_main +{ // -------------------------------------------- // parse(range) @@ -186,7 +187,7 @@ struct parse_fn_main operator()(R const& range, Parser&& p, ParseAttr& attr) { // Treat "str" as `string_view` - auto const& range_ = parse_fn_main::as_parse_range(range); + auto const& range_ = detail::as_parse_range(range); using It = typename parse_result_for::iterator_type; using Se = typename parse_result_for::sentinel_type; @@ -217,7 +218,7 @@ struct parse_fn_main operator()(parse_result_for& res, R const& range, Parser&& p, ParseAttr& attr) { // Treat "str" as `string_view` - auto const& range_ = parse_fn_main::as_parse_range(range); + auto const& range_ = detail::as_parse_range(range); using It = typename parse_result_for::iterator_type; using Se = typename parse_result_for::sentinel_type; @@ -247,7 +248,7 @@ struct parse_fn_main operator()(R const& range, Parser&& p, Skipper const& s, ParseAttr& attr, root_skipper_flag flag = root_skipper_flag::do_post_skip) { // Treat "str" as `string_view` - auto const& range_ = parse_fn_main::as_parse_range(range); + auto const& range_ = detail::as_parse_range(range); using It = typename parse_result_for::iterator_type; using Se = typename parse_result_for::sentinel_type; @@ -280,7 +281,7 @@ struct parse_fn_main operator()(parse_result_for& res, R const& range, Parser&& p, Skipper const& s, ParseAttr& attr, root_skipper_flag flag = root_skipper_flag::do_post_skip) { // Treat "str" as `string_view` - auto const& range_ = parse_fn_main::as_parse_range(range); + auto const& range_ = detail::as_parse_range(range); using It = typename parse_result_for::iterator_type; using Se = typename parse_result_for::sentinel_type; diff --git a/include/iris/x4/parse_debug.hpp b/include/iris/x4/parse_debug.hpp new file mode 100644 index 000000000..82a62fddd --- /dev/null +++ b/include/iris/x4/parse_debug.hpp @@ -0,0 +1,193 @@ +#ifndef IRIS_X4_PARSE_DEBUG_HPP +#define IRIS_X4_PARSE_DEBUG_HPP + +#include +#include +#include +#include + +#include +#include + +namespace iris::x4 { + +namespace detail { + +struct parse_debug_fn_main +{ + // -------------------------------------------- + // parse(range) + + // R + Parser + Attribute + template Parser, X4Attribute ParseAttr> + static constexpr parse_result_for + operator()(R const& range, Parser&& p, ParseAttr& attr) + { + auto const& range_ = detail::as_parse_range(range); + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + std::ranges::begin(range_), std::ranges::end(range_), + &error_out, &trace_out + }; + + auto const res = x4::parse(range_, x4::with(error_handler)[std::forward(p)], attr); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + return res; + } + + // parse_result + R + Parser + Attribute + template Parser, X4Attribute ParseAttr> + static constexpr void + operator()(parse_result_for& res, R const& range, Parser&& p, ParseAttr& attr) + { + auto const& range_ = detail::as_parse_range(range); + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + std::ranges::begin(range_), std::ranges::end(range_), + &error_out, &trace_out + }; + + x4::parse(res, range_, x4::with(error_handler)[std::forward(p)], attr); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + } + + // -------------------------------------------- + // phrase_parse(range) + + // R + Parser + Skipper + Attribute + (root_skipper_flag) + template Parser, X4RangeParseSkipper Skipper, X4Attribute ParseAttr> + static constexpr parse_result_for + operator()(R const& range, Parser&& p, Skipper const& s, ParseAttr& attr, root_skipper_flag flag = root_skipper_flag::do_post_skip) + { + auto const& range_ = detail::as_parse_range(range); + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + std::ranges::begin(range_), std::ranges::end(range_), + &error_out, &trace_out + }; + + auto const res = x4::parse(range_, x4::with(error_handler)[std::forward(p)], s, attr, flag); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + return res; + } + + // parse_result + R + Parser + Skipper + Attribute + template Parser, X4RangeParseSkipper Skipper, X4Attribute ParseAttr> + static constexpr void + operator()(parse_result_for& res, R const& range, Parser&& p, Skipper const& s, ParseAttr& attr, root_skipper_flag flag = root_skipper_flag::do_post_skip) + { + auto const& range_ = detail::as_parse_range(range); + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + std::ranges::begin(range_), std::ranges::end(range_), + &error_out, &trace_out + }; + + x4::parse(res, range_, x4::with(error_handler)[std::forward(p)], s, attr, flag); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + } + + // -------------------------------------------- + // parse(it/se) + + // It/Se + Parser + Attribute + template Se, X4Parser Parser, X4Attribute ParseAttr> + static constexpr parse_result + operator()(It first, Se last, Parser&& p, ParseAttr& attr) + { + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + first, last, + &error_out, &trace_out + }; + + auto const res = x4::parse(first, last, x4::with(error_handler)[std::forward(p)], attr); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + return res; + } + + // parse_result + It/Se + Parser + Attribute + template Se, X4Parser Parser, X4Attribute ParseAttr> + static constexpr void + operator()(parse_result& res, It first, Se last, Parser&& p, ParseAttr& attr) + { + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + first, last, + &error_out, &trace_out + }; + + x4::parse(res, first, last, x4::with(error_handler)[std::forward(p)], attr); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + } + + // -------------------------------------------- + // phrase_parse(it/se) + + // It/Se + Parser + Skipper + Attribute + (root_skipper_flag) + template Se, X4Parser Parser, X4ExplicitParser Skipper, X4Attribute ParseAttr> + static constexpr parse_result + operator()(It first, Se last, Parser&& p, Skipper const& s, ParseAttr& attr, root_skipper_flag flag = root_skipper_flag::do_post_skip) + { + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + first, last, + &error_out, &trace_out + }; + + auto const res = x4::parse(first, last, x4::with(error_handler)[std::forward(p)], s, attr, flag); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + return res; + } + + // parse_result + It/Se + Parser + Skipper + Attribute + (root_skipper_flag) + template Se, X4Parser Parser, X4ExplicitParser Skipper, X4Attribute ParseAttr> + static constexpr void + operator()(parse_result& res, It first, Se last, Parser&& p, Skipper const& s, ParseAttr& attr, root_skipper_flag flag = root_skipper_flag::do_post_skip) + { + std::ostringstream error_out, trace_out; + default_error_handler error_handler{ + first, last, + &error_out, &trace_out + }; + + x4::parse(res, first, last, x4::with(error_handler)[std::forward(p)], s, attr, flag); + if (!trace_out.str().empty()) std::println("{}", trace_out.str()); + if (!res && !error_out.str().empty()) { + std::println("{}", error_out.str()); + } + } +}; // parse_debug_fn + +} // detail + +inline namespace cpos { + +[[maybe_unused]] inline constexpr detail::parse_debug_fn_main parse_debug{}; + +} // cpos + +} // iris::x4 + +#endif diff --git a/include/iris/x4/rule.hpp b/include/iris/x4/rule.hpp index 406ceb4e2..344d8ca61 100644 --- a/include/iris/x4/rule.hpp +++ b/include/iris/x4/rule.hpp @@ -12,7 +12,6 @@ ==============================================================================*/ #include -#include #include #include @@ -27,6 +26,8 @@ #include +#include + #include #include #include @@ -334,7 +335,7 @@ struct rule_definition : parser @@ -397,7 +398,7 @@ struct rule : parser> static constexpr bool has_attribute = !std::is_same_v, unused_type>; static constexpr bool force_attribute = ForceAttr; - std::string_view name = "unnamed"; + std::string_view name = "unnamed_rule"; constexpr rule() = default; diff --git a/include/iris/x4/string/literal_string.hpp b/include/iris/x4/string/literal_string.hpp index 14bc1bf79..c2b430149 100644 --- a/include/iris/x4/string/literal_string.hpp +++ b/include/iris/x4/string/literal_string.hpp @@ -17,8 +17,10 @@ #include #include -#include +#include + +#include #include #include #include @@ -49,7 +51,7 @@ struct literal_string : parser> std::is_constructible_v constexpr literal_string(T&& val, Rest&&... rest) noexcept(std::is_nothrow_constructible_v) - : str(std::forward(val), std::forward(rest)...) + : str_(std::forward(val), std::forward(rest)...) {} template Se, class Context, X4Attribute Attr_> @@ -57,25 +59,22 @@ struct literal_string : parser> parse(It& first, Se const& last, Context const& ctx, Attr_& attr) const noexcept( noexcept(x4::skip_over(first, last, ctx)) && - noexcept(detail::string_parse(str, first, last, x4::assume_container(attr), x4::get_case_compare(ctx))) + noexcept(detail::string_parse(str_, first, last, x4::assume_container(attr), x4::get_case_compare(ctx))) ) { static_assert(std::same_as, char_type>, "Mixing incompatible char types is not allowed"); x4::skip_over(first, last, ctx); - return detail::string_parse(str, first, last, x4::assume_container(attr), x4::get_case_compare(ctx)); + return detail::string_parse(str_, first, last, x4::assume_container(attr), x4::get_case_compare(ctx)); } - String str; -}; - -template -struct get_info> -{ - using result_type = std::string; - [[nodiscard]] constexpr std::string operator()(literal_string const& p) const + [[nodiscard]] std::string get_x4_info() const { - return '"' + x4::to_utf8(p.str) + '"'; + // TODO: escape quotes + return std::format("\"{}\"", iris::unicode::transcode(this->str_)); } + +private: + String str_; }; } // iris::x4 diff --git a/include/iris/x4/string/utf8.hpp b/include/iris/x4/string/utf8.hpp deleted file mode 100644 index 2dc5d3bb0..000000000 --- a/include/iris/x4/string/utf8.hpp +++ /dev/null @@ -1,162 +0,0 @@ -#ifndef IRIS_ZZ_X4_STRING_UTF8_HPP -#define IRIS_ZZ_X4_STRING_UTF8_HPP - -/*============================================================================= - Copyright (c) 2001-2014 Joel de Guzman - Copyright (c) 2023 Nikita Kniazev - Copyright (c) 2025 Nana Sakisaka - Copyright (c) 2026 The Iris Project Contributors - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -==============================================================================*/ - -#include -#include -#include - -namespace iris::x4 { - -namespace detail { - -// TODO: https://github.com/microsoft/STL/issues/2207 -using impl_def_u8string = std::string; - -constexpr void utf8_put_encode(impl_def_u8string& out, char32_t x) noexcept -{ - // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90 - if (x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)) [[unlikely]] { - x = 0xFFFDul; - } - - // Table 3-6. UTF-8 Bit Distribution - if (x < 0x80ul) { - out.push_back(static_cast(x)); - } else if (x < 0x800ul) { - out.push_back(static_cast(0xC0ul + (x >> 6))); - out.push_back(static_cast(0x80ul + (x & 0x3Ful))); - } else if (x < 0x10000ul) { - out.push_back(static_cast(0xE0ul + (x >> 12))); - out.push_back(static_cast(0x80ul + ((x >> 6) & 0x3Ful))); - out.push_back(static_cast(0x80ul + (x & 0x3Ful))); - } else { - out.push_back(static_cast(0xF0ul + (x >> 18))); - out.push_back(static_cast(0x80ul + ((x >> 12) & 0x3Ful))); - out.push_back(static_cast(0x80ul + ((x >> 6) & 0x3Ful))); - out.push_back(static_cast(0x80ul + (x & 0x3Ful))); - } -} - -} // detail - -template -[[nodiscard]] constexpr detail::impl_def_u8string to_utf8(Char value) -{ - detail::impl_def_u8string result; - using UChar = std::make_unsigned_t; - detail::utf8_put_encode(result, static_cast(value)); - return result; -} - -template -[[nodiscard]] constexpr detail::impl_def_u8string to_utf8(Char const* str) -{ - detail::impl_def_u8string result; - using UChar = std::make_unsigned_t; - while (*str) { - detail::utf8_put_encode(result, static_cast(*str++)); - } - return result; -} - -template -[[nodiscard]] constexpr detail::impl_def_u8string -to_utf8(std::basic_string_view const str) -{ - detail::impl_def_u8string result; - using UChar = std::make_unsigned_t; - for (Char ch : str) { - detail::utf8_put_encode(result, static_cast(ch)); - } - return result; -} - -template -[[nodiscard]] constexpr detail::impl_def_u8string -to_utf8(std::basic_string const& str) -{ - detail::impl_def_u8string result; - using UChar = std::make_unsigned_t; - for (Char ch : str) { - detail::utf8_put_encode(result, static_cast(ch)); - } - return result; -} - -// Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar -#if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2 -[[nodiscard]] constexpr detail::impl_def_u8string to_utf8(wchar_t value) -{ - detail::impl_def_u8string result; - detail::utf8_put_encode(result, static_cast>(value)); - return result; -} - -namespace detail { - -template - requires std::is_same_v>, wchar_t> -[[nodiscard]] constexpr char32_t decode_utf16(It& s) noexcept -{ - using uwchar_t = std::make_unsigned_t; - - uwchar_t x(*s); - if (x < 0xD800ul || x > 0xDFFFul) { - return x; - } - - // expected high-surrogate - if ((x >> 10) != 0b110110ul) [[unlikely]] { - return 0xFFFDul; - } - - uwchar_t y(*++s); - // expected low-surrogate - if ((y >> 10) != 0b110111ul) [[unlikely]] { - return 0xFFFDul; - } - - return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul; -} - -} // detail - -[[nodiscard]] constexpr detail::impl_def_u8string -to_utf8(std::basic_string_view const str) -{ - detail::impl_def_u8string result; - for (auto it = str.begin(); it != str.end(); ++it) { - detail::utf8_put_encode(result, detail::decode_utf16(it)); - } - return result; -} - -[[nodiscard]] constexpr detail::impl_def_u8string to_utf8(wchar_t const* str) -{ - return x4::to_utf8(std::basic_string_view(str)); -} - -[[nodiscard]] constexpr detail::impl_def_u8string -to_utf8(std::basic_string const& str) -{ - detail::impl_def_u8string result; - for (auto it = str.begin(); it != str.end(); ++it) { - detail::utf8_put_encode(result, detail::decode_utf16(it)); - } - return result; -} -#endif - -} // iris::x4 - -#endif diff --git a/modules/iris b/modules/iris index 345e891a9..35ef3e243 160000 --- a/modules/iris +++ b/modules/iris @@ -1 +1 @@ -Subproject commit 345e891a934469285dfa70036c6e09ec11edc313 +Subproject commit 35ef3e24365bc56f6bbb98021fd324b14ee2d380 diff --git a/test/x4/CMakeLists.txt b/test/x4/CMakeLists.txt index ab7fdac55..0ea63393f 100644 --- a/test/x4/CMakeLists.txt +++ b/test/x4/CMakeLists.txt @@ -56,6 +56,7 @@ x4_define_tests( eol eps error_handler + default_error_handler expect extract_int int @@ -90,7 +91,6 @@ x4_define_tests( symbols1 symbols2 symbols3 - to_utf8 tst uint uint_radix diff --git a/test/x4/alloy_wrong_substitute_test_case.cpp b/test/x4/alloy_wrong_substitute_test_case.cpp index 2e6bfebf0..d92e9dad3 100644 --- a/test/x4/alloy_wrong_substitute_test_case.cpp +++ b/test/x4/alloy_wrong_substitute_test_case.cpp @@ -59,6 +59,8 @@ using AorB = iris::rvariant< } // ast +namespace alloy_wrong_substitute_test_case { + using ARule = x4::rule; using BRule = x4::rule; using AorBRule = x4::rule; @@ -89,3 +91,5 @@ TEST_CASE("alloy_wrong_substitute_test_case") ast::AorB result; (void)a_or_b.parse(ptr, nullptr, x4::unused, result); } + +} // alloy_wrong_substitute_test_case diff --git a/test/x4/default_error_handler.cpp b/test/x4/default_error_handler.cpp new file mode 100644 index 000000000..35520c2dc --- /dev/null +++ b/test/x4/default_error_handler.cpp @@ -0,0 +1,158 @@ +/*============================================================================= + Copyright (c) 2026 The Iris Project Contributors + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +=============================================================================*/ + +#include "iris_x4_test.hpp" + +#include + +#include +#include + +#define NOMINMAX +#ifdef _WIN32 +#include +#endif + +#include +#include +#include +#include +#include +#include + +namespace x4_test { + +// ' ' is U+1680 'OGHAM SPACE MARK' +// https://www.fileformat.info/info/unicode/char/1680/ +constexpr std::string_view unicode_model_string = "aこれは𩸽だ サロゲート\tペア 入りの魚b"; + +TEST_CASE("print_chars") +{ +#ifdef _WIN32 + SetConsoleOutputCP(CP_UTF8); +#endif + + // full string + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), unicode_model_string.size()); + CHECK(oss.str() == R"(aこれは𩸽だ サロゲート\tペア\u{1680}入りの魚b)"); + } + + // partial string + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 0); + CHECK(oss.str() == R"()"); // NOLINT(readability-container-size-empty) + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 1); + CHECK(oss.str() == R"(a)"); + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 2); + CHECK(oss.str() == R"(aこ)"); + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 4); + CHECK(oss.str() == R"(aこれは)"); + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 5); + CHECK(oss.str() == R"(aこれは𩸽)"); + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 6); + CHECK(oss.str() == R"(aこれは𩸽だ)"); + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 15); + CHECK(oss.str() == R"(aこれは𩸽だ サロゲート\tペア)"); + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 16); + CHECK(oss.str() == R"(aこれは𩸽だ サロゲート\tペア\u{1680})"); + } + { + std::ostringstream oss; + x4::print_chars(oss, unicode_model_string.begin(), unicode_model_string.end(), 17); + CHECK(oss.str() == R"(aこれは𩸽だ サロゲート\tペア\u{1680}入)"); + } +} + +TEST_CASE("print_line_highlight") +{ +#ifdef _WIN32 + SetConsoleOutputCP(CP_UTF8); +#endif + + using It = std::string_view::const_iterator; + auto first = unicode_model_string.begin(); + auto const last = unicode_model_string.end(); + + { + std::ostringstream error_out, trace_out; + x4::default_error_handler error_handler{first, last, &error_out, &trace_out}; + using error_handler_type = decltype(error_handler); + std::u32string const u32input = iris::unicode::transcode(unicode_model_string); + + for (std::size_t i = 0; i < u32input.size() + 1; ++i) { + iris::unicode::code_point_iterator it{unicode_model_string.begin(), unicode_model_string.begin(), unicode_model_string.end()}; + std::advance(it, i); + error_handler.print_line_highlight({unicode_model_string.begin(), unicode_model_string.end()}, it.base()); + + std::string left_str, right_str; + if (i > 0) { + auto const left_non_colored = u32input.substr( + 0, + std::max(0, int(i) - error_handler_type::highlight_chars) + ); + auto const left_colored = u32input.substr( + std::max(0, int(i) - error_handler_type::highlight_chars), + std::min(int(i), error_handler_type::highlight_chars) + ); + left_str = iris::colorize_format( + error_handler_type::colorize_cfg, + "{}[$expect_left]{}[/$expect_left]", + iris::unicode::transcode(left_non_colored), + iris::unicode::transcode(left_colored) + ); + } + if (i < u32input.size()) { + auto const right_colored = u32input.substr( + i, + std::min(u32input.size() - i, std::size_t(error_handler_type::highlight_chars)) + ); + auto const right_non_colored = u32input.substr( + std::min(u32input.size(), i + error_handler_type::highlight_chars), + u32input.size() - std::min(u32input.size() - i, std::size_t(error_handler_type::highlight_chars)) + ); + right_str = iris::colorize_format( + error_handler_type::colorize_cfg, + "[$expect_right]{}[/$expect_right]{}", + iris::unicode::transcode(right_colored), + iris::unicode::transcode(right_non_colored) + ); + } + + //std::print("{}", error_out.str()); + CHECK(left_str + right_str + "\n" == error_out.str()); + + error_out.str({}); + error_out.clear(); + } + } +} + +} // x4_test diff --git a/test/x4/error_handler.cpp b/test/x4/error_handler.cpp index 489f415d4..1403c14df 100644 --- a/test/x4/error_handler.cpp +++ b/test/x4/error_handler.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/test/x4/expect.cpp b/test/x4/expect.cpp index 4d6d04d72..5da40afd1 100644 --- a/test/x4/expect.cpp +++ b/test/x4/expect.cpp @@ -335,13 +335,11 @@ TEST_CASE("expect") #ifndef IRIS_X4_NO_RTTI X4_TEST_FAILURE("ay:a", char_ > char_('x') >> ':' > 'a', { - CHECK(x.which().find("sequence") != std::string::npos); CHECK(where == "y:a"sv); }); #else X4_TEST_FAILURE("ay:a", char_ > char_('x') >> ':' > 'a', { - CHECK(which == "undefined"sv); CHECK(where == "y:a"sv); }); #endif @@ -414,23 +412,6 @@ TEST_CASE("expect") }); } - // - // ********* Developers note ********** - // - // As of now (see `git blame`), get_info is still not - // specialized for many of the X4 parsers so that the - // value of `expectation_failure<...>::which()` will be - // implementation-defined demangled string. - // Therefore, it's essentially impossible to test them - // right now; further work must be done. - // - // Some specific situations are already been reported - // (e.g. https://github.com/boostorg/spirit/issues/777) - // but we really need to implement all specializations for - // X4's predefined parsers, not just the one reported above. - // - - // sanity check: test expectation_failure propagation // on custom skippers { diff --git a/test/x4/iris_x4_test.hpp b/test/x4/iris_x4_test.hpp index 5b7a38981..aa98e50db 100644 --- a/test/x4/iris_x4_test.hpp +++ b/test/x4/iris_x4_test.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -113,11 +114,90 @@ struct parse_overloads : x4::detail::parse_fn_main } }; // parse_overload +// Provide `x4::unused` default arg fallback +struct parse_debug_overloads : x4::detail::parse_debug_fn_main +{ + using x4::detail::parse_debug_fn_main::operator(); + + // It/Se + Parser + template Se, x4::X4Parser Parser> + static constexpr x4::parse_result + operator()(It first, Se last, Parser&& p) + { + return x4::parse_debug(first, last, std::forward(p), x4::unused); + } + + // parse_result + It/Se + Parser + template Se, x4::X4Parser Parser> + static constexpr void + operator()(x4::parse_result& res, It first, Se last, Parser&& p) + { + return x4::parse_debug(res, first, last, std::forward(p), x4::unused); + } + + // R + Parser + template Parser> + static constexpr x4::parse_result_for + operator()(R const& r, Parser&& p) + { + return x4::parse_debug(r, std::forward(p), x4::unused); + } + + // parse_result + R + Parser + template Parser> + static constexpr void + operator()(x4::parse_result_for& res, R const& r, Parser&& p) + { + return x4::parse_debug(res, r, std::forward(p), x4::unused); + } + + // It/Se + Parser + Skipper + template Se, x4::X4Parser Parser, x4::X4ExplicitParser Skipper> + static constexpr x4::parse_result + operator()(It first, Se last, Parser&& p, Skipper&& s, x4::root_skipper_flag flag = x4::root_skipper_flag::do_post_skip) + { + return x4::parse_debug(first, last, std::forward(p), std::forward(s), x4::unused, flag); + } + + // parse_result + It/Se + Parser + Skipper + template Se, x4::X4Parser Parser, x4::X4ExplicitParser Skipper> + static constexpr void + operator()(x4::parse_result& res, It first, Se last, Parser&& p, Skipper&& s, x4::root_skipper_flag flag = x4::root_skipper_flag::do_post_skip) + { + return x4::parse_debug(res, first, last, std::forward(p), std::forward(s), x4::unused, flag); + } + + // R + Parser + Skipper + template< + std::ranges::forward_range R, + x4::detail::X4RangeParseParser Parser, + x4::detail::X4RangeParseSkipper Skipper + > + static constexpr x4::parse_result_for + operator()(R const& r, Parser&& p, Skipper&& s, x4::root_skipper_flag flag = x4::root_skipper_flag::do_post_skip) + { + return x4::parse_debug(r, std::forward(p), std::forward(s), x4::unused, flag); + } + + // parse_result + R + Parser + Skipper + template< + std::ranges::forward_range R, + x4::detail::X4RangeParseParser Parser, + x4::detail::X4RangeParseSkipper Skipper + > + static constexpr void + operator()(x4::parse_result_for& res, R const& r, Parser&& p, Skipper&& s, x4::root_skipper_flag flag = x4::root_skipper_flag::do_post_skip) + { + return x4::parse_debug(res, r, std::forward(p), std::forward(s), x4::unused, flag); + } +}; // parse_overload + } // detail inline namespace cpos { [[maybe_unused]] inline constexpr detail::parse_overloads parse{}; +[[maybe_unused]] inline constexpr detail::parse_debug_overloads parse_debug{}; } // cpos @@ -190,6 +270,7 @@ struct custom_container } // x4_test using x4_test::parse; +using x4_test::parse_debug; #define IRIS_X4_ASSERT_CONSTEXPR_CTORS(...) \ static_assert(::x4_test::test_constexpr_copy_move_ctors(__VA_ARGS__)) diff --git a/test/x4/pch.hpp b/test/x4/pch.hpp index 59ac0c37d..15e146ccd 100644 --- a/test/x4/pch.hpp +++ b/test/x4/pch.hpp @@ -7,6 +7,8 @@ #include #include +#include + #include #include #include diff --git a/test/x4/to_utf8.cpp b/test/x4/to_utf8.cpp deleted file mode 100644 index a5f3263c0..000000000 --- a/test/x4/to_utf8.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/*============================================================================= - Copyright (c) 2018-2023 Nikita Kniazev - Copyright (c) 2025 Nana Sakisaka - Copyright (c) 2026 The Iris Project Contributors - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -=============================================================================*/ - -#include "iris_x4_test.hpp" - -#include - -#include - -TEST_CASE("to_utf8") -{ - using x4::to_utf8; - using namespace std::string_view_literals; - - CHECK(to_utf8(0xD7FFul) == "\xED\x9F\xBF"sv); - CHECK(to_utf8(0xE000ul) == "\xEE\x80\x80"sv); - - if constexpr (sizeof(L"\u00FF") == 2) { - CHECK(to_utf8(L"\u00FF"[0]) == "\xC3\xBF"sv); - } - - CHECK(to_utf8(U'\u00FF') == "\xC3\xBF"sv); - - if constexpr (sizeof(L"\uFFE1") == 2) { - CHECK(to_utf8(L"\uFFE1"[0]) == "\xEF\xBF\xA1"sv); - } - - CHECK(to_utf8(U'\uFFE1') == "\xEF\xBF\xA1"sv); - - if constexpr(sizeof(L"\U0001F9D0") == 2) { - CHECK(to_utf8(L"\U0001F9D0"[0]) == "\xF0\x9F\xA7\x90"sv); - } - - CHECK(to_utf8(U'\U0001F9D0') == "\xF0\x9F\xA7\x90"sv); - CHECK(to_utf8(L"\U0001F9D0\U0001F9E0") == "\xF0\x9F\xA7\x90\xF0\x9F\xA7\xA0"sv); - CHECK(to_utf8(U"\U0001F9D0\U0001F9E0") == "\xF0\x9F\xA7\x90\xF0\x9F\xA7\xA0"sv); - CHECK(to_utf8(L"\U0001F9D0\U0001F9E0"sv) == "\xF0\x9F\xA7\x90\xF0\x9F\xA7\xA0"sv); - CHECK(to_utf8(U"\U0001F9D0\U0001F9E0"sv) == "\xF0\x9F\xA7\x90\xF0\x9F\xA7\xA0"sv); -}