diff --git a/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.cpp b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.cpp
new file mode 100644
index 0000000000..925130d35a
--- /dev/null
+++ b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.cpp
@@ -0,0 +1,280 @@
+/* FRLG Digit Reader
+ *
+ * From: https://github.com/PokemonAutomation/
+ *
+ */
+
+#include "PokemonFRLG_DigitReader.h"
+#include "Common/Cpp/Color.h" // needed for COLOR_RED, COLOR_ORANGE
+#include "Common/Cpp/Exceptions.h"
+#include "Common/Cpp/Logging/AbstractLogger.h"
+#include "CommonFramework/Globals.h"
+#include "CommonFramework/ImageTools/ImageBoxes.h"
+#include "CommonFramework/ImageTypes/ImageRGB32.h"
+#include "CommonFramework/ImageTypes/ImageViewRGB32.h"
+#include "CommonTools/ImageMatch/ExactImageMatcher.h"
+#include "CommonTools/Images/BinaryImage_FilterRgb32.h"
+#include "Kernels/Waterfill/Kernels_Waterfill_Session.h"
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+using std::cout;
+using std::endl;
+
+namespace PokemonAutomation {
+namespace NintendoSwitch {
+namespace PokemonFRLG {
+
+// ---------------------------------------------------------------------------
+// Template store: loads 10 digit matchers from a resource sub-directory.
+// Results are cached in a static map keyed by template type.
+// Supports both:
+// - StatBox (yellow stat boxes): PokemonFRLG/Digits/
+// - LevelBox (lilac level box): PokemonFRLG/LevelDigits/
+// ---------------------------------------------------------------------------
+
+// Maps a template type to its resource sub-directory (relative to
+// RESOURCE_PATH()). Unknown values fall back to the stat-box digits.
+static std::string get_template_path(DigitTemplateType type) {
+    switch (type) {
+    case DigitTemplateType::StatBox:
+        return "PokemonFRLG/Digits/";
+    case DigitTemplateType::LevelBox:
+        return "PokemonFRLG/LevelDigits/";
+    default:
+        return "PokemonFRLG/Digits/";
+    }
+}
+
+// Holds the digit matchers for one template type. Construction throws a
+// FileException when none of the ten template images could be loaded.
+struct DigitTemplates {
+    // matchers[d] is the matcher for digit d (0-9), or nullptr if missing.
+    std::array, 10> matchers;
+    bool any_loaded = false;
+
+    explicit DigitTemplates(DigitTemplateType template_type) {
+        std::string resource_subdir = get_template_path(template_type);
+        for (int d = 0; d < 10; ++d) {
+            std::string path =
+                RESOURCE_PATH() + resource_subdir + std::to_string(d) + ".png";
+            try {
+                ImageRGB32 img(path);
+                if (img.width() > 0) {
+                    matchers[d] =
+                        std::make_unique(std::move(img));
+                    any_loaded = true;
+                }
+            } catch (...) {
+                // Template image missing - slot stays nullptr.
+            }
+        }
+        if (!any_loaded) {
+            throw FileException(nullptr, PA_CURRENT_FUNCTION,
+                "Failed to load any digit templates", resource_subdir);
+        }
+    }
+
+    // Returns the cached template set for "template_type", loading it on first
+    // use. NOTE(review): the cache has no locking - confirm callers never read
+    // digits from several threads at once.
+    static const DigitTemplates& get(DigitTemplateType template_type) {
+        static std::map cache;
+        auto it = cache.find(template_type);
+        if (it == cache.end()) {
+            it = cache.emplace(template_type, DigitTemplates(template_type)).first;
+        }
+        return it->second;
+    }
+};
+
+// ---------------------------------------------------------------------------
+// Main function
+// Reads the number shown in "stat_region". Returns -1 when the region is
+// empty, no usable blob is found, no blob matches a template, or the digit
+// string is too long to fit in an int.
+// ---------------------------------------------------------------------------
+int read_digits_waterfill_template(
+    Logger& logger,
+    const ImageViewRGB32& stat_region,
+    double rmsd_threshold,
+    DigitTemplateType template_type,
+    const std::string& dump_prefix,
+    uint8_t binarize_high
+) {
+    using namespace Kernels::Waterfill;
+
+    if (!stat_region) {
+        logger.log("DigitReader: empty stat region.", COLOR_RED);
+        return -1;
+    }
+
+    // ------------------------------------------------------------------
+    // Step 1: Gaussian blur on the NATIVE resolution image.
+    // The GBA pixel font has 1-pixel gaps between segments.
+    // A 5x5 kernel applied twice bridges those gaps so that waterfill
+    // sees each digit as a single connected component.
+    // ------------------------------------------------------------------
+    cv::Mat src = stat_region.to_opencv_Mat();
+    cv::Mat blurred;
+    src.copyTo(blurred);
+    cv::GaussianBlur(blurred, blurred, cv::Size(5, 5), 1.5);
+    cv::GaussianBlur(blurred, blurred, cv::Size(5, 5), 1.5);
+
+    ImageRGB32 blurred_img(blurred.cols, blurred.rows);
+    blurred.copyTo(blurred_img.to_opencv_Mat());
+
+    // ------------------------------------------------------------------
+    // Step 2: Binarise the blurred image.
+    // Pixels where ALL channels <= binarize_high become 1 (foreground).
+    // Default 0xBE (190) works for yellow stat boxes.
+    // Use 0x7F (127) for the lilac level box to prevent the blurred
+    // lilac background (B~208, drops to ~156 near shadows) from being
+    // captured and merging digit blobs.
+    // ------------------------------------------------------------------
+    uint32_t bh = binarize_high;
+    uint32_t binarize_color = 0xff000000u | (bh << 16) | (bh << 8) | bh;
+    PackedBinaryMatrix matrix =
+        compress_rgb32_to_binary_range(blurred_img, 0xff000000u, binarize_color);
+
+    // ------------------------------------------------------------------
+    // Step 3: Waterfill - find connected dark blobs (individual digits).
+    // Minimum area of 4 pixels to discard lone noise specks.
+    // Sort blobs left-to-right by their left edge (min_x).
+    // ------------------------------------------------------------------
+    const size_t min_area = 4;
+    std::map blobs; // key = min_x, auto-sorted L->R
+    {
+        std::unique_ptr session = make_WaterfillSession(matrix);
+        auto iter = session->make_iterator(min_area);
+        WaterfillObject obj;
+        while (blobs.size() < 8 && iter->find_next(obj, false)) {
+            // Require at least 3px wide AND 3px tall to discard noise fragments.
+            if (obj.max_x - obj.min_x < 3 || obj.max_y - obj.min_y < 3)
+                continue;
+            // Use min_x as key so the map is automatically sorted left-to-right.
+            // If two blobs share an identical min_x, bump the key slightly.
+            size_t key = obj.min_x;
+            while (blobs.count(key))
+                ++key;
+            blobs.emplace(key, std::move(obj));
+        }
+    }
+
+    if (blobs.empty()) {
+        logger.log("DigitReader: waterfill found no digit blobs.", COLOR_RED);
+        return -1;
+    }
+
+    // ------------------------------------------------------------------
+    // Step 4: For each blob, crop the UNBLURRED original stat_region to
+    // the blob's bounding box, then template-match against all 10 digit
+    // templates using ExactImageMatcher::rmsd(). Pick the lowest RMSD.
+    // ------------------------------------------------------------------
+    const DigitTemplates& templates = DigitTemplates::get(template_type);
+    std::string result_str;
+
+    for (const auto &kv : blobs) {
+        const WaterfillObject &obj = kv.second;
+
+        size_t width = obj.max_x - obj.min_x;
+        size_t height = obj.max_y - obj.min_y;
+
+        int expected_digits = 1;
+        // GBA font digits are typically narrower than they are tall (aspect ~0.6).
+        // If the blob's width is wider than expected for a single digit, it's a
+        // merged blob.
+        if (width > height * 1.5) {
+            expected_digits = 3; // e.g. "100"
+        } else if (width > height * 0.8) {
+            expected_digits = 2; // e.g. "23"
+        }
+
+        size_t split_w = width / expected_digits;
+
+        for (int i = 0; i < expected_digits; ++i) {
+            size_t min_x = obj.min_x + i * split_w;
+            size_t max_x = (i == expected_digits - 1) ? obj.max_x : obj.min_x + (i + 1) * split_w;
+
+            // Crop original (unblurred) region to the split bounding box.
+            ImagePixelBox bbox(min_x, obj.min_y, max_x, obj.max_y);
+            ImageViewRGB32 crop = extract_box_reference(stat_region, bbox);
+
+            if (dump_prefix == "levelDigit") {
+                crop.save("DebugDumps/" + dump_prefix + "_x" + std::to_string(min_x) + "_split_raw.png");
+            }
+
+            // Compute RMSD against each digit template; pick the minimum.
+            // Digits whose template image failed to load are skipped.
+            double best_rmsd = 9999.0;
+            int best_digit = -1;
+            if (templates.any_loaded) {
+                for (int d = 0; d < 10; ++d) {
+                    if (!templates.matchers[d])
+                        continue;
+                    double r = templates.matchers[d]->rmsd(crop);
+                    if (r < best_rmsd) {
+                        best_rmsd = r;
+                        best_digit = d;
+                    }
+                }
+            }
+
+            if (best_rmsd > rmsd_threshold) {
+                // Always save the raw crop for user inspection / template extraction.
+                crop.save("DebugDumps/" + dump_prefix + "_x" + std::to_string(min_x) +
+                    "_raw.png");
+                logger.log(
+                    "DigitReader: blob at x=" + std::to_string(min_x) +
+                    " skipped (best RMSD=" + std::to_string(best_rmsd) +
+                    ", threshold=" + std::to_string(rmsd_threshold) + ").",
+                    COLOR_ORANGE
+                );
+                continue;
+            }
+
+            logger.log(
+                "DigitReader: blob at x=" + std::to_string(min_x) +
+                " -> digit " + std::to_string(best_digit) +
+                " (RMSD=" + std::to_string(best_rmsd) + ")"
+            );
+            // Save crop with prefix so level and stat crops are distinguishable.
+            crop.save("DebugDumps/" + dump_prefix + "_x" + std::to_string(min_x) +
+                "_match" + std::to_string(best_digit) + ".png");
+            result_str += static_cast('0' + best_digit);
+        }
+    }
+
+    if (result_str.empty()) {
+        return -1;
+    }
+
+    // At most 9 decimal digits always fit in a 32-bit int. Longer strings
+    // would overflow inside std::atoi() (undefined behaviour), so treat them
+    // as a failed read instead.
+    if (result_str.size() > 9) {
+        logger.log(
+            "DigitReader: \"" + result_str + "\" has too many digits.",
+            COLOR_RED
+        );
+        return -1;
+    }
+
+    int number = std::atoi(result_str.c_str());
+    logger.log(
+        "DigitReader: \"" + result_str + "\" -> " +
+        std::to_string(number)
+    );
+    return number;
+}
+
+} // namespace PokemonFRLG
+} // namespace NintendoSwitch
+} // namespace PokemonAutomation
+
diff --git a/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.h b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.h
new file mode 100644
index 0000000000..877a0352ec
--- /dev/null
+++ b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.h
@@ -0,0 +1,72 @@
+/* FRLG Digit Reader
+ *
+ * From: https://github.com/PokemonAutomation/
+ *
+ * Reads a string of decimal digits from a stat region using waterfill
+ * segmentation on a blurred image to locate individual digit bounding boxes,
+ * then template-matches each cropped digit against the pre-stored digit
+ * templates (Resources/PokemonFRLG/Digits/0-9.png) on the unblurred original.
+ *
+ * This is the Tesseract/PaddleOCR-free fallback path for USE_PADDLE_OCR=false.
+ */
+
+#ifndef PokemonAutomation_PokemonFRLG_DigitReader_H
+#define PokemonAutomation_PokemonFRLG_DigitReader_H
+#include
+#include
+
+namespace PokemonAutomation {
+class Logger;
+class ImageViewRGB32;
+
+namespace NintendoSwitch {
+namespace PokemonFRLG {
+
+enum class DigitTemplateType {
+    StatBox, // Yellow stat boxes (default): PokemonFRLG/Digits/
+    LevelBox, // Lilac level box: PokemonFRLG/LevelDigits/
+};
+
+// Read a string of decimal digits from `stat_region`.
+//
+// rmsd_threshold   A crop whose best template RMSD exceeds this is skipped.
+// template_type    Which template set to use (StatBox or LevelBox).
+// dump_prefix      Prefix used when saving debug crop PNGs to DebugDumps/.
+// binarize_high    Pixels with all channels <= this value are treated as
+//                  digit foreground when locating blobs.
+//
+// Returns the parsed integer, or -1 on failure.
+int read_digits_waterfill_template(
+    Logger& logger,
+    const ImageViewRGB32& stat_region,
+    double rmsd_threshold = 175.0,
+    DigitTemplateType template_type = DigitTemplateType::StatBox,
+    const std::string& dump_prefix = "digit",
+    uint8_t binarize_high = 0xBE // 0xBE=190 for yellow stat boxes;
+                                 // use 0x7F=127 for lilac level box
+);
+
+// Read a string of decimal digits from `stat_region` by splitting the region into
+// a fixed number of equal-width segments, instead of using waterfill.
+// Useful when digits are tightly packed.
+//
+// num_splits The number of equal-width segments to split the region into.
+// template_type Which template set to use (StatBox or LevelBox).
+// dump_prefix Prefix used when saving debug crop PNGs to DebugDumps/.
+//
+// Returns the parsed integer, or -1 on failure.
+int read_digits_fixed_width_template(
+    Logger& logger,
+    const ImageViewRGB32& stat_region,
+    int num_splits = 2,
+    double rmsd_threshold = 175.0,
+    DigitTemplateType template_type = DigitTemplateType::LevelBox,
+    const std::string& dump_prefix = "digit_split"
+);
+
+} // namespace PokemonFRLG
+} // namespace NintendoSwitch
+} // namespace PokemonAutomation
+
+#endif
+
diff --git a/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.cpp b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.cpp
new file mode 100644
index 0000000000..5a50a19055
--- /dev/null
+++ b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.cpp
@@ -0,0 +1,456 @@
+/* Stats Reader
+ *
+ * From: https://github.com/PokemonAutomation/
+ *
+ */
+
+#include "PokemonFRLG_StatsReader.h"
+#include "Common/Cpp/Color.h"
+#include "Common/Cpp/Exceptions.h"
+#include "CommonFramework/GlobalSettingsPanel.h"
+#include "CommonFramework/ImageTypes/ImageViewRGB32.h"
+#include "CommonFramework/Tools/GlobalThreadPools.h"
+#include "CommonFramework/VideoPipeline/VideoOverlayScopes.h"
+#include "CommonTools/Images/ImageFilter.h"
+#include "CommonTools/Images/ImageManip.h"
+#include "CommonTools/OCR/OCR_NumberReader.h"
+#include "CommonTools/OCR/OCR_Routines.h"
+#include "Pokemon/Inference/Pokemon_NameReader.h"
+#include "Pokemon/Inference/Pokemon_NatureReader.h"
+#include "PokemonFRLG/PokemonFRLG_Settings.h"
+#include "PokemonFRLG_DigitReader.h"
+#include
+
+namespace PokemonAutomation {
+namespace NintendoSwitch {
+namespace PokemonFRLG {
+
+// Debug counter for unique filenames
+static int debug_counter = 0;
+
+// Full OCR preprocessing pipeline for GBA pixel fonts.
+//
+// GBA fonts are seven-segment-like with 1-pixel gaps between segments.
+// Pipeline: blur at native -> smooth upscale -> BW -> smooth BW -> re-BW -> pad
+//
+// The native blur connects gaps. Post-BW padding provides margins.
+//
+// label              Used in the debug image file names.
+// blur_kernel_size   Gaussian kernel side length for the native blur.
+// blur_passes        Number of times the native blur is applied.
+// in_range_black, bw_min, bw_max
+//                    Forwarded to to_blackwhite_rgb32_range() in step 3.
+static ImageRGB32 preprocess_for_ocr(
+    const ImageViewRGB32 &image,
+    const std::string &label,
+    int blur_kernel_size, int blur_passes,
+    bool in_range_black, uint32_t bw_min,
+    uint32_t bw_max
+) {
+    const bool save_debug_images = GlobalSettings::instance().SAVE_DEBUG_IMAGES;
+    int id = debug_counter++;
+    std::string prefix = "DebugDumps/ocr_" + label + "_" + std::to_string(id);
+
+    // Save raw input
+    if (save_debug_images) {
+        image.save(prefix + "_0_raw.png");
+    }
+
+    cv::Mat src = image.to_opencv_Mat();
+
+    // Step 1: Gaussian blur at NATIVE resolution. A larger kernel reaches
+    // further than 3x3, bridging wider gaps in the seven-segment font; the
+    // kernel size and pass count come from the caller (skipped if either <= 0).
+    cv::Mat blurred_native;
+    src.copyTo(blurred_native);
+    if (blur_kernel_size > 0 && blur_passes > 0) {
+        for (int i = 0; i < blur_passes; i++) {
+            cv::GaussianBlur(
+                blurred_native, blurred_native,
+                cv::Size(blur_kernel_size, blur_kernel_size), 1.5
+            );
+        }
+    }
+
+    // Save blurred at native res
+    ImageRGB32 blurred_native_img(blurred_native.cols, blurred_native.rows);
+    blurred_native.copyTo(blurred_native_img.to_opencv_Mat());
+    if (save_debug_images) {
+        blurred_native_img.save(prefix + "_1_blurred_native.png");
+    }
+
+    // Step 2: Smooth upscale 4x with bilinear interpolation.
+    int scale_factor = 4;
+    int new_w = static_cast(image.width()) * scale_factor;
+    int new_h = static_cast(image.height()) * scale_factor;
+    cv::Mat resized;
+    cv::resize(
+        blurred_native, resized, cv::Size(new_w, new_h), 0, 0,
+        cv::INTER_LINEAR
+    );
+
+    // Save upscaled
+    ImageRGB32 resized_img(resized.cols, resized.rows);
+    resized.copyTo(resized_img.to_opencv_Mat());
+    if (save_debug_images) {
+        resized_img.save(prefix + "_2_upscaled.png");
+    }
+
+    // Step 3: BW threshold on the smooth upscaled image.
+    ImageRGB32 bw =
+        to_blackwhite_rgb32_range(resized_img, in_range_black, bw_min, bw_max);
+    if (save_debug_images) {
+        bw.save(prefix + "_3_bw.png");
+    }
+
+    // Step 4: Post-BW smoothing -> re-threshold.
+    // The BW image has angular seven-segment shapes. GaussianBlur on the
+    // binary image creates gray anti-aliased edges. Re-thresholding at 128
+    // rounds the corners into natural smooth digit shapes that Tesseract
+    // recognizes much better. This is equivalent to morphological closing.
+    cv::Mat bw_mat = bw.to_opencv_Mat();
+    cv::Mat smoothed;
+    cv::GaussianBlur(bw_mat, smoothed, cv::Size(7, 7), 2.0);
+
+    // Re-threshold: convert smoothed back to ImageRGB32 and BW threshold.
+    // After blur on BW: text areas are dark gray (~0-64), bg areas are
+    // light gray (~192-255), edge zones are mid-gray (~64-192).
+    // Threshold at [0..128] captures text + expanded edges -> BLACK.
+    ImageRGB32 smoothed_img(smoothed.cols, smoothed.rows);
+    smoothed.copyTo(smoothed_img.to_opencv_Mat());
+    ImageRGB32 smooth_bw = to_blackwhite_rgb32_range(
+        smoothed_img, true, combine_rgb(0, 0, 0), combine_rgb(128, 128, 128));
+    if (save_debug_images) {
+        smooth_bw.save(prefix + "_4_smooth_bw.png");
+    }
+
+    // Step 5: Pad with white border (Tesseract needs margins).
+    ImageRGB32 padded = pad_image(smooth_bw, smooth_bw.height() / 2, 0xffffffff);
+    if (save_debug_images) {
+        padded.save(prefix + "_5_padded.png");
+    }
+
+    return padded;
+}
+
+// Stores the overlay colour and the fixed float box of each summary field.
+StatsReader::StatsReader(Color color)
+    : m_color(color), m_box_nature(0.028976, 0.729610, 0.502487, 0.065251),
+      m_box_level(0.052000, 0.120140, 0.099000, 0.069416),
+      m_box_name(0.163158, 0.122917, 0.262811, 0.066639),
+      m_box_hp(0.815558, 0.131247, 0.173049, 0.065251),
+      m_box_attack(0.891000, 0.245089, 0.097607, 0.063862),
+      m_box_defense(0.891000, 0.325612, 0.097607, 0.066639),
+      m_box_sp_attack(0.891000, 0.406134, 0.097607, 0.066639),
+      m_box_sp_defense(0.891000, 0.486657, 0.097607, 0.063862),
+      m_box_speed(0.891000, 0.567180, 0.097607, 0.066639) {}
+
+// Adds one overlay box per summary field, mapped through GAME_BOX.
+void StatsReader::make_overlays(VideoOverlaySet &items) const {
+    const BoxOption &GAME_BOX = GameSettings::instance().GAME_BOX;
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_nature));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_level));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_name));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_hp));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_attack));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_defense));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_sp_attack));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_sp_defense));
+    items.add(m_color, GAME_BOX.inner_to_outer(m_box_speed));
+}
+
+// Reads name, level and nature from a page-1 summary frame into "stats".
+// A field whose read fails is left untouched.
+void StatsReader::read_page1(
+    Logger &logger, Language language,
+    const ImageViewRGB32 &frame,
+    PokemonFRLG_Stats &stats
+) {
+    const bool save_debug_images = GlobalSettings::instance().SAVE_DEBUG_IMAGES;
+    ImageViewRGB32 game_screen =
+        extract_box_reference(frame, GameSettings::instance().GAME_BOX);
+
+    // Read Name (white text on lilac background).
+    // Use multifiltered OCR across multiple narrow white bands. This tolerates
+    // brightness shifts (down to ~0xc0) while still preferring cleaner bands.
+    const std::vector name_text_color_ranges{
+        {combine_rgb(224, 224, 224), combine_rgb(255, 255, 255)},
+        {combine_rgb(208, 208, 208), combine_rgb(255, 255, 255)},
+        {combine_rgb(192, 192, 192), combine_rgb(255, 255, 255)},
+    };
+    auto name_result = Pokemon::PokemonNameReader::instance().read_substring(
+        logger, language, extract_box_reference(game_screen, m_box_name),
+        name_text_color_ranges);
+    if (!name_result.results.empty()) {
+        stats.name = name_result.results.begin()->second.token;
+    }
+
+    ImageViewRGB32 level_box = extract_box_reference(game_screen, m_box_level);
+
+    ImageRGB32 level_upscaled =
+        level_box.scale_to(level_box.width() * 4, level_box.height() * 4);
+    if (save_debug_images) {
+        level_upscaled.save("DebugDumps/ocr_level_upscaled.png");
+    }
+
+    // The level has a colored (lilac) background. The text is white, with a
+    // gray/black shadow. To bridge the gaps and make a solid black character on a
+    // white background: We want to turn BOTH the bright white text AND the dark
+    // shadow into BLACK pixels, and turn the mid-tone lilac background into
+    // WHITE. We can do this by keeping pixels that are very bright (text) or very
+    // dark (shadow).
+
+    ImageRGB32 level_ready(level_upscaled.width(), level_upscaled.height());
+    for (size_t r = 0; r < level_upscaled.height(); r++) {
+        for (size_t c = 0; c < level_upscaled.width(); c++) {
+            Color pixel(level_upscaled.pixel(c, r));
+            // If it's very bright (white text) OR very dark (shadow), it becomes
+            // black text. Otherwise (lilac background), it becomes white background.
+            if ((pixel.red() > 200 && pixel.green() > 200 && pixel.blue() > 200) ||
+                (pixel.red() < 100 && pixel.green() < 100 && pixel.blue() < 100)) {
+                level_ready.pixel(c, r) = (uint32_t)0xff000000; // Black
+            } else {
+                level_ready.pixel(c, r) = (uint32_t)0xffffffff; // White
+            }
+        }
+    }
+
+    if (save_debug_images) {
+        level_ready.save("DebugDumps/ocr_level_ready.png");
+    }
+
+    // Raw reader result; -1 means the level could not be read.
+    int level_value = -1;
+    if (!GlobalSettings::instance().USE_PADDLE_OCR) {
+        // The level uses white text with dark shadow on a lilac background.
+        // The digit reader's binarizer captures dark pixels (<=190 on all channels)
+        // but NOT the white text (all channels 255 -> excluded). This leaves the
+        // shadow outline fragmented into many small disconnected blobs.
+        // Preprocess: convert bright-white text pixels to black so the binarizer
+        // merges text + shadow into one solid connected blob per digit.
+        ImageRGB32 preprocessed = filter_rgb32_range(
+            level_box, 0xffc8c8c8, 0xffffffff, Color(0xff000000), true
+        );
+        if (save_debug_images) {
+            preprocessed.save("DebugDumps/ocr_level_preprocessed.png");
+        }
+        // Trim left 7% to exclude the "L" glyph blob (always at x~0).
+        // The actual level digits start at ~13%+ of the box width.
+        size_t lv_skip = preprocessed.width() * 7 / 100;
+        ImagePixelBox digits_bbox(
+            lv_skip, 0, preprocessed.width(),
+            preprocessed.height()
+        );
+        ImageViewRGB32 level_digit_view =
+            extract_box_reference(preprocessed, digits_bbox);
+        if (save_debug_images) {
+            level_digit_view.save("DebugDumps/ocr_level_digits_trimmed.png");
+        }
+        // Use threshold 230 (not 175): lilac-background blob crops inherently
+        // give higher RMSD than yellow stat-box crops due to background colour.
+        level_value = read_digits_waterfill_template(
+            logger, level_digit_view, 230.0, DigitTemplateType::LevelBox,
+            "levelDigit", 0x7F);
+    } else {
+        // Pass the binarized image to PaddleOCR
+        level_value = OCR::read_number(logger, level_ready, language);
+    }
+    // Only store a successful read so a failure leaves stats.level empty,
+    // matching how read_page2() treats the -1 sentinel.
+    // NOTE(review): assumes OCR::read_number() also returns -1 on failure.
+    if (level_value != -1) {
+        stats.level = level_value;
+    }
+
+    // Read Nature (black text on white/beige).
+    // Pipeline: BW -> invert -> morph close -> invert -> upscale -> smooth -> pad.
+    // Morph close on the inverted image (text=white) bridges gaps in text
+    // regions by growing white->eroding back. Works per-channel on CV_8UC4.
+    const static Pokemon::NatureReader reader("Pokemon/NatureCheckerOCR.json");
+    ImageViewRGB32 nature_raw = extract_box_reference(game_screen, m_box_nature);
+    if (save_debug_images) {
+        nature_raw.save("DebugDumps/ocr_nature_0_raw.png");
+    }
+
+    // Step 1: BW at native resolution. Dark text [0..150] -> black.
+    ImageRGB32 nature_bw = to_blackwhite_rgb32_range(
+        nature_raw, true,
+        combine_rgb(0, 0, 0), combine_rgb(150, 150, 150));
+    if (save_debug_images) {
+        nature_bw.save("DebugDumps/ocr_nature_1_bw.png");
+    }
+
+    // Step 2: Invert -> MORPH_CLOSE -> Invert to bridge gaps.
+    // On the inverted image, text is bright (255) and bg is dark (0).
+    // MORPH_CLOSE (dilate then erode) fills small dark holes within
+    // the bright text regions - exactly the 1px gaps we need to bridge.
+    // A 3x3 kernel bridges 1px gaps. Two iterations bridges 2px gaps.
+    {
+        cv::Mat bw_mat = nature_bw.to_opencv_Mat();
+        cv::Mat inverted;
+        cv::bitwise_not(bw_mat, inverted);
+
+        cv::Mat kernel = cv::getStructuringElement(
+            cv::MORPH_ELLIPSE, cv::Size(3, 3));
+        cv::Mat closed;
+        cv::morphologyEx(inverted, closed, cv::MORPH_CLOSE, kernel,
+            cv::Point(-1, -1), 2); // 2 iterations for 2px gaps
+
+        cv::Mat result;
+        cv::bitwise_not(closed, result);
+
+        ImageRGB32 nature_filled(result.cols, result.rows);
+        result.copyTo(nature_filled.to_opencv_Mat());
+        nature_bw = std::move(nature_filled);
+    }
+    if (save_debug_images) {
+        nature_bw.save("DebugDumps/ocr_nature_2_gapfilled.png");
+    }
+
+    // Step 3: 4x bilinear upscale.
+    int scale = 4;
+    cv::Mat filled_mat = nature_bw.to_opencv_Mat();
+    cv::Mat upscaled;
+    cv::resize(filled_mat, upscaled,
+        cv::Size(filled_mat.cols * scale, filled_mat.rows * scale),
+        0, 0, cv::INTER_LINEAR);
+    ImageRGB32 nature_up(upscaled.cols, upscaled.rows);
+    upscaled.copyTo(nature_up.to_opencv_Mat());
+    if (save_debug_images) {
+        nature_up.save("DebugDumps/ocr_nature_3_upscaled.png");
+    }
+
+    // Step 4: Smooth + re-threshold (same as preprocess_for_ocr).
+    cv::Mat smoothed;
+    cv::GaussianBlur(upscaled, smoothed, cv::Size(7, 7), 2.0);
+    ImageRGB32 smoothed_img(smoothed.cols, smoothed.rows);
+    smoothed.copyTo(smoothed_img.to_opencv_Mat());
+    ImageRGB32 nature_smooth = to_blackwhite_rgb32_range(
+        smoothed_img, true,
+        combine_rgb(0, 0, 0), combine_rgb(128, 128, 128));
+    if (save_debug_images) {
+        nature_smooth.save("DebugDumps/ocr_nature_4_smooth.png");
+    }
+
+    // Step 5: Pad with white border.
+    ImageRGB32 nature_padded = pad_image(
+        nature_smooth, nature_smooth.height() / 2, 0xffffffff);
+    if (save_debug_images) {
+        nature_padded.save("DebugDumps/ocr_nature_5_padded.png");
+    }
+
+    // OCR left/right single-word crops and pick the best score.
+    // This handles both "RASH nature." and "Nature DOCILE." while avoiding
+    // noisy full-line matches. Fall back to full-line only if both halves fail.
+    OCR::StringMatchResult best_nature_result;
+    bool have_best_nature_result = false;
+
+    auto consider_nature_result = [&](const OCR::StringMatchResult& result) {
+        if (result.results.empty()) {
+            return;
+        }
+        if (!have_best_nature_result
+            || result.results.begin()->first < best_nature_result.results.begin()->first) {
+            best_nature_result = result;
+            have_best_nature_result = true;
+        }
+    };
+
+    // Left and right single-word attempts (silent - log final selection only).
+    const ImageFloatBox left_word_box(0.00, 0.00, 0.56, 1.00);
+    const ImageFloatBox right_word_box(0.44, 0.00, 0.56, 1.00);
+
+    ImageViewRGB32 nature_left = extract_box_reference(nature_padded, left_word_box);
+    ImageViewRGB32 nature_right = extract_box_reference(nature_padded, right_word_box);
+    if (save_debug_images) {
+        nature_left.save("DebugDumps/ocr_nature_6_left_word.png");
+        nature_right.save("DebugDumps/ocr_nature_7_right_word.png");
+    }
+
+    OCR::StringMatchResult left_result = reader.match_substring_from_image(
+        nullptr, language, nature_left,
+        Pokemon::NatureReader::MAX_LOG10P,
+        Pokemon::NatureReader::MAX_LOG10P_SPREAD,
+        OCR::PageSegMode::SINGLE_WORD);
+    consider_nature_result(left_result);
+
+    OCR::StringMatchResult right_result = reader.match_substring_from_image(
+        nullptr, language, nature_right,
+        Pokemon::NatureReader::MAX_LOG10P,
+        Pokemon::NatureReader::MAX_LOG10P_SPREAD,
+        OCR::PageSegMode::SINGLE_WORD);
+    consider_nature_result(right_result);
+
+    // Fallback: if both halves fail thresholding, try full-line once.
+    if (!have_best_nature_result) {
+        OCR::StringMatchResult full_result = reader.match_substring_from_image(
+            nullptr, language, nature_padded,
+            Pokemon::NatureReader::MAX_LOG10P,
+            Pokemon::NatureReader::MAX_LOG10P_SPREAD,
+            OCR::PageSegMode::SINGLE_LINE);
+        consider_nature_result(full_result);
+    }
+
+    if (have_best_nature_result) {
+        best_nature_result.log(logger, Pokemon::NatureReader::MAX_LOG10P, "Nature Final");
+        stats.nature = best_nature_result.results.begin()->second.token;
+    }
+}
+
+// Reads the six stats from a page-2 summary frame into "stats". A stat whose
+// read returns -1 is left untouched.
+void StatsReader::read_page2(
+    Logger &logger, const ImageViewRGB32 &frame,
+    PokemonFRLG_Stats &stats
+) {
+    ImageViewRGB32 game_screen =
+        extract_box_reference(frame, GameSettings::instance().GAME_BOX);
+
+    auto read_stat = [&](const ImageFloatBox &box, const std::string &name) {
+        ImageViewRGB32 stat_region = extract_box_reference(game_screen, box);
+
+        if (!GlobalSettings::instance().USE_PADDLE_OCR) {
+            // Tesseract-free path: waterfill segmentation + template matching
+            // against the PokemonFRLG/Digits/0-9.png templates.
+            return read_digits_waterfill_template(logger, stat_region);
+        }
+
+        // PaddleOCR path (original): preprocess then per-digit waterfill OCR.
+        // Dark text [0..190] -> black. Threshold at 190 captures the
+        // blurred gap pixels between segments, making bridges thicker.
+        // Not higher than 190 to avoid capturing yellow bg edge noise.
+        ImageRGB32 ocr_ready = preprocess_for_ocr(
+            stat_region, name, 7, 2, true,
+            combine_rgb(0, 0, 0), combine_rgb(190, 190, 190)
+        );
+
+        // Waterfill isolates each digit -> per-char SINGLE_CHAR OCR.
+        return OCR::read_number_waterfill(
+            logger, ocr_ready, 0xff000000,
+            0xff808080
+        );
+    };
+
+    // HP box: shift right 70% to clear the "/" character.
+    ImageFloatBox total_hp_box(
+        m_box_hp.x + m_box_hp.width * 0.7, m_box_hp.y,
+        m_box_hp.width * 0.3, m_box_hp.height
+    );
+
+    auto assign_stat = [](std::optional& field, int value) {
+        if (value != -1) {
+            field = static_cast(value);
+        }
+    };
+    assign_stat(stats.hp, read_stat(total_hp_box, "hp"));
+    assign_stat(stats.attack, read_stat(m_box_attack, "attack"));
+    assign_stat(stats.defense, read_stat(m_box_defense, "defense"));
+    assign_stat(stats.sp_attack, read_stat(m_box_sp_attack, "spatk"));
+    assign_stat(stats.sp_defense, read_stat(m_box_sp_defense, "spdef"));
+    assign_stat(stats.speed, read_stat(m_box_speed, "speed"));
+}
+
+} // namespace PokemonFRLG
+} // namespace NintendoSwitch
+} // namespace PokemonAutomation
+
diff --git a/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.h b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.h
new file mode 100644
index 0000000000..392e949ce1
--- /dev/null
+++ b/SerialPrograms/Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.h
@@ -0,0 +1,75 @@
+/* Stats Reader
+ *
+ * From: https://github.com/PokemonAutomation/
+ *
+ */
+
+#ifndef PokemonAutomation_PokemonFRLG_StatsReader_H
+#define PokemonAutomation_PokemonFRLG_StatsReader_H
+
+#include "Common/Cpp/Color.h"
+#include "CommonFramework/ImageTools/ImageBoxes.h"
+#include "CommonFramework/Language.h"
+
+#include
+#include
+#include
+
+namespace PokemonAutomation {
+class Logger;
+class ImageViewRGB32;
+class VideoOverlaySet;
+namespace NintendoSwitch {
+namespace PokemonFRLG {
+
+// Values read from the summary screens. The optional numeric fields stay
+// empty when the corresponding read fails.
+struct PokemonFRLG_Stats {
+    std::string nature;
+    std::optional level;
+    std::string name;
+    std::optional hp;
+    std::optional attack;
+    std::optional defense;
+    std::optional sp_attack;
+    std::optional sp_defense;
+    std::optional speed;
+};
+
+class StatsReader {
+public:
+    StatsReader(Color color = COLOR_RED);
+
+    void make_overlays(VideoOverlaySet &items) const;
+
+    // Reads from page 1 (Nature, Level, Name)
+    void read_page1(
+        Logger &logger, Language language,
+        const ImageViewRGB32 &frame, PokemonFRLG_Stats &stats
+    );
+
+    // Reads from page 2 (Stats: HP, Atk, Def, SpA, SpD, Spe)
+    void read_page2(
+        Logger &logger, const ImageViewRGB32 &frame,
+        PokemonFRLG_Stats &stats
+    );
+
+private:
+    Color m_color;
+    ImageFloatBox m_box_nature;
+    ImageFloatBox m_box_level;
+    ImageFloatBox m_box_name;
+    ImageFloatBox m_box_hp;
+    ImageFloatBox m_box_attack;
+    ImageFloatBox m_box_defense;
+    ImageFloatBox m_box_sp_attack;
+    ImageFloatBox m_box_sp_defense;
+    ImageFloatBox m_box_speed;
+
+};
+
+} // namespace PokemonFRLG
+} // namespace NintendoSwitch
+} // namespace PokemonAutomation
+#endif
+
diff --git a/SerialPrograms/Source/PokemonFRLG/PokemonFRLG_Panels.cpp b/SerialPrograms/Source/PokemonFRLG/PokemonFRLG_Panels.cpp
index 13d1aea091..06813bdb92 100644
--- a/SerialPrograms/Source/PokemonFRLG/PokemonFRLG_Panels.cpp
+++ b/SerialPrograms/Source/PokemonFRLG/PokemonFRLG_Panels.cpp
@@ -17,6 +17,7 @@
 #include "Programs/ShinyHunting/PokemonFRLG_PrizeCornerReset.h"
 #include "Programs/ShinyHunting/PokemonFRLG_ShinyHunt-Overworld.h"
 #include "Programs/TestPrograms/PokemonFRLG_SoundListener.h"
+#include "Programs/TestPrograms/PokemonFRLG_ReadStats.h"
 
 namespace PokemonAutomation{
 namespace NintendoSwitch{
@@ -54,6 +55,7 @@ std::vector PanelListFactory::make_panels() const{
     if (PreloadSettings::instance().DEVELOPER_MODE){
         ret.emplace_back("---- Developer Tools ----");
         ret.emplace_back(make_single_switch_program());
+        ret.emplace_back(make_single_switch_program());
     }
 
     return ret;
diff --git a/SerialPrograms/Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.cpp b/SerialPrograms/Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.cpp
new file mode 100644
index 0000000000..c2b195ccce
--- /dev/null
+++ b/SerialPrograms/Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.cpp
@@ -0,0 +1,91 @@
+/* Read Stats
+ *
+ * From: https://github.com/PokemonAutomation/
+ *
+ */
+
+#include "PokemonFRLG_ReadStats.h"
+#include "Common/Cpp/Color.h" +#include "CommonFramework/VideoPipeline/VideoFeed.h" +#include "CommonFramework/VideoPipeline/VideoOverlayScopes.h" +#include "NintendoSwitch/Commands/NintendoSwitch_Commands_PushButtons.h" +#include "Pokemon/Inference/Pokemon_NameReader.h" +#include "Pokemon/Pokemon_Strings.h" +#include "PokemonFRLG/Inference/PokemonFRLG_StatsReader.h" +#include +#include +#include + +using namespace std::chrono_literals; +namespace PokemonAutomation { +namespace NintendoSwitch { +namespace PokemonFRLG { + /* Forwards this program's metadata strings and its controller/feedback settings to SingleSwitchProgramDescriptor. */ +ReadStats_Descriptor::ReadStats_Descriptor() + : SingleSwitchProgramDescriptor( + "PokemonFRLG:ReadStats", Pokemon::STRING_POKEMON + " FRLG", + "Read Summary Stats", "", + "Read stats, level, name, and nature from the summary screen. Start " + "on page 1 of summary.", + ProgramControllerClass::StandardController_NoRestrictions, + FeedbackType::REQUIRED, AllowCommandsWhenRunning::DISABLE_COMMANDS) {} + /* Sets up the only option, LANGUAGE, from the language list returned by PokemonNameReader::instance().languages() and registers it with PA_ADD_OPTION. program() passes it to StatsReader::read_page1(). */ +ReadStats::ReadStats() + : LANGUAGE("Game Language:", + Pokemon::PokemonNameReader::instance().languages(), + LockMode::LOCK_WHILE_RUNNING, true) { + PA_ADD_OPTION(LANGUAGE); +} + /* Test flow: snapshot the current screen and read name, level and nature via read_page1(); press DPAD_RIGHT once and wait 500 ms for the page change; snapshot again and read the six stats via read_page2(). Every value is logged, with ??? in place of an optional that is still empty. Finishes with a 10 s wait - presumably to keep the overlay boxes on screen for inspection (the VideoOverlaySet is a local of this function); confirm. NOTE(review): neither snapshot is checked for validity before being handed to the reader. */ +void ReadStats::program(SingleSwitchProgramEnvironment &env, + ProControllerContext &context) { + env.log("Starting Read Stats program... Please ensure you are on Page 1 " + "(POKEMON INFO)."); + + StatsReader reader; + VideoOverlaySet overlays(env.console.overlay()); + reader.make_overlays(overlays); + + PokemonFRLG_Stats stats; + + env.log("Reading Page 1 (Name, Level, Nature)..."); + VideoSnapshot screen1 = env.console.video().snapshot(); + reader.read_page1(env.logger(), LANGUAGE, screen1, stats); + + env.log("Name: " + stats.name); + env.log("Level: " + + (stats.level.has_value() ? 
std::to_string(*stats.level) : "???")); + env.log("Nature: " + stats.nature); + + env.log("Navigating to Page 2 (POKEMON SKILLS)..."); + pbf_press_dpad(context, DPAD_RIGHT, 100ms, 100ms); + context.wait_for_all_requests(); + pbf_wait(context, 500ms); // Wait for transition + context.wait_for_all_requests(); + + env.log("Reading Page 2 (Stats)..."); + VideoSnapshot screen2 = env.console.video().snapshot(); + reader.read_page2(env.logger(), screen2, stats); + + env.log("HP (Total): " + (stats.hp.has_value() ? std::to_string(*stats.hp) : "???")); + env.log("Attack: " + + (stats.attack.has_value() ? std::to_string(*stats.attack) : "???")); + env.log("Defense: " + + (stats.defense.has_value() ? std::to_string(*stats.defense) : "???")); + env.log("Sp. Attack: " + + (stats.sp_attack.has_value() ? std::to_string(*stats.sp_attack) : "???")); + env.log("Sp. Defense: " + + (stats.sp_defense.has_value() ? std::to_string(*stats.sp_defense) : "???")); + env.log("Speed: " + + (stats.speed.has_value() ? std::to_string(*stats.speed) : "???")); + + env.log("Finished Reading Stats. 
Verification boxes are on overlay.", + COLOR_BLUE); + pbf_wait(context, 10s); + context.wait_for_all_requests(); +} + +} // namespace PokemonFRLG +} // namespace NintendoSwitch +} // namespace PokemonAutomation + diff --git a/SerialPrograms/Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.h b/SerialPrograms/Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.h new file mode 100644 index 0000000000..c15f7d6740 --- /dev/null +++ b/SerialPrograms/Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.h @@ -0,0 +1,43 @@ +/* Read Stats + * + * From: https://github.com/PokemonAutomation/ + * + */ + +#ifndef PokemonAutomation_PokemonFRLG_ReadStats_H +#define PokemonAutomation_PokemonFRLG_ReadStats_H + +#include "CommonFramework/Tools/VideoStream.h" +#include "CommonTools/Options/LanguageOCROption.h" +#include "NintendoSwitch/Controllers/Procon/NintendoSwitch_ProController.h" +#include "NintendoSwitch/NintendoSwitch_SingleSwitchProgram.h" +#include "Pokemon/Pokemon_Strings.h" + +namespace PokemonAutomation { +namespace NintendoSwitch { +namespace PokemonFRLG { + +class ReadStats_Descriptor : public SingleSwitchProgramDescriptor { +public: + ReadStats_Descriptor(); +}; + +class ReadStats : public SingleSwitchProgramInstance { +public: + ReadStats(); + virtual void program(SingleSwitchProgramEnvironment &env, + ProControllerContext &context) override; + + virtual void start_program_border_check(VideoStream &stream, + FeedbackType feedback_type) override { + } + +private: + OCR::LanguageOCROption LANGUAGE; +}; + +} // namespace PokemonFRLG +} // namespace NintendoSwitch +} // namespace PokemonAutomation +#endif + diff --git a/SerialPrograms/cmake/SourceFiles.cmake b/SerialPrograms/cmake/SourceFiles.cmake index 6aa85ec127..d227d04b45 100644 --- a/SerialPrograms/cmake/SourceFiles.cmake +++ b/SerialPrograms/cmake/SourceFiles.cmake @@ -1426,6 +1426,10 @@ file(GLOB LIBRARY_SOURCES Source/PokemonFRLG/Inference/PokemonFRLG_SelectionArrowDetector.h 
Source/PokemonFRLG/Inference/PokemonFRLG_ShinySymbolDetector.cpp Source/PokemonFRLG/Inference/PokemonFRLG_ShinySymbolDetector.h + Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.cpp + Source/PokemonFRLG/Inference/PokemonFRLG_DigitReader.h + Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.cpp + Source/PokemonFRLG/Inference/PokemonFRLG_StatsReader.h Source/PokemonFRLG/PokemonFRLG_Navigation.cpp Source/PokemonFRLG/PokemonFRLG_Navigation.h Source/PokemonFRLG/PokemonFRLG_Panels.cpp @@ -1448,6 +1452,8 @@ file(GLOB LIBRARY_SOURCES Source/PokemonFRLG/Programs/ShinyHunting/PokemonFRLG_ShinyHunt-Overworld.h Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_SoundListener.cpp Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_SoundListener.h + Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.cpp + Source/PokemonFRLG/Programs/TestPrograms/PokemonFRLG_ReadStats.h Source/PokemonHome/Inference/PokemonHome_BallReader.cpp Source/PokemonHome/Inference/PokemonHome_BallReader.h Source/PokemonHome/Inference/PokemonHome_BoxGenderDetector.cpp