diff --git a/Makefile b/Makefile index f31aac0..7e27bad 100644 --- a/Makefile +++ b/Makefile @@ -165,6 +165,28 @@ debug: all @echo "Starting QEMU paused with GDB on :1234 and monitor on 127.0.0.1:55555" QEMU_OPTS="-S -s" ./startQemu.sh headless +# Automated GDB session via pexpect (no manual address copying, no probe run). +# Uses the debug mailbox (physical 0x7000) — efi_main writes its own runtime +# address there on entry; GDB watches for it via a hardware watchpoint. +# +# Requires: pip install --break-system-packages pexpect +# Requires: tmux session named 'theseus' (created automatically if absent) +# +# Full interactive session (default): +# make debug-auto +# +# Non-interactive smoke-test (CI-friendly, exits after breakpoint check): +# make debug-auto-ci +.PHONY: debug-auto debug-auto-ci +debug-auto: all + @echo "Starting automated GDB session (mailbox watchpoint + pexpect)..." + python3 scripts/gdb-auto.py --tmux theseus + +debug-auto-ci: all + @echo "Starting non-interactive GDB breakpoint smoke-test..." + python3 scripts/gdb-auto.py --tmux theseus \ + --no-interactive --timeout-boot 180 + # Print a short help message describing common targets and how to set PROFILE .PHONY: help help: diff --git a/bootloader/src/main.rs b/bootloader/src/main.rs index 74b0813..1c32027 100644 --- a/bootloader/src/main.rs +++ b/bootloader/src/main.rs @@ -89,6 +89,41 @@ core::arch::global_asm!(".globl __chkstk", "__chkstk:", " jmp ___chkstk_ms",) /// direct `ExitBootServices` → `kernel_entry` transfer. #[entry] fn efi_main() -> Status { + // ----------------------------------------------------------------------- + // GDB debug mailbox — write runtime efi_main address to a fixed physical + // location so automated GDB tooling can discover it via a watchpoint. + // + // Must happen before any UEFI call so the address is visible as early as + // possible. The page is allocated via UEFI AllocateType::Address so the + // firmware records our ownership in the memory map. 
+ // + // Layout at DEBUG_MAILBOX_PHYS: + // +0x00 u64 runtime efi_main address (written first) + // +0x08 u64 magic sentinel (written second → GDB trigger) + // + // See: shared/src/constants.rs :: debug_mailbox + // debug.gdb :: theseus-auto command + // ----------------------------------------------------------------------- + use theseus_shared::constants::debug_mailbox; + use uefi::boot::{self as uefi_boot, AllocateType}; + use uefi::mem::memory_map::MemoryType as UefiMemType; + + // Allocate the mailbox page via UEFI so firmware records our ownership. + // Ignore errors — if the page is already allocated (e.g. by firmware) we + // fall back to a direct write; in QEMU/OVMF this range is always free. + let _ = uefi_boot::allocate_pages( + AllocateType::Address(debug_mailbox::PHYS), + UefiMemType::LOADER_DATA, + 1, + ); + + unsafe { + let base = debug_mailbox::PHYS as *mut u64; + // Write address first, then magic — GDB watches the magic location. + base.add(0).write_volatile(efi_main as *const () as u64); + base.add(1).write_volatile(debug_mailbox::MAGIC); + } + // Install pre-exit allocators that forward to UEFI Boot Services theseus_shared::allocator::install_pre_exit_allocators(pre_exit_alloc, pre_exit_dealloc); // Initialize UEFI environment and global output driver diff --git a/debug.gdb b/debug.gdb index 4e2f375..cea77d0 100644 --- a/debug.gdb +++ b/debug.gdb @@ -1,15 +1,47 @@ # Theseus EFI GDB helpers ---------------------------------------------------- # -# Usage: -# 1. launch QEMU with -S -s so CPUs start halted -# 2. gdb -x debug.gdb -# 3. let the firmware run once (it will print `efi_main @ 0x...`) -# 4. run: theseus-load 0x3da60a10 # replace with your runtime address -# (the command computes the relocation delta, reloads DWARF at the correct -# runtime base, and installs a hardware breakpoint at the entry plus -# software breakpoints at +0x200/+0x300) -# 5. reset the guest (e.g. 
`monitor system_reset` or restart QEMU) and rerun -# so `efi_main` executes again and trips the breakpoints. +# RECOMMENDED WORKFLOW (fully automated, no address copying): +# +# make debug-auto +# +# This starts QEMU, spawns GDB via pexpect, and runs 'theseus-auto' +# which uses a hardware watchpoint on the debug mailbox to automatically +# capture the runtime efi_main address and load symbols. You land inside +# efi_main with full Rust source-level symbols, hands-free. +# +# MANUAL WORKFLOW (if you want direct GDB control): +# +# make debug # starts QEMU paused on :1234 +# gdb -x debug.gdb # in a separate terminal +# (gdb) target remote localhost:1234 +# (gdb) theseus-auto # automated: watchpoint → symbols → stop +# +# If theseus-auto is unavailable (older build without debug mailbox): +# +# (gdb) target remote localhost:1234 +# (gdb) continue # let UEFI run; read "efi_main @ 0x..." from debugcon +# (gdb) theseus-load 0xADDR # load symbols at the runtime address +# (gdb) monitor system_reset # reset guest so efi_main runs again with BP armed +# +# COMMANDS PROVIDED BY THIS SCRIPT: +# +# theseus-auto Fully automated session. Sets a hw watchpoint on +# the debug mailbox sentinel (0x7008), continues, +# waits for efi_main to write its address + magic, +# then calls theseus-load automatically. Stops inside +# efi_main with symbols loaded. No address argument. +# +# theseus-load ADDR Load DWARF symbols relocated to the given runtime +# efi_main address. Computes all section addresses +# dynamically from BOOTX64.SYM (no hardcoded offsets). +# Arms software breakpoints at efi_main entry, +# entry+0x200, and entry+0x300. +# +# theseus-go ADDR Like theseus-load but also issues 'continue'. +# +# SECTION DELTAS: +# Computed fresh from BOOTX64.SYM at startup — safe across rebuilds. +# Section layout is printed on load for verification. 
# set pagination off @@ -18,6 +50,12 @@ set demangle-style rust set breakpoint pending on set confirm off +# GDB stub connection target used by theseus-auto for reconnect after reset. +# Override before sourcing this file if using a TCP port instead of a socket: +# (gdb) python gdb.set_convenience_variable("_gdb_target", "localhost:1234") +# (gdb) source debug.gdb +# Default: unix socket used by gdb-auto.py / make debug-auto + # Make DWARF types and symbols available up-front. symbol-file build/BOOTX64.SYM @@ -26,7 +64,20 @@ import gdb import os import struct -SYMBOL_PATH = os.path.abspath("build/BOOTX64.SYM") +SYMBOL_PATH = os.path.abspath("build/BOOTX64.SYM") + +# GDB connection target for theseus-auto reconnect after system_reset. +# Defaults to the unix socket used by gdb-auto.py / make debug-auto. +# Override via: python gdb.set_convenience_variable("_gdb_target", "localhost:1234") +def _get_gdb_target(): + """Read GDB connection target, re-evaluating the convenience variable each call.""" + raw = gdb.convenience_variable("_gdb_target") + if raw is not None: + return str(raw).strip('"').strip("'") + return "localhost:1234" + +# Evaluated at source time for initial display; re-read in theseus-auto.invoke. +_GDB_SOCKET = _get_gdb_target() SIGNATURE = b"THESEUSDBGBASE!\x00" def _read_elf(path): @@ -38,26 +89,69 @@ _elf_image = _read_elf(SYMBOL_PATH) if _elf_image[:4] != b"\x7fELF": raise gdb.GdbError(f"{SYMBOL_PATH} is not an ELF file") -_E_PHOFF = struct.unpack_from(" 0). _image_base_link = min( - (vaddr - offset) for p_type, offset, vaddr, _ in _iter_program_headers() - if p_type == 1 # PT_LOAD + (vaddr - offset) + for p_type, offset, vaddr, _ in _iter_program_headers() + if p_type == 1 and offset != 0 ) -# Locate the signature inside the ELF file to recover its link-time address. 
+# -------------------------------------------------------------------------- +# Section headers → compute per-section deltas from image base +# (computed fresh from the actual ELF — no hardcoded constants) +# -------------------------------------------------------------------------- +_shstr_entry_off = _E_SHOFF + _E_SHSTRNDX * _E_SHENTSIZE +_shstr_data_off = struct.unpack_from(" delta from image base (link-time) + +for i in range(_E_SHNUM): + off = _E_SHOFF + i * _E_SHENTSIZE + sh_name_i = struct.unpack_from(" Example: theseus-load 0x3da60a10 + +The runtime address is printed by the bootloader on the debug port: + efi_main @ 0x + +This command: + 1. Computes the runtime image base from the runtime efi_main address. + 2. Applies per-section relocation (computed from BOOTX64.SYM, not hardcoded). + 3. Reloads DWARF symbols via add-symbol-file with all section addresses. + 4. Sets a hardware breakpoint at efi_main entry + two software sentinels. """ def __init__(self): @@ -163,38 +262,155 @@ Example: theseus-load 0x3da60a10 signature_runtime = image_base_runtime + (_signature_link_addr - _image_base_link) gdb.write("Theseus symbol loader:\n") - gdb.write(f" · runtime efi_main: 0x{runtime_entry:x}\n") - gdb.write(f" · image base (runtime):0x{image_base_runtime:x}\n") - gdb.write(f" · signature (runtime): 0x{signature_runtime:x}\n") - - text_addr = image_base_runtime + TEXT_DELTA - rdata_addr = image_base_runtime + RDATA_DELTA - data_addr = image_base_runtime + DATA_DELTA - bss_addr = image_base_runtime + BSS_DELTA - eh_addr = image_base_runtime + EH_DELTA - reloc_addr = image_base_runtime + RELOC_DELTA - - gdb.write(" · Section remap targets:\n") - gdb.write(f" .text → 0x{text_addr:x}\n") - gdb.write(f" .rdata → 0x{rdata_addr:x}\n") - gdb.write(f" .data → 0x{data_addr:x}\n") - gdb.write(f" .bss → 0x{bss_addr:x}\n") - gdb.write(f" .eh_fram→ 0x{eh_addr:x}\n") - gdb.write(f" .reloc → 0x{reloc_addr:x}\n") - - _remove_existing_symbols() - gdb.execute( + gdb.write(f" · 
runtime efi_main: 0x{runtime_entry:x}\n") + gdb.write(f" · image base (runtime): 0x{image_base_runtime:x}\n") + gdb.write(f" · signature (runtime): 0x{signature_runtime:x}\n") + + # Build add-symbol-file command with all non-debug sections + # that have a non-zero address (skip debug sections — GDB handles + # those automatically from the ELF's DWARF). + SKIP_PREFIXES = (".debug_", ".shstrtab") + sections = { + name: image_base_runtime + delta + for name, delta in _section_deltas.items() + if not any(name.startswith(p) for p in SKIP_PREFIXES) + } + + if ".text" not in sections: + raise gdb.GdbError("No .text section found in symbol file") + + text_addr = sections.pop(".text") + + gdb.write(" · Section runtime addresses:\n") + gdb.write(f" .text = 0x{text_addr:x}\n") + extra_args = [] + for sname, saddr in sorted(sections.items()): + gdb.write(f" {sname:20s} = 0x{saddr:x}\n") + extra_args.append(f"-s {sname} 0x{saddr:x}") + + cmd = ( f"add-symbol-file {SYMBOL_PATH} 0x{text_addr:x} " - f"-s .rdata 0x{rdata_addr:x} " - f"-s .data 0x{data_addr:x} " - f"-s .bss 0x{bss_addr:x} " - f"-s .eh_fram 0x{eh_addr:x} " - f"-s .reloc 0x{reloc_addr:x}", - to_string=True, + + " ".join(extra_args) ) + _remove_existing_symbols() + gdb.execute(cmd, to_string=True) + gdb.write(" · Symbols loaded.\n") + _set_breakpoints(runtime_entry) - gdb.write("⛳ Breakpoints armed on theseus_efi::efi_main (entry [HW], +0x200, +0x300). Reset or rerun so they trigger.\n") + gdb.write("⛳ Ready. Reset or rerun the guest so efi_main fires.\n") + gdb.write(" (call 'continue' or 'c' to run)\n") TheseusLoadCommand() + + +class TheseusGoCommand(gdb.Command): + """Shortcut: theseus-load then continue. + +Usage: theseus-go + +Equivalent to: + theseus-load + continue +but issued in the correct synchronous context to avoid the +'target is running' race that can occur in batch/script mode. 
+""" + + def __init__(self): + super().__init__("theseus-go", gdb.COMMAND_USER) + + def invoke(self, arg, from_tty): + gdb.execute(f"theseus-load {arg}", to_string=False) + gdb.execute("continue", to_string=False) + +TheseusGoCommand() + + +class TheseusAutoCommand(gdb.Command): + """Fully automated Theseus debug session — no address argument needed. + +Usage: theseus-auto + +Workflow: + 1. Sets a hardware watchpoint on the debug mailbox sentinel + (physical address 0x7008, value 0xDEADBEEFCAFEF00D). + 2. Issues 'continue' — UEFI boots, efi_main writes its runtime address + to 0x7000 then writes the magic to 0x7008. + 3. Watchpoint fires. theseus-auto reads the runtime efi_main address from + 0x7000, removes the watchpoint, and calls theseus-load with it. + 4. Returns to GDB prompt. Execution is stopped inside efi_main (at the + mailbox write instruction) with full Rust source-level symbols loaded. + +No reset needed — efi_main is caught on its first execution. QEMU does not +need to be started with -S; gdb-auto.py starts it running. 
+ +Requirements: + - Kernel built with debug mailbox support: + bootloader/src/main.rs — mailbox write at efi_main entry + shared/src/constants.rs — debug_mailbox constants + +Recommended via make: + $ make debug-auto # starts everything automatically + +Manual (any QEMU session with GDB stub, -S optional): + $ make debug # QEMU paused on :1234 + $ gdb -x debug.gdb + (gdb) target remote localhost:1234 + (gdb) theseus-auto +""" + + # Mailbox layout (matches shared/src/constants.rs :: debug_mailbox) + MAILBOX_PHYS = 0x7000 + ADDR_OFFSET = 0x00 # u64: runtime efi_main address + MAGIC_OFFSET = 0x08 # u64: sentinel written after address + MAGIC_VALUE = 0xDEADBEEFCAFEF00D + + def __init__(self): + super().__init__("theseus-auto", gdb.COMMAND_USER) + + def invoke(self, arg, from_tty): + addr_ptr = self.MAILBOX_PHYS + self.ADDR_OFFSET + magic_ptr = self.MAILBOX_PHYS + self.MAGIC_OFFSET + magic_expr = f"*(unsigned long long*)0x{magic_ptr:x} == 0x{self.MAGIC_VALUE:x}" + + gdb.write(f"theseus-auto: setting watchpoint on mailbox sentinel " + f"(*0x{magic_ptr:x} == 0x{self.MAGIC_VALUE:x})...\n") + + # Hardware watchpoint — fires when the sentinel is written + wp = gdb.Breakpoint(magic_expr, gdb.BP_WATCHPOINT, gdb.WP_WRITE, + internal=False) + + gdb.write("theseus-auto: continuing until mailbox is written...\n") + gdb.execute("continue", to_string=False) + + # After this returns we're stopped at the watchpoint. + # Read the runtime efi_main address. + try: + runtime_addr = int( + gdb.parse_and_eval(f"*(unsigned long long*)0x{addr_ptr:x}") + ) & 0xFFFFFFFFFFFFFFFF + except gdb.error as e: + raise gdb.GdbError( + f"theseus-auto: failed to read mailbox address: {e}" + ) + + gdb.write(f"theseus-auto: mailbox fired — " + f"runtime efi_main = 0x{runtime_addr:x}\n") + + # Remove the watchpoint + wp.delete() + + # We are stopped inside efi_main right now (the watchpoint fired while + # efi_main was executing the mailbox write). 
Load symbols — this gives + # us source-level debug for the rest of this run. The breakpoints set + # by theseus-load are armed for any future efi_main invocations. + # + # We do NOT issue continue here: execution is already inside efi_main + # with full symbols loaded. The user is exactly where they want to be. + # They can step, inspect locals, set additional breakpoints, then 'c'. + gdb.execute(f"theseus-load 0x{runtime_addr:x}", to_string=False) + gdb.write("theseus-auto: ✅ stopped inside efi_main with symbols loaded.\n") + gdb.write(" Use 'stepi', 'next', 'c', or set more breakpoints.\n") + +TheseusAutoCommand() end diff --git a/docs/axioms/debug.md b/docs/axioms/debug.md index 537f732..04d4d13 100644 --- a/docs/axioms/debug.md +++ b/docs/axioms/debug.md @@ -52,7 +52,43 @@ Affected modules: - `kernel/src/panic.rs` - `kernel/src/logging/*` -## A3: The runtime monitor is a first-class inspection surface +## A3: The GDB debug mailbox provides a stable physical address for runtime efi_main discovery + +**REQUIRED** + +`efi_main` writes its own runtime virtual address to physical `0x7000` and a +magic sentinel (`0xDEADBEEFCAFEF00D`) to physical `0x7008` as the very first +action at entry, before any UEFI call. This allows GDB to discover the correct +load address via a hardware watchpoint without a probe-then-restart workflow. + +The page at `0x7000` is reserved via `AllocateType::Address` so the UEFI +firmware records ownership in the memory map. The sentinel is written *after* +the address so a watchpoint on `0x7008` guarantees the address at `0x7000` is +already valid when it fires. 
+ +Layout: +``` +0x7000 + 0x00 u64 runtime efi_main virtual address +0x7000 + 0x08 u64 magic sentinel 0xDEADBEEFCAFEF00D +``` + +Implements / evidence: +- `bootloader/src/main.rs` — mailbox write at top of `efi_main` +- `shared/src/constants.rs::debug_mailbox` — address and magic constants + +Related plans: +- `../plans/observability.md` + +Tooling: +- `debug.gdb::theseus-auto` — GDB command that uses this mailbox +- `scripts/gdb-auto.py` — pexpect driver for fully automated sessions +- `make debug-auto` — one-command entry point + +Affected modules: +- `bootloader/src/main.rs` +- `shared/src/constants.rs` + +## A4: The runtime monitor is a first-class inspection surface **REQUIRED** diff --git a/docs/development-and-debugging.md b/docs/development-and-debugging.md index 2a872e8..21e57f4 100644 --- a/docs/development-and-debugging.md +++ b/docs/development-and-debugging.md @@ -59,13 +59,77 @@ For one-shot QMP control against the host-side relay socket, use: ``` ## Debugging with GDB -- Launch QEMU with `QEMU_OPTS="-S -s"` to pause CPU 0 and listen on TCP 1234. -- Use the provided script `debug.gdb` as a starting point: - ```bash - gdb -x debug.gdb - ``` -- Useful breakpoints: `kernel_entry`, `environment::continue_after_stack_switch`, `interrupts::handler_timer`. -- Inspect the bootloader-to-kernel handoff by examining the pointer in `RDI` right before `kernel_entry` runs. + +### Automated session (recommended) + +```bash +make debug-auto +``` + +That's it. The script (`scripts/gdb-auto.py`) will: + +1. Start QEMU with a GDB stub on TCP :1251, kept alive in a tmux pane. +2. Spawn GDB via pexpect (drives it as a real interactive TTY — no batch-mode races). +3. Run `theseus-auto` — a GDB Python command that sets a hardware watchpoint on + the **debug mailbox** at physical `0x7008`. When `efi_main` starts it writes + its own runtime address to `0x7000` then the magic sentinel to `0x7008`; + the watchpoint fires and `theseus-auto` loads DWARF symbols automatically. +4. 
Drop you into interactive GDB, stopped inside `efi_main` with full Rust + source-level symbols. No address copying, no probe run, works every boot. + +Requires `pexpect` and a `tmux` session named `theseus` (created automatically): + +```bash +pip install --break-system-packages pexpect +``` + +Non-interactive CI mode (exits after verifying breakpoint + printing backtrace): + +```bash +make debug-auto-ci +``` + +### Manual session + +If you want direct GDB control, or are debugging something before `efi_main`: + +```bash +make debug # QEMU paused on :1234 with GDB stub +gdb -x debug.gdb # in a separate terminal +``` + +Then at the GDB prompt: + +``` +(gdb) target remote localhost:1234 +(gdb) theseus-auto # watchpoint → symbols → stop at efi_main automatically +``` + +Or, if you need to load symbols at a specific address manually: + +``` +(gdb) continue # let UEFI run; read "efi_main @ 0x..." from debugcon +(gdb) theseus-load 0xADDR # load symbols at runtime address +``` + +`debug.gdb` provides three commands: + +| Command | What it does | +|---------|-------------| +| `theseus-auto` | Fully automated: watchpoint on mailbox → capture address → load symbols. No argument. | +| `theseus-load ADDR` | Load DWARF at given runtime `efi_main` address; arms breakpoints at entry, +0x200, +0x300. | +| `theseus-go ADDR` | Like `theseus-load` but also issues `continue`. | + +Section deltas are computed dynamically from `build/BOOTX64.SYM` on every GDB +startup — no hardcoded offsets that go stale after rebuilds. + +### Useful breakpoints + +- `kernel_entry` — first kernel code after ExitBootServices +- `environment::continue_after_stack_switch` — post-stack-switch environment init +- `interrupts::handler_timer` — LAPIC timer interrupt path + +Inspect the bootloader-to-kernel handoff by examining `RDI` just before `kernel_entry` runs (it holds the `*const Handoff` pointer). ## Logging - Macros (`log_error!`, `log_warn!`, `log_info!`, `log_debug!`, `log_trace!`) live in `kernel/src/logging`. 
diff --git a/scripts/gdb-auto.py b/scripts/gdb-auto.py new file mode 100755 index 0000000..a3555a4 --- /dev/null +++ b/scripts/gdb-auto.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python3 +""" +gdb-auto.py — One-command GDB debug session for TheseusOS. + +Usage: + python3 scripts/gdb-auto.py [options] + make debug-auto # interactive (default) + make debug-auto-ci # non-interactive CI mode + +Workflow: + 1. Starts QEMU running (no -S) with a TCP GDB stub on localhost:1251, + keeping it alive in a tmux pane (required: tmux session named 'theseus', + created automatically if absent). + 2. Spawns GDB via pexpect and sources debug.gdb. + 3. Connects to QEMU and runs 'theseus-auto', which: + a. Sets a hardware watchpoint on the debug mailbox sentinel at 0x7008. + b. Issues continue — UEFI boots, efi_main writes its runtime address + to 0x7000 then writes magic 0xDEADBEEFCAFEF00D to 0x7008. + c. Watchpoint fires. theseus-auto reads the address, calls theseus-load + with correct per-section deltas (computed from BOOTX64.SYM). + d. Returns to GDB prompt stopped inside efi_main with full Rust symbols. + 4. In interactive mode: hands off to pexpect.interact() for live GDB use. + In --no-interactive mode: prints RIP + backtrace and exits (CI-friendly). + +Key properties: + - Single QEMU run, no probe-then-restart. + - Address captured from the running binary — correct every boot regardless + of UEFI load address variation. + - Hard timeout on every wait — never hangs silently. + - Ctrl-C via pexpect.sendcontrol reliably interrupts the remote target. + +Requirements: + pip install --break-system-packages pexpect + Kernel built with debug mailbox support (see shared/src/constants.rs::debug_mailbox + and bootloader/src/main.rs efi_main entry). + +tmux: + QEMU runs in a tmux pane so it survives across bwrap sandbox exec sessions. + Default session: 'theseus', pane 0 = QEMU, pane 1 = GDB output. + Override with --tmux, --qemu-pane, --gdb-pane. 
+""" + +import argparse +import os +import subprocess +import sys +import time + +try: + import pexpect +except ImportError: + sys.exit( + "ERROR: pexpect not installed.\n" + "Run: pip install --break-system-packages pexpect" + ) + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- +WORKSPACE = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +DEBUGCON_LOG = "/tmp/theseus-gdb-auto-debugcon.log" +GDB_SOCKET = "/tmp/theseus-gdb-auto.sock" # unused: kept for reference +GDB_TARGET = "localhost:1251" # TCP port — survives system_reset +GDB_SCRIPT = os.path.join(WORKSPACE, "debug.gdb") +SYMBOL_FILE = os.path.join(WORKSPACE, "build", "BOOTX64.SYM") +OVMF_CODE = os.path.join(WORKSPACE, "OVMF", "OVMF_CODE.fd") +OVMF_VARS = os.path.join(WORKSPACE, "build", "OVMF_VARS.fd") +DISK_IMG = os.path.join(WORKSPACE, "build", "disk.img") + +GDB_PROMPT_RE = r"\(gdb\)" + +DEFAULT_TMUX = "theseus" +DEFAULT_TIMEOUT_BOOT = 120 # seconds to wait for mailbox + breakpoint +DEFAULT_QEMU_PANE = 0 +DEFAULT_GDB_PANE = 1 + + +# --------------------------------------------------------------------------- +# QEMU command builder +# --------------------------------------------------------------------------- +def qemu_cmd(paused: bool, gdb_target: str, debugcon_log: str) -> list[str]: + cmd = [ + "qemu-system-x86_64", + "-machine", "q35,accel=kvm:tcg,kernel-irqchip=split", + "-cpu", "max", + "-smp", "4", + "-m", "2G", + "-drive", f"if=pflash,format=raw,readonly=on,file={OVMF_CODE}", + "-drive", f"if=pflash,format=raw,file={OVMF_VARS}", + "-device", "isa-debug-exit,iobase=0xf4,iosize=0x04", + "-device", "isa-debugcon,chardev=debugcon", + "-chardev", f"file,id=debugcon,path={debugcon_log}", + "-display", "none", + "-drive", f"if=none,id=nvme0,file={DISK_IMG},format=raw", + "-device", "nvme,drive=nvme0,serial=deadbeef", + "-device", "pcie-root-port,id=rp0,slot=0,chassis=1", + 
"-device", "pcie-root-port,id=rp1,slot=1,chassis=2", + "-device", "pcie-root-port,id=rp2,slot=2,chassis=3", + "-device", "virtio-gpu-pci,bus=rp0", + "-device", "qemu-xhci,id=xhci0", + "-device", "usb-kbd,bus=xhci0.0", + "-device", "usb-mouse,bus=xhci0.0", + "-device", "virtio-net-pci,id=nic0,bus=rp2", + "-nic", "none", + # GDB stub via TCP — survives system_reset (unlike unix sockets) + "-gdb", f"tcp::{gdb_target.split(':')[-1]}", + ] + if paused: + cmd.append("-S") + else: + cmd.append("-no-reboot") + return cmd + + +# --------------------------------------------------------------------------- +# tmux helpers +# --------------------------------------------------------------------------- +def tmux_send(session: str, pane: int, text: str): + subprocess.run( + ["tmux", "send-keys", "-t", f"{session}:0.{pane}", text, "Enter"], + check=True, + ) + + +def tmux_capture(session: str, pane: int) -> str: + r = subprocess.run( + ["tmux", "capture-pane", "-t", f"{session}:0.{pane}", "-p"], + capture_output=True, text=True, + ) + return r.stdout + + +def tmux_kill_pane_process(session: str, pane: int): + """Send Ctrl-C to whatever is running in the pane.""" + subprocess.run( + ["tmux", "send-keys", "-t", f"{session}:0.{pane}", "C-c"], + check=False, + ) + time.sleep(0.5) + + +def ensure_tmux_session(session: str): + r = subprocess.run(["tmux", "has-session", "-t", session], + capture_output=True) + if r.returncode != 0: + subprocess.run( + ["tmux", "new-session", "-d", "-s", session, "-x", "220", "-y", "50"], + check=True, + ) + # Create second pane + subprocess.run( + ["tmux", "split-window", "-h", "-t", session], + check=True, + ) + print(f"[gdb-auto] Created tmux session '{session}' with 2 panes") + else: + # Ensure at least 2 panes exist + r2 = subprocess.run( + ["tmux", "list-panes", "-t", session], + capture_output=True, text=True, + ) + if r2.stdout.count("\n") < 2: + subprocess.run( + ["tmux", "split-window", "-h", "-t", session], + check=False, + ) + + +# 
--------------------------------------------------------------------------- +# Main debug session — single QEMU run, mailbox watchpoint approach +# --------------------------------------------------------------------------- +def run_debug_session( + session: str, + qemu_pane: int, + gdb_pane: int, + timeout_boot: int, + interactive: bool, +): + # Clean up stale socket + try: + os.unlink(GDB_SOCKET) + except FileNotFoundError: + pass + try: + os.unlink(DEBUGCON_LOG) + except FileNotFoundError: + pass + + # Start QEMU running (not paused) — the mailbox watchpoint will halt it + # automatically when efi_main writes the sentinel. No -S needed. + cmd = " ".join(qemu_cmd(paused=False, gdb_target=GDB_TARGET, + debugcon_log=DEBUGCON_LOG)) + print(f"[gdb-auto] Starting QEMU (running, watchpoint will halt at efi_main)...") + tmux_kill_pane_process(session, qemu_pane) + time.sleep(1) + tmux_send(session, qemu_pane, f"cd {WORKSPACE} && {cmd}") + + # Wait for GDB TCP port to be ready + import socket as _socket + host, port = GDB_TARGET.rsplit(":", 1) + deadline = time.time() + 15 + while time.time() < deadline: + try: + s = _socket.create_connection((host, int(port)), timeout=1) + s.close() + break + except (ConnectionRefusedError, OSError): + time.sleep(0.5) + else: + sys.exit(f"ERROR: QEMU GDB port {GDB_TARGET} never opened — is QEMU starting correctly?") + + print(f"[gdb-auto] GDB socket ready. 
Spawning GDB (pexpect)...") + + child = pexpect.spawn( + "gdb", + cwd=WORKSPACE, + encoding=None, + timeout=30, + logfile=open("/tmp/gdb-auto-raw.log", "wb"), + ) + + def gdb_cmd(cmd: str, timeout: int = 15) -> str: + child.sendline(cmd.encode()) + child.expect(GDB_PROMPT_RE.encode(), timeout=timeout) + out = child.before.decode(errors="replace").strip() + return out + + def gdb_print(cmd: str, timeout: int = 15): + out = gdb_cmd(cmd, timeout=timeout) + if out: + lines = [l for l in out.splitlines() + if l.strip() and l.strip() != cmd.strip()] + for l in lines: + print(f" {l}") + return out + + try: + child.expect(GDB_PROMPT_RE.encode(), timeout=15) + print("[gdb-auto] GDB started") + + gdb_cmd("set pagination off") + gdb_cmd("set confirm off") + gdb_cmd("set architecture i386:x86-64") + gdb_cmd("set demangle-style rust") + gdb_cmd(f"symbol-file {SYMBOL_FILE}") + + out = gdb_cmd(f"source {GDB_SCRIPT}", timeout=20) + for line in out.splitlines(): + if any(tok in line for tok in ("Δ", "image_base", "efi_main link")): + print(f" {line}") + + print(f"[gdb-auto] Connecting to QEMU ({GDB_TARGET})...") + # Tell theseus-auto which target to use for post-reset reconnect + gdb_cmd(f'python gdb.set_convenience_variable("_gdb_target", "{GDB_TARGET}")') + gdb_cmd(f"target remote {GDB_TARGET}", timeout=15) + + rip_check = gdb_cmd("info registers rip") + rip = next((l for l in rip_check.splitlines() if "rip" in l), "") + if "0xfff0" in rip: + print(f"[gdb-auto] ✅ Confirmed halted at reset vector (rip=0xfff0)") + else: + print(f"[gdb-auto] ⚠️ Unexpected RIP after connect: {rip}") + + # Run theseus-auto — fully automated sequence: + # 1. Sets hw watchpoint on mailbox sentinel (0x7008) + # 2. Continues → UEFI boots → efi_main writes mailbox → watchpoint fires + # 3. Reads runtime address from 0x7000, calls theseus-load + # 4. Issues monitor system_reset, reconnects, continues + # 5. 
UEFI reboots → efi_main runs again → hits the sw breakpoint + print(f"[gdb-auto] Running theseus-auto (timeout {timeout_boot}s)...") + print(f"[gdb-auto] Watching mailbox sentinel at 0x7008 for " + f"magic 0xDEADBEEFCAFEF00D...") + + child.sendline(b"theseus-auto") + + # theseus-auto internally calls gdb.execute("continue") twice, each + # of which blocks until GDB stops. The pexpect expect() here waits for + # the final (gdb) prompt that appears after the efi_main breakpoint hit. + # The full timeout covers both the first boot (mailbox write) and the + # second boot (breakpoint hit) so multiply by 2 for safety. + idx = child.expect( + [GDB_PROMPT_RE.encode(), pexpect.TIMEOUT, pexpect.EOF], + timeout=timeout_boot * 2, + ) + output = child.before.decode(errors="replace") + + if idx == 1: + print(f"[gdb-auto] ⏰ Timeout ({timeout_boot*2}s) waiting for theseus-auto") + print(f"[gdb-auto] Check: was the kernel built with debug mailbox support?") + print(f"[gdb-auto] Check: does UEFI reach efi_main within the timeout?") + child.sendcontrol("c") + try: + child.expect(GDB_PROMPT_RE.encode(), timeout=10) + except Exception: + pass + gdb_print("info registers rip") + gdb_cmd("quit") + return + elif idx == 2: + print("[gdb-auto] ❌ GDB exited unexpectedly (EOF)") + return + + # idx == 0: theseus-auto completed and returned a prompt + print(f"[gdb-auto] theseus-auto output:") + for line in output.splitlines(): + if line.strip(): + print(f" {line}") + + if "Breakpoint" in output and "efi_main" in output and "failed to reconnect" not in output: + print(f"[gdb-auto] ✅ BREAKPOINT HIT at efi_main!") + elif "failed to reconnect" in output: + print(f"[gdb-auto] ⚠️ Reconnect after reset failed — see output above") + elif "mailbox fired" in output: + print(f"[gdb-auto] ✅ Mailbox fired — address captured") + else: + print(f"[gdb-auto] ⚠️ theseus-auto completed but breakpoint status unclear") + + if interactive: + print() + print("[gdb-auto] 
─────────────────────────────────────────────────") + print("[gdb-auto] Dropping into interactive GDB.") + print("[gdb-auto] Symbols loaded, stopped at efi_main.") + print(f"[gdb-auto] Ctrl-C to interrupt, 'q' to quit.") + print("[gdb-auto] ─────────────────────────────────────────────────") + child.interact() + else: + gdb_print("info registers rip") + gdb_print("backtrace 5") + gdb_cmd("quit") + + except pexpect.exceptions.TIMEOUT as e: + print(f"[gdb-auto] ❌ Unexpected pexpect timeout: {e}") + sys.exit(1) + except pexpect.exceptions.EOF: + pass # clean GDB exit + except KeyboardInterrupt: + print("\n[gdb-auto] Interrupted.") + try: + child.sendcontrol("c") + except Exception: + pass + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--tmux", default=DEFAULT_TMUX, metavar="SESSION", + help=f"tmux session name (default: {DEFAULT_TMUX})", + ) + parser.add_argument( + "--qemu-pane", type=int, default=DEFAULT_QEMU_PANE, metavar="N", + help=f"tmux pane index for QEMU (default: {DEFAULT_QEMU_PANE})", + ) + parser.add_argument( + "--gdb-pane", type=int, default=DEFAULT_GDB_PANE, metavar="N", + help=f"tmux pane index for GDB output (default: {DEFAULT_GDB_PANE})", + ) + parser.add_argument( + "--timeout-boot", type=int, default=DEFAULT_TIMEOUT_BOOT, metavar="SECS", + help=f"Max seconds to wait for mailbox watchpoint + breakpoint " + f"(default: {DEFAULT_TIMEOUT_BOOT})", + ) + parser.add_argument( + "--no-interactive", action="store_true", + help="Run non-interactively: check breakpoint then quit (CI mode)", + ) + args = parser.parse_args() + + # Validate workspace artifacts exist + for path in (SYMBOL_FILE, GDB_SCRIPT, OVMF_CODE, OVMF_VARS, DISK_IMG): + if not os.path.exists(path): + sys.exit(f"ERROR: 
required file not found: {path}\n" + f"Run 'make all' first.") + + ensure_tmux_session(args.tmux) + + run_debug_session( + session=args.tmux, + qemu_pane=args.qemu_pane, + gdb_pane=args.gdb_pane, + timeout_boot=args.timeout_boot, + interactive=not args.no_interactive, + ) + + +if __name__ == "__main__": + main() diff --git a/shared/src/constants.rs b/shared/src/constants.rs index 41060aa..4f03fd3 100644 --- a/shared/src/constants.rs +++ b/shared/src/constants.rs @@ -112,6 +112,39 @@ pub mod uefi { pub const ACPI_RSDP_SIGNATURE: &[u8; 8] = b"RSD PTR "; } +/// GDB Debug Mailbox +/// +/// A fixed low-memory page used to communicate the runtime `efi_main` address +/// to GDB without requiring a probe-then-restart workflow. +/// +/// Layout (at `DEBUG_MAILBOX_PHYS`): +/// offset +0x00 u64 Runtime virtual address of `efi_main` (written on entry) +/// offset +0x08 u64 Sentinel magic: `DEBUG_MAILBOX_MAGIC` (written after addr) +/// +/// GDB watches the sentinel location for the magic value. When it fires, it +/// reads the address at +0x00 and calls `theseus-load` automatically. +/// +/// The page at `DEBUG_MAILBOX_PHYS` is allocated via UEFI `AllocateType::Address` +/// before writing, so the firmware knows we own it. +pub mod debug_mailbox { + /// Physical address of the debug mailbox page. + /// + /// 0x7000 sits in the "conventional memory" gap below 0x10000 that OVMF + /// leaves as `EfiConventionalMemory`. We allocate it explicitly via UEFI + /// before writing to avoid aliasing with firmware data structures. + pub const PHYS: u64 = 0x7000; + + /// Byte offset within the mailbox page where the efi_main address lives. + pub const ADDR_OFFSET: u64 = 0x00; + + /// Byte offset within the mailbox page where the sentinel magic lives. + /// Written *after* the address — GDB watches this to know the addr is valid. + pub const MAGIC_OFFSET: u64 = 0x08; + + /// Sentinel value written to `PHYS + MAGIC_OFFSET` after the address. 
+ pub const MAGIC: u64 = 0xDEAD_BEEF_CAFE_F00D; +} + /// Exit Codes pub mod exit_codes { /// QEMU exit code for successful completion