diff --git a/Makefile b/Makefile index f31aac0..7e27bad 100644 --- a/Makefile +++ b/Makefile @@ -165,6 +165,28 @@ debug: all @echo "Starting QEMU paused with GDB on :1234 and monitor on 127.0.0.1:55555" QEMU_OPTS="-S -s" ./startQemu.sh headless +# Automated GDB session via pexpect (no manual address copying, no probe run). +# Uses the debug mailbox (physical 0x7000) — efi_main writes its own runtime +# address there on entry; GDB watches for it via a hardware watchpoint. +# +# Requires: pip install --break-system-packages pexpect +# Requires: tmux session named 'theseus' (created automatically if absent) +# +# Full interactive session (default): +# make debug-auto +# +# Non-interactive smoke-test (CI-friendly, exits after breakpoint check): +# make debug-auto-ci +.PHONY: debug-auto debug-auto-ci +debug-auto: all + @echo "Starting automated GDB session (mailbox watchpoint + pexpect)..." + python3 scripts/gdb-auto.py --tmux theseus + +debug-auto-ci: all + @echo "Starting non-interactive GDB breakpoint smoke-test..." + python3 scripts/gdb-auto.py --tmux theseus \ + --no-interactive --timeout-boot 180 + # Print a short help message describing common targets and how to set PROFILE .PHONY: help help: diff --git a/bootloader/src/main.rs b/bootloader/src/main.rs index 74b0813..1c32027 100644 --- a/bootloader/src/main.rs +++ b/bootloader/src/main.rs @@ -89,6 +89,41 @@ core::arch::global_asm!(".globl __chkstk", "__chkstk:", " jmp ___chkstk_ms",) /// direct `ExitBootServices` → `kernel_entry` transfer. #[entry] fn efi_main() -> Status { + // ----------------------------------------------------------------------- + // GDB debug mailbox — write runtime efi_main address to a fixed physical + // location so automated GDB tooling can discover it via a watchpoint. + // + // Must happen before any other UEFI work so the address is visible as early + // as possible. The only earlier call reserves the page via UEFI + // AllocateType::Address so the firmware records our ownership in the memory map. 
+ // + // Layout at DEBUG_MAILBOX_PHYS: + // +0x00 u64 runtime efi_main address (written first) + // +0x08 u64 magic sentinel (written second → GDB trigger) + // + // See: shared/src/constants.rs :: debug_mailbox + // debug.gdb :: theseus-auto command + // ----------------------------------------------------------------------- + use theseus_shared::constants::debug_mailbox; + use uefi::boot::{self as uefi_boot, AllocateType}; + use uefi::mem::memory_map::MemoryType as UefiMemType; + + // Allocate the mailbox page via UEFI so firmware records our ownership. + // Ignore errors — if the page is already allocated (e.g. by firmware) we + // fall back to a direct write; in QEMU/OVMF this range is always free. + let _ = uefi_boot::allocate_pages( + AllocateType::Address(debug_mailbox::PHYS), + UefiMemType::LOADER_DATA, + 1, + ); + + unsafe { + let base = debug_mailbox::PHYS as *mut u64; + // Write address first, then magic — GDB watches the magic location. + base.add(0).write_volatile(efi_main as *const () as u64); + base.add(1).write_volatile(debug_mailbox::MAGIC); + } + // Install pre-exit allocators that forward to UEFI Boot Services theseus_shared::allocator::install_pre_exit_allocators(pre_exit_alloc, pre_exit_dealloc); // Initialize UEFI environment and global output driver diff --git a/debug.gdb b/debug.gdb index 4e2f375..cea77d0 100644 --- a/debug.gdb +++ b/debug.gdb @@ -1,15 +1,47 @@ # Theseus EFI GDB helpers ---------------------------------------------------- # -# Usage: -# 1. launch QEMU with -S -s so CPUs start halted -# 2. gdb -x debug.gdb -# 3. let the firmware run once (it will print `efi_main @ 0x...`) -# 4. run: theseus-load 0x3da60a10 # replace with your runtime address -# (the command computes the relocation delta, reloads DWARF at the correct -# runtime base, and installs a hardware breakpoint at the entry plus -# software breakpoints at +0x200/+0x300) -# 5. reset the guest (e.g. 
`monitor system_reset` or restart QEMU) and rerun -# so `efi_main` executes again and trips the breakpoints. +# RECOMMENDED WORKFLOW (fully automated, no address copying): +# +# make debug-auto +# +# This starts QEMU, spawns GDB via pexpect, and runs 'theseus-auto' +# which uses a hardware watchpoint on the debug mailbox to automatically +# capture the runtime efi_main address and load symbols. You land inside +# efi_main with full Rust source-level symbols, hands-free. +# +# MANUAL WORKFLOW (if you want direct GDB control): +# +# make debug # starts QEMU paused on :1234 +# gdb -x debug.gdb # in a separate terminal +# (gdb) target remote localhost:1234 +# (gdb) theseus-auto # automated: watchpoint → symbols → stop +# +# If theseus-auto is unavailable (older build without debug mailbox): +# +# (gdb) target remote localhost:1234 +# (gdb) continue # let UEFI run; read "efi_main @ 0x..." from debugcon +# (gdb) theseus-load 0x<addr> # load symbols at the runtime address +# (gdb) monitor system_reset # reset guest so efi_main runs again with BP armed +# +# COMMANDS PROVIDED BY THIS SCRIPT: +# +# theseus-auto Fully automated session. Sets a hw watchpoint on +# the debug mailbox sentinel (0x7008), continues, +# waits for efi_main to write its address + magic, +# then calls theseus-load automatically. Stops inside +# efi_main with symbols loaded. No address argument. +# +# theseus-load <addr> Load DWARF symbols relocated to the given runtime +# efi_main address. Computes all section addresses +# dynamically from BOOTX64.SYM (no hardcoded offsets). +# Arms a hardware breakpoint at efi_main entry and +# software breakpoints at entry+0x200 and entry+0x300. +# +# theseus-go <addr> Like theseus-load but also issues 'continue'. +# +# SECTION DELTAS: +# Computed fresh from BOOTX64.SYM at startup — safe across rebuilds. +# Section layout is printed on load for verification. 
# set pagination off @@ -18,6 +50,12 @@ set demangle-style rust set breakpoint pending on set confirm off +# GDB stub connection target used by theseus-auto for reconnect after reset. +# Override before sourcing this file if the stub listens somewhere else: +# (gdb) python gdb.set_convenience_variable("_gdb_target", "localhost:1251") +# (gdb) source debug.gdb +# Default: localhost:1234 (the TCP stub that `make debug` starts) + # Make DWARF types and symbols available up-front. symbol-file build/BOOTX64.SYM @@ -26,7 +64,20 @@ import gdb import os import struct -SYMBOL_PATH = os.path.abspath("build/BOOTX64.SYM") +SYMBOL_PATH = os.path.abspath("build/BOOTX64.SYM") + +# GDB connection target for theseus-auto reconnect after system_reset. +# Defaults to localhost:1234 when the convenience variable is unset. +# Override via: python gdb.set_convenience_variable("_gdb_target", "localhost:1251") +def _get_gdb_target(): + """Read GDB connection target, re-evaluating the convenience variable each call.""" + raw = gdb.convenience_variable("_gdb_target") + if raw is not None: + return str(raw).strip('"').strip("'") + return "localhost:1234" + +# Evaluated at source time for initial display; call _get_gdb_target() to re-read. +_GDB_SOCKET = _get_gdb_target() SIGNATURE = b"THESEUSDBGBASE!\x00" def _read_elf(path): @@ -38,26 +89,69 @@ _elf_image = _read_elf(SYMBOL_PATH) if _elf_image[:4] != b"\x7fELF": raise gdb.GdbError(f"{SYMBOL_PATH} is not an ELF file") -_E_PHOFF = struct.unpack_from(" 0). _image_base_link = min( - (vaddr - offset) for p_type, offset, vaddr, _ in _iter_program_headers() - if p_type == 1 # PT_LOAD + (vaddr - offset) + for p_type, offset, vaddr, _ in _iter_program_headers() + if p_type == 1 and offset != 0 ) -# Locate the signature inside the ELF file to recover its link-time address. 
+# -------------------------------------------------------------------------- +# Section headers → compute per-section deltas from image base +# (computed fresh from the actual ELF — no hardcoded constants) +# -------------------------------------------------------------------------- +_shstr_entry_off = _E_SHOFF + _E_SHSTRNDX * _E_SHENTSIZE +_shstr_data_off = struct.unpack_from(" delta from image base (link-time) + +for i in range(_E_SHNUM): + off = _E_SHOFF + i * _E_SHENTSIZE + sh_name_i = struct.unpack_from(" Example: theseus-load 0x3da60a10 + +The runtime address is printed by the bootloader on the debug port: + efi_main @ 0x + +This command: + 1. Computes the runtime image base from the runtime efi_main address. + 2. Applies per-section relocation (computed from BOOTX64.SYM, not hardcoded). + 3. Reloads DWARF symbols via add-symbol-file with all section addresses. + 4. Sets a hardware breakpoint at efi_main entry + two software sentinels. """ def __init__(self): @@ -163,38 +262,155 @@ Example: theseus-load 0x3da60a10 signature_runtime = image_base_runtime + (_signature_link_addr - _image_base_link) gdb.write("Theseus symbol loader:\n") - gdb.write(f" · runtime efi_main: 0x{runtime_entry:x}\n") - gdb.write(f" · image base (runtime):0x{image_base_runtime:x}\n") - gdb.write(f" · signature (runtime): 0x{signature_runtime:x}\n") - - text_addr = image_base_runtime + TEXT_DELTA - rdata_addr = image_base_runtime + RDATA_DELTA - data_addr = image_base_runtime + DATA_DELTA - bss_addr = image_base_runtime + BSS_DELTA - eh_addr = image_base_runtime + EH_DELTA - reloc_addr = image_base_runtime + RELOC_DELTA - - gdb.write(" · Section remap targets:\n") - gdb.write(f" .text → 0x{text_addr:x}\n") - gdb.write(f" .rdata → 0x{rdata_addr:x}\n") - gdb.write(f" .data → 0x{data_addr:x}\n") - gdb.write(f" .bss → 0x{bss_addr:x}\n") - gdb.write(f" .eh_fram→ 0x{eh_addr:x}\n") - gdb.write(f" .reloc → 0x{reloc_addr:x}\n") - - _remove_existing_symbols() - gdb.execute( + gdb.write(f" · 
runtime efi_main: 0x{runtime_entry:x}\n") + gdb.write(f" · image base (runtime): 0x{image_base_runtime:x}\n") + gdb.write(f" · signature (runtime): 0x{signature_runtime:x}\n") + + # Build add-symbol-file command with all non-debug sections + # that have a non-zero address (skip debug sections — GDB handles + # those automatically from the ELF's DWARF). + SKIP_PREFIXES = (".debug_", ".shstrtab") + sections = { + name: image_base_runtime + delta + for name, delta in _section_deltas.items() + if not any(name.startswith(p) for p in SKIP_PREFIXES) + } + + if ".text" not in sections: + raise gdb.GdbError("No .text section found in symbol file") + + text_addr = sections.pop(".text") + + gdb.write(" · Section runtime addresses:\n") + gdb.write(f" .text = 0x{text_addr:x}\n") + extra_args = [] + for sname, saddr in sorted(sections.items()): + gdb.write(f" {sname:20s} = 0x{saddr:x}\n") + extra_args.append(f"-s {sname} 0x{saddr:x}") + + cmd = ( f"add-symbol-file {SYMBOL_PATH} 0x{text_addr:x} " - f"-s .rdata 0x{rdata_addr:x} " - f"-s .data 0x{data_addr:x} " - f"-s .bss 0x{bss_addr:x} " - f"-s .eh_fram 0x{eh_addr:x} " - f"-s .reloc 0x{reloc_addr:x}", - to_string=True, + + " ".join(extra_args) ) + _remove_existing_symbols() + gdb.execute(cmd, to_string=True) + gdb.write(" · Symbols loaded.\n") + _set_breakpoints(runtime_entry) - gdb.write("⛳ Breakpoints armed on theseus_efi::efi_main (entry [HW], +0x200, +0x300). Reset or rerun so they trigger.\n") + gdb.write("⛳ Ready. Reset or rerun the guest so efi_main fires.\n") + gdb.write(" (call 'continue' or 'c' to run)\n") TheseusLoadCommand() + + +class TheseusGoCommand(gdb.Command): + """Shortcut: theseus-load then continue. + +Usage: theseus-go + +Equivalent to: + theseus-load + continue +but issued in the correct synchronous context to avoid the +'target is running' race that can occur in batch/script mode. 
+""" + + def __init__(self): + super().__init__("theseus-go", gdb.COMMAND_USER) + + def invoke(self, arg, from_tty): + gdb.execute(f"theseus-load {arg}", to_string=False) + gdb.execute("continue", to_string=False) + +TheseusGoCommand() + + +class TheseusAutoCommand(gdb.Command): + """Fully automated Theseus debug session — no address argument needed. + +Usage: theseus-auto + +Workflow: + 1. Sets a hardware watchpoint on the debug mailbox sentinel + (physical address 0x7008, value 0xDEADBEEFCAFEF00D). + 2. Issues 'continue' — UEFI boots, efi_main writes its runtime address + to 0x7000 then writes the magic to 0x7008. + 3. Watchpoint fires. theseus-auto reads the runtime efi_main address from + 0x7000, removes the watchpoint, and calls theseus-load with it. + 4. Returns to GDB prompt. Execution is stopped inside efi_main (at the + mailbox write instruction) with full Rust source-level symbols loaded. + +No reset needed — efi_main is caught on its first execution. QEMU does not +need to be started with -S; gdb-auto.py starts it running. 
+ +Requirements: + - Kernel built with debug mailbox support: + bootloader/src/main.rs — mailbox write at efi_main entry + shared/src/constants.rs — debug_mailbox constants + +Recommended via make: + $ make debug-auto # starts everything automatically + +Manual (any QEMU session with GDB stub, -S optional): + $ make debug # QEMU paused on :1234 + $ gdb -x debug.gdb + (gdb) target remote localhost:1234 + (gdb) theseus-auto +""" + + # Mailbox layout (matches shared/src/constants.rs :: debug_mailbox) + MAILBOX_PHYS = 0x7000 + ADDR_OFFSET = 0x00 # u64: runtime efi_main address + MAGIC_OFFSET = 0x08 # u64: sentinel written after address + MAGIC_VALUE = 0xDEADBEEFCAFEF00D + + def __init__(self): + super().__init__("theseus-auto", gdb.COMMAND_USER) + + def invoke(self, arg, from_tty): + addr_ptr = self.MAILBOX_PHYS + self.ADDR_OFFSET + magic_ptr = self.MAILBOX_PHYS + self.MAGIC_OFFSET + magic_expr = f"*(unsigned long long*)0x{magic_ptr:x} == 0x{self.MAGIC_VALUE:x}" + + gdb.write(f"theseus-auto: setting watchpoint on mailbox sentinel " + f"(*0x{magic_ptr:x} == 0x{self.MAGIC_VALUE:x})...\n") + + # Hardware watchpoint — fires when the sentinel is written + wp = gdb.Breakpoint(magic_expr, gdb.BP_WATCHPOINT, gdb.WP_WRITE, + internal=False) + + gdb.write("theseus-auto: continuing until mailbox is written...\n") + gdb.execute("continue", to_string=False) + + # After this returns we're stopped at the watchpoint. + # Read the runtime efi_main address. + try: + runtime_addr = int( + gdb.parse_and_eval(f"*(unsigned long long*)0x{addr_ptr:x}") + ) & 0xFFFFFFFFFFFFFFFF + except gdb.error as e: + raise gdb.GdbError( + f"theseus-auto: failed to read mailbox address: {e}" + ) + + gdb.write(f"theseus-auto: mailbox fired — " + f"runtime efi_main = 0x{runtime_addr:x}\n") + + # Remove the watchpoint + wp.delete() + + # We are stopped inside efi_main right now (the watchpoint fired while + # efi_main was executing the mailbox write). 
Load symbols — this gives + # us source-level debug for the rest of this run. The breakpoints set + # by theseus-load are armed for any future efi_main invocations. + # + # We do NOT issue continue here: execution is already inside efi_main + # with full symbols loaded. The user is exactly where they want to be. + # They can step, inspect locals, set additional breakpoints, then 'c'. + gdb.execute(f"theseus-load 0x{runtime_addr:x}", to_string=False) + gdb.write("theseus-auto: ✅ stopped inside efi_main with symbols loaded.\n") + gdb.write(" Use 'stepi', 'next', 'c', or set more breakpoints.\n") + +TheseusAutoCommand() end diff --git a/docs/axioms/debug.md b/docs/axioms/debug.md index 537f732..04d4d13 100644 --- a/docs/axioms/debug.md +++ b/docs/axioms/debug.md @@ -52,7 +52,43 @@ Affected modules: - `kernel/src/panic.rs` - `kernel/src/logging/*` -## A3: The runtime monitor is a first-class inspection surface +## A3: The GDB debug mailbox provides a stable physical address for runtime efi_main discovery + +**REQUIRED** + +`efi_main` writes its own runtime virtual address to physical `0x7000` and a +magic sentinel (`0xDEADBEEFCAFEF00D`) to physical `0x7008` as the very first +action at entry; the only UEFI call before it is the page reservation below. This allows GDB to discover the correct +load address via a hardware watchpoint without a probe-then-restart workflow. + +The page at `0x7000` is reserved via `AllocateType::Address` so the UEFI +firmware records ownership in the memory map. The sentinel is written *after* +the address so a watchpoint on `0x7008` guarantees the address at `0x7000` is +already valid when it fires. 
+ +Layout: +``` +0x7000 + 0x00 u64 runtime efi_main virtual address +0x7000 + 0x08 u64 magic sentinel 0xDEADBEEFCAFEF00D +``` + +Implements / evidence: +- `bootloader/src/main.rs` — mailbox write at top of `efi_main` +- `shared/src/constants.rs::debug_mailbox` — address and magic constants + +Related plans: +- `../plans/observability.md` + +Tooling: +- `debug.gdb::theseus-auto` — GDB command that uses this mailbox +- `scripts/gdb-auto.py` — pexpect driver for fully automated sessions +- `make debug-auto` — one-command entry point + +Affected modules: +- `bootloader/src/main.rs` +- `shared/src/constants.rs` + +## A4: The runtime monitor is a first-class inspection surface **REQUIRED** diff --git a/docs/development-and-debugging.md b/docs/development-and-debugging.md index 2a872e8..21e57f4 100644 --- a/docs/development-and-debugging.md +++ b/docs/development-and-debugging.md @@ -59,13 +59,77 @@ For one-shot QMP control against the host-side relay socket, use: ``` ## Debugging with GDB -- Launch QEMU with `QEMU_OPTS="-S -s"` to pause CPU 0 and listen on TCP 1234. -- Use the provided script `debug.gdb` as a starting point: - ```bash - gdb -x debug.gdb - ``` -- Useful breakpoints: `kernel_entry`, `environment::continue_after_stack_switch`, `interrupts::handler_timer`. -- Inspect the bootloader-to-kernel handoff by examining the pointer in `RDI` right before `kernel_entry` runs. + +### Automated session (recommended) + +```bash +make debug-auto +``` + +That's it. The script (`scripts/gdb-auto.py`) will: + +1. Start QEMU with a GDB stub on TCP :1251, kept alive in a tmux pane. +2. Spawn GDB via pexpect (drives it as a real interactive TTY — no batch-mode races). +3. Run `theseus-auto` — a GDB Python command that sets a hardware watchpoint on + the **debug mailbox** at physical `0x7008`. When `efi_main` starts it writes + its own runtime address to `0x7000` then the magic sentinel to `0x7008`; + the watchpoint fires and `theseus-auto` loads DWARF symbols automatically. +4. 
Drop you into interactive GDB, stopped inside `efi_main` with full Rust + source-level symbols. No address copying, no probe run, works every boot. + +Requires `pexpect` and a `tmux` session named `theseus` (created automatically): + +```bash +pip install --break-system-packages pexpect +``` + +Non-interactive CI mode (exits after verifying breakpoint + printing backtrace): + +```bash +make debug-auto-ci +``` + +### Manual session + +If you want direct GDB control, or are debugging something before `efi_main`: + +```bash +make debug # QEMU paused on :1234 with GDB stub +gdb -x debug.gdb # in a separate terminal +``` + +Then at the GDB prompt: + +``` +(gdb) target remote localhost:1234 +(gdb) theseus-auto # watchpoint → symbols → stop at efi_main automatically +``` + +Or, if you need to load symbols at a specific address manually: + +``` +(gdb) continue # let UEFI run; read "efi_main @ 0x..." from debugcon +(gdb) theseus-load 0x<addr> # load symbols at runtime address +``` + +`debug.gdb` provides three commands: + +| Command | What it does | +|---------|-------------| +| `theseus-auto` | Fully automated: watchpoint on mailbox → capture address → load symbols. No argument. | +| `theseus-load <addr>` | Load DWARF at given runtime `efi_main` address; arms breakpoints at entry, +0x200, +0x300. | +| `theseus-go <addr>` | Like `theseus-load` but also issues `continue`. | + +Section deltas are computed dynamically from `build/BOOTX64.SYM` on every GDB +startup — no hardcoded offsets that go stale after rebuilds. + +### Useful breakpoints + +- `kernel_entry` — first kernel code after ExitBootServices +- `environment::continue_after_stack_switch` — post-stack-switch environment init +- `interrupts::handler_timer` — LAPIC timer interrupt path + +Inspect the bootloader-to-kernel handoff by examining `RDI` just before `kernel_entry` runs (it holds the `*const Handoff` pointer). ## Logging - Macros (`log_error!`, `log_warn!`, `log_info!`, `log_debug!`, `log_trace!`) live in `kernel/src/logging`. 
diff --git a/docs/plans/phase1-cpu-platform.md b/docs/plans/phase1-cpu-platform.md new file mode 100644 index 0000000..eccd44f --- /dev/null +++ b/docs/plans/phase1-cpu-platform.md @@ -0,0 +1,517 @@ +# Phase 1 — CPU & Platform Hardening + +Detailed task breakdown for Phase 1 of the TheseusOS roadmap. +Last updated: 2026-03-23 based on full code audit of all relevant modules. + +Tasks are marked: +- `[ ] TODO` — Not started +- `[~] IN PROGRESS` — Active work +- `[x] DONE` — Complete and binding + +Leaf tasks include a **Spec** and **Completion Criteria**. + +--- + +## 1.1 Memory Subsystem — Understand & Refactor + +### What we actually have (audit findings) + +- **`BootFrameAllocator`** — walks UEFI memory descriptors, linear scan, reserved pool of 16 frames. Boot-only, one-way. +- **`PhysicalMemoryManager`** — bitmap first-fit, global `Mutex>`. Initialized post-heap from handoff. Has `alloc_frame`, `free_frame`, `alloc_contiguous`. Solid. +- **`MemoryManager`** — boot-time PML4 builder. Transitional. After boot it's done, no runtime API. +- **`mapping.rs`** — a library of helpers that take `&mut impl FrameSource`. Boot-context only. No runtime mapper exists. +- **`TemporaryWindow`** — single scratch VA slot (`0xFFFF_FFFE_0000_0000`). One page at a time. Exists, works. +- **`PageTableBuilder`** — convenience wrapper over mapping helpers. Still boot-context. +- **`DMA allocator`** — contiguous physical allocation via `alloc_contiguous` + DMA pool on top. Works. +- **IST stacks** — 4× 16 KiB static arrays in `.bss.stack`, already mapped and wired into TSS. 
**Done.** + +**VA layout (all hardcoded):** +``` +0xFFFF800000000000 PHYS_OFFSET (linear physmap) +0xFFFF_FF80_0000_0000 ACPI window +0xFFFF_FFFE_0000_0000 TemporaryWindow +0xFFFFFFFF80000000 KERNEL_VIRTUAL_BASE +0xFFFFFFFF90000000 Framebuffer +0xFFFFFFFFA0000000 TEMP_HEAP +0xFFFFFFFFB0000000 KERNEL_HEAP +``` + +**Gaps:** +- No VA allocator — all virtual addresses are hardcoded constants +- No runtime page table manager — post-boot there is no map/unmap API +- The boot-to-runtime handoff (`record_boot_consumed_region` path) works but has no overlap validation +- No kernel stack allocator — only the boot stack and the 4 static IST stacks exist + +--- + +### 1.1.1 [x] DONE — Physical frame allocator, DMA, TemporaryWindow + +Solid. No action needed. + +--- + +### 1.1.2 Virtual Address Space Allocator + +`[ ] TODO` + +**Prerequisites:** None — this is pure bookkeeping with no hardware dependency. + +**Context:** +Every VA region is a hardcoded constant. To dynamically allocate kernel stacks, map MMIO for new drivers, or support per-process page tables, we need a VA allocator that knows the kernel's address space layout and can carve out new ranges. + +**Spec:** +Implement a `KernelVaAllocator` in a new `kernel/src/memory/va_alloc.rs`: +- Manages a reserved window of kernel VA space, e.g. 
`0xFFFF900000000000..0xFFFFB00000000000` (32 TiB, well away from existing hardcoded regions) +- `alloc_va(size: u64, align: u64) -> Option<u64>` — returns the base of an aligned VA range of `size` bytes +- `free_va(base: u64, size: u64)` — returns range to pool +- Backed by a simple bump allocator to start; a free-list for reclamation in a follow-up +- Global singleton behind a `Mutex`; initialized during boot before any dynamic mappings are needed +- **Does NOT allocate physical frames** — that's the caller's job + +**Completion Criteria:** +- `alloc_va` / `free_va` compile and pass unit tests +- Two calls to `alloc_va` return non-overlapping ranges +- Allocations respect requested alignment +- Used for at least one real allocation (kernel stack from 1.1.4 below) +- The reserved window is documented in `docs/axioms/memory.md` as part of VA layout + +--- + +### 1.1.3 Runtime Page Table Manager + +`[ ] TODO` + +**Prerequisites:** 1.1.2 (VA allocator), `PhysicalMemoryManager` (done) + +**Context:** +`mapping.rs` works only at boot time — callers pass in `&mut impl FrameSource` and the helpers build tables directly. After boot there is no API to map or unmap anything. The `PersistentFrameAllocator` exists but isn't wired into any mapper. + +**What we can reuse:** `mapping.rs` helpers. The `PersistentFrameAllocator`. `PHYS_OFFSET` for translating page table frame addresses to virtual. 
+ +**Spec:** +Implement a `KernelMapper` type in `kernel/src/memory/runtime_mapper.rs`: +- Constructed from the current CR3 and `PHYS_OFFSET` (`x86_64::OffsetPageTable` under the hood) +- `map_page(va: u64, pa: u64, flags: PageTableFlags) -> Result<(), MapError>` — maps one 4K page; allocates page table frames from `PersistentFrameAllocator` +- `map_range(va: u64, pa: u64, size: u64, flags: PageTableFlags) -> Result<(), MapError>` — maps contiguous range +- `unmap_page(va: u64) -> Result<u64, MapError>` — unmaps, returns PA; does NOT free the physical frame (caller decides) +- `translate(va: u64) -> Option<u64>` — walks page tables, returns PA +- After each map/unmap: calls `invlpg` on the affected VA +- Global singleton behind `Mutex`; initialized after high-half transition completes +- **Out of scope:** TLB shootdowns across CPUs (SMP, Phase 1.7) + +**Completion Criteria:** +- `map_page`, `unmap_page`, `translate` work for 4K pages in kernel VA +- Mapping an already-mapped VA returns `Err(MapError::AlreadyMapped)`, not a panic +- After `unmap_page`, `translate` returns `None` and `invlpg` was called +- Test: map a fresh frame, write to it, unmap, confirm translate returns None +- No use of `BootFrameAllocator` at runtime + +--- + +### 1.1.4 Kernel Stack Allocator + +`[ ] TODO` + +**Prerequisites:** 1.1.2 (VA allocator), 1.1.3 (runtime mapper) + +**Context:** +`stack.rs` today contains only one function: `switch_to_kernel_stack_and_jump`. The boot stack is a static 64 KiB array in `.bss.stack`, mapped during bring-up. The 4 IST stacks are also static arrays. There is no facility to allocate additional stacks at runtime — needed for kernel threads (Phase 2) and any future per-CPU stacks. 
+ +**Spec:** +Implement in `kernel/src/memory/stack_alloc.rs` (or extend `stack.rs`): + +```rust +pub struct StackRegion { + pub top: u64, // Initial RSP value (high address, 16-byte aligned) + pub bottom: u64, // Base of usable stack + pub guard: u64, // Guard page VA (= bottom - PAGE_SIZE) + pub va_base: u64,// Start of VA reservation (= guard) + pub size: u64, // Usable stack size in bytes +} + +pub fn alloc_kernel_stack(size: u64) -> Result<StackRegion, AllocError> +pub fn free_kernel_stack(region: StackRegion) -> Result<(), AllocError> +``` + +- `alloc_kernel_stack`: + 1. Uses `KernelVaAllocator` to reserve `size + PAGE_SIZE` bytes of VA + 2. Maps guard page as **not present** (i.e., just doesn't map it — any access faults) + 3. Allocates `size / PAGE_SIZE` frames from `PhysicalMemoryManager` + 4. Maps them via `KernelMapper` + 5. Returns `StackRegion` with `top = va_base + PAGE_SIZE + size` (aligned down to 16 bytes) + +- `free_kernel_stack`: + 1. Unmaps all stack pages via `KernelMapper` and frees frames to `PhysicalMemoryManager` + 2. Returns VA region to `KernelVaAllocator` + +**Completion Criteria:** +- `alloc_kernel_stack` returns a stack where writing to the guard page triggers `#PF` +- Allocating and freeing leaves allocators in consistent state +- Stack top is 16-byte aligned (required by ABI) +- The existing static IST stacks in `gdt.rs` do NOT need to be replaced by this — they stay as-is; this is for future dynamic stacks + +--- + +### 1.1.5 Boot handoff documentation + validation + +`[ ] TODO` + +**Prerequisites:** None — this is documentation and a single assertion. + +**Context:** +The boot-to-runtime allocator handoff (`BootFrameAllocator` → `PhysicalMemoryManager` via `record_boot_consumed_region` + `drain_boot_consumed`) works but has no overlap validation and no clear prose doc. Before SMP (which allocates more frames during bring-up), this needs to be solid and documented. 
+ +**Spec:** +- Add overlap check in `init_from_handoff`: after reserving all consumed regions, walk the bitmap and verify no free frame overlaps any consumed region (or simply assert in debug mode) +- Add a `dump_boot_consumed_log()` debug monitor command that shows what was logged (available even after init since init clears the log — maybe log a summary count before draining) +- Write a clear "Allocator Handoff" section in `docs/axioms/memory.md` explaining: + - Timeline: when BootFrameAllocator is the only allocator, when PhysicalMemoryManager comes online + - What `record_boot_consumed_region` records and why it's safe to log lazily + - What happens if a region is missed + +**Completion Criteria:** +- Debug assertion in `init_from_handoff` doesn't fire under normal boot +- `docs/axioms/memory.md` has the Allocator Handoff section +- Monitor shows a consumed-region count at boot (even if just a log line) + +--- + +## 1.2 Driver Subsystem — Formalization + +### What we actually have (audit findings) + +- **`Driver` trait + `DriverManager`** — the framework is fully defined and compiles. `probe`, `init`, `irq_handler`, `read`, `write` are all there. First-success binding. Global `Mutex`. +- **`driver_data: Option`** — defined with raw-pointer cast helpers (`set_driver_state`, `driver_state`, `driver_state_mut`). **The casts are defined but not actually used by any driver today.** PCI and xHCI both use their own globals/statics, not `driver_data`. +- **`pci.rs`** — full ECAM enumeration, BAR decoding (32-bit, 64-bit memory, I/O), capability parsing, MSI enable. **Does NOT call `DriverManager::add_device`.** xHCI is bound via hardcoded init call. +- **`handlers.rs`** — hardcoded vectors: 0x40 (APIC timer), 0x41 (serial RX), 0x50 (xHCI MSI), 0xFE (APIC error). xHCI vector calls `usb::handle_xhci_interrupt()` directly — **not through DriverManager**. +- **`framebuffer.rs`** — drawing utilities only (boot logo, heart animation). 
Mapped at fixed VA `0xFFFFFFFF90000000` from `map_framebuffer_alloc()` using `handoff.gop_fb_base`. Not a driver. + +--- + +### 1.2.1 Fix driver state storage + +`[ ] TODO` + +**Prerequisites:** None — isolated change + +**Context:** +`driver_data: Option` with raw-pointer casts is defined but not actually wired to any real driver state today. This is the right time to fix it before drivers start using it and the unsound pattern propagates. + +**Spec:** +Replace `driver_data: Option` in `Device` with `driver_data: Option>`: +- `set_driver_state(state: T)` — boxes and stores +- `driver_state() -> Option<&T>` — downcasts via `Any::downcast_ref` +- `driver_state_mut() -> Option<&mut T>` — downcasts via `Any::downcast_mut` +- Remove all `unsafe` raw-pointer casts from the trait methods +- Since no drivers currently use `driver_data`, there are no callers to update — clean break + +**Completion Criteria:** +- No `unsafe` raw casts in `traits.rs` +- Downcasting to the wrong type returns `None`, not UB +- Compiles with the existing (non-using) driver code unchanged +- Add a unit test: set state as `u32`, read back as `u32` (Ok), read back as `u64` (None) + +--- + +### 1.2.2 PCI enumeration feeds DriverManager + +`[ ] TODO` + +**Prerequisites:** 1.2.1 (driver_data fix) + +**Context:** +PCI enumeration works and BARs are decoded, but nothing flows to `DriverManager`. The xHCI driver is currently wired by a hardcoded call somewhere in the boot sequence, not via PCI probe. This needs to be closed before adding any new PCI driver. 
+ +**Spec:** +In `pci.rs`, after enumerating each PCI function: +- Construct a `Device` with `DeviceId::Pci { segment, bus, device, function }` +- Set `device.class` from PCI class code (map the relevant PCI class codes to `DeviceClass` variants; add any missing variants like `Display`, `Audio`) +- Decode BARs into `DeviceResource::Memory` / `DeviceResource::Io` entries on the device +- Call `driver_manager().lock().add_device(device)` + +Then: register the xHCI driver with `DriverManager` during boot driver init, and remove the hardcoded xHCI init call. The xHCI driver's `probe()` method should match on `DeviceClass::UsbController`. + +**Completion Criteria:** +- After boot, `devices list` monitor command shows all PCI functions that `pci list` shows +- xHCI is bound via this path (its `probe()` is called by DriverManager, not hardcoded) +- BARs appear as `DeviceResource` entries on PCI devices +- No regression: USB keyboard still works end-to-end + +--- + +### 1.2.3 IRQ ownership model + +`[ ] TODO` + +**Prerequisites:** 1.2.2 (PCI → DriverManager) + +**Context:** +IRQ vectors are hardcoded in `handlers.rs`. Vector 0x50 calls `usb::handle_xhci_interrupt()` directly. There's no mechanism for a driver to claim a vector or for DriverManager to dispatch it. For MSI, each device gets its own vector — so shared IRQs aren't the issue; the issue is that vectors are hardcoded and drivers can't claim them dynamically. 
+ +**Spec:** +- Add an IRQ vector registry: a simple array of 256 `Option<fn()>` (or `Option<&'static dyn Driver + device index>`) protected by a `Mutex`, initialized to `None` +- Add `fn register_irq_handler(vector: u8, handler: fn()) -> Result<(), &'static str>` — fails if already registered +- The general interrupt dispatch path in `handlers.rs` (for vectors other than the reserved timer/error ones): if no registered handler, log and EOI; otherwise call the handler +- xHCI driver registers its vector during `init()` via this API +- Serial driver registers its vector during `init()` via this API +- Timer and APIC error keep their dedicated handlers (reserved vectors, not routed through the registry) +- Export `irq list` monitor command that shows registered vectors + +**What stays hardcoded for now:** Timer (0x40), APIC error (0xFE) — these are kernel internals, not driver IRQs. + +**Completion Criteria:** +- xHCI and serial register their vectors during `init()`; not hardcoded in `handlers.rs` +- Registering the same vector twice returns an error +- `irq list` shows registered vectors and a string name for each +- USB and serial still work after the refactor + +--- + +### 1.2.4 Framebuffer as a proper driver + +`[ ] TODO` + +**Prerequisites:** 1.1.3 (runtime mapper), 1.2.2 (PCI → DriverManager) + +**Context:** +`framebuffer.rs` is drawing utilities over a raw fixed VA. It's not a driver, has no ownership model, and the mapping was done at boot-time by `map_framebuffer_alloc()`. For now we don't need mode-setting or multi-display, but wrapping it in a driver gives us a clean ownership boundary and lets us get rid of the raw global VA access. + +**Note on priority:** This is lower priority than 1.2.2 and 1.2.3. If it's inconvenient, it can wait until after Phase 2 starts. 
+ +**Spec:** +- Add `DeviceClass::Framebuffer` enum variant +- Create `kernel/src/drivers/video/framebuffer.rs` implementing `Driver`: + - `probe()`: matches `DeviceClass::Framebuffer` + - `init()`: reads FB base, size, stride, pixel format from handoff (already accessible at this point); stores as `FramebufferState` via `set_driver_state` + - Provides a `FramebufferHandle` with `write_pixel`, `fill_rect`, `blit` +- Register one synthetic `Device` with `DeviceClass::Framebuffer` during early boot (before PCI enumeration; it's a platform device not a PCI device) +- The existing drawing code in `framebuffer.rs` is refactored to go through `FramebufferHandle` +- The raw `0xFFFFFFFF90000000` VA access is encapsulated inside `drivers/video/framebuffer.rs` — still the same mapping, but no one else writes to it directly + +**Completion Criteria:** +- Boot screen renders correctly after refactor +- `devices list` shows framebuffer device as bound +- No direct writes to `0xFFFFFFFF90000000` outside the framebuffer driver +- The `framebuffer.rs` drawing functions are wrappers around `FramebufferHandle` methods + +--- + +## 1.3 TSS + IST Stacks + +`[x] DONE` + +**Audit finding:** `gdt.rs` already has a fully functional implementation: +- 4× 16 KiB static IST stacks (`IST_DF_STACK`, `IST_NMI_STACK`, `IST_MC_STACK`, `IST_PF_STACK`) +- TSS descriptor present in GDT; `load_tss()` is called (`ltr` executed) +- IST fields populated in `build_gdt_state()` with top-of-stack addresses, 16-byte aligned +- `refresh_tss_ist()` exists for runtime IST pointer updates +- IDT entries for `#NMI`, `#DF`, `#MC`, `#PF` use the IST indices + +**Nothing to do here.** Per-CPU TSS (for SMP) deferred to Phase 1.7. + +--- + +## 1.4 APIC Timer Calibration + +### What we actually have (audit findings) + +`timer.rs` configures the LAPIC timer with a `/16` divider and hardcoded initial counts (100,000 for one-shot tests, 50,000 for periodic). 
**No calibration is performed.** There is no reference to HPET, PIT, or TSC frequency. The values happen to work in QEMU but are meaningless on real hardware and don't give real-time semantics. + +--- + +### 1.4.1 HPET or PIT calibration reference + +`[ ] TODO` + +**Prerequisites:** 1.5.2 (LAPIC abstraction, so calibration uses the clean API) + +**Spec:** +Implement `calibrate_apic_timer() -> u64` returning `ticks_per_ms`: + +**HPET path (preferred):** +- ACPI HPET table is already parsed in `acpi/mod.rs` — check if HPET base address is available +- Map HPET MMIO via `KernelMapper` (1.1.3) — one page at base address +- Read `GCAP_ID` register: `counter_clk_period` field (femtoseconds per tick, bits [63:32]) +- Period in ns = `counter_clk_period / 1_000_000` +- Enable HPET main counter (`GEN_CONF` register, bit 0) +- Read `MAIN_COUNTER`, start LAPIC timer with a large initial count, wait until MAIN_COUNTER advances by 10ms worth of HPET ticks, read LAPIC current count, compute delta + +**PIT fallback** (if HPET not available): +- Channel 2 + port 0x61 gate trick; 1.193182 MHz known frequency +- Set channel 2 to mode 0 (one-shot), count = 11932 (~10 ms) +- Start LAPIC timer, start PIT, wait for PIT OUT (poll port 0x61 bit 5) +- Read LAPIC remaining count, compute delta + +Store result in `static APIC_TICKS_PER_MS: AtomicU64`. 
+ +**Completion Criteria:** +- Returns a non-zero value (for QEMU: typically 100–10000 ticks/ms depending on config) +- `APIC_TICKS_PER_MS` set before scheduler tick init +- Monitor command `cpu timer` shows calibrated ticks/ms value +- Graceful fallback: if HPET unavailable, uses PIT + +--- + +### 1.4.2 Periodic scheduler tick + +`[ ] TODO` + +**Prerequisites:** 1.4.1 + +**Spec:** +After calibration, reconfigure timer in **periodic mode** at `SCHEDULER_TICK_HZ` (default 100 Hz = 10 ms/tick): +- `const SCHEDULER_TICK_HZ: u64 = 100` in `config.rs` +- `init_scheduler_tick()` computes `initial_count = APIC_TICKS_PER_MS * (1000 / SCHEDULER_TICK_HZ)`, sets APIC timer to periodic mode +- Timer ISR increments `static TICK_COUNT: AtomicU64` +- Provide `pub fn current_tick() -> u64` and `pub fn ticks_to_ms(ticks: u64) -> u64` +- TSC-Deadline mode: stretch goal; check `CpuFeatures::get().tsc_deadline` (from 1.6) and use it if available + +**Completion Criteria:** +- `TICK_COUNT` increments at ~100 Hz (verified: read count, wait in QEMU, read again) +- `ticks_to_ms` gives values consistent with calibration +- Timer ISR doesn't break monitor or serial output + +--- + +## 1.5 x2APIC Support + +### What we actually have (audit findings) + +`apic.rs` already reads `IA32_APIC_BASE` and has `ApicAccessMode { Disabled, XApic, X2Apic }` and `apic_base_info()` that returns mode. Mode detection is there. However, **all actual register access is MMIO-based** (`local_apic_read/write` use PHYS_OFFSET + base address). There are no x2APIC MSR accessors. So the kernel will GPF if firmware boots into x2APIC mode. + +--- + +### 1.5.1 Detect and report APIC mode + +`[x] DONE (partially)` + +Detection works (`apic_base_info()`, `ApicAccessMode`). Boot log should already report this. The only gap: need to verify `cpu apic` monitor command surfaces the mode clearly. + +**Remaining:** Add `ApicAccessMode` to the `cpu apic` monitor command output if not already there. 
+ +--- + +### 1.5.2 Abstract LAPIC access behind mode-agnostic interface + +`[ ] TODO` + +**Prerequisites:** None (self-contained change to `apic.rs`) + +**Context:** +All LAPIC register reads/writes go through `local_apic_read(reg_offset)` / `local_apic_write(reg_offset, val)` which do MMIO. These two functions need to check detected mode and either use MMIO (xAPIC) or `rdmsr`/`wrmsr` at `0x800 + (reg_offset >> 4)` (x2APIC). The callers don't change. + +**Spec:** +Modify `local_apic_read` / `local_apic_write` in `apic.rs`: +- Call `apic_base_info()` once at first use; cache the result in a `static OnceCell` +- If `XApic`: current MMIO path (unchanged) +- If `X2Apic`: use `RDMSR`/`WRMSR` with MSR = `0x800 + (offset / 16)`; note x2APIC register width is 32-bit for most, 64-bit for ICR — handle ICR as a special case +- If `Disabled`: panic with a clear message +- No changes to callers (timer, EOI, APIC ID, etc.) + +**Completion Criteria:** +- Kernel boots without GPF in QEMU with `-cpu host` or `-cpu Skylake-Server,+x2apic` +- xAPIC path unchanged (QEMU default still works) +- APIC timer fires in both modes + +--- + +## 1.6 CPUID Feature Abstraction + +`[ ] TODO` + +**Prerequisites:** None — isolated, no hardware side effects + +### What we actually have (audit findings) + +`cpu.rs` uses `raw_cpuid::CpuId` for feature detection, but **checks are scattered**: +- `cpu.rs`: `CpuId::new()` used to check `has_xsave`, control register setup +- `apic.rs`: inline CPUID checks for x2APIC detection +- `interrupts/mod.rs`: likely has inline CPUID for TSC/APIC features +- No central feature cache — each check re-executes `CPUID` instruction + +**Spec:** +Implement `kernel/src/cpu_features.rs`: + +```rust +pub struct CpuFeatures { + pub x2apic: bool, + pub tsc_deadline: bool, + pub rdtscp: bool, + pub fsgsbase: bool, // needed for Phase 10 TLS + pub smep: bool, + pub smap: bool, // enable both in 1.6 if present + pub xsave: bool, + pub avx: bool, + pub avx2: bool, +} + +impl 
CpuFeatures { + pub fn detect() -> Self { ... } // executes CPUID, populates all fields + pub fn get() -> &'static Self // panics if detect() not called first +} +``` + +- `detect()` called once in boot sequence (before `apic.rs` initializes APIC) +- All existing scattered `CpuId::new()` calls replaced by `CpuFeatures::get().<field>` +- Enable SMEP/SMAP in CR4 here if present (currently may be done in `cpu.rs` — consolidate) +- Monitor command `cpu features` prints all fields + +**Completion Criteria:** +- `CpuFeatures::get()` panics with clear message if called before `detect()` +- All scattered CPUID checks replaced by `CpuFeatures::get()` field accesses +- `cpu features` monitor command output matches QEMU's CPU model +- SMEP/SMAP enabled if CPU supports them (verify with CR4 read in monitor) + +--- + +## 1.7 SMP Bring-up + +`[ ] TODO — deferred to after Phase 2 scheduler` + +Not spec'd at leaf level here. Will become a sub-plan once the scheduler is stable on BSP. +High-level tasks noted for awareness: +- Parse AP LAPIC IDs from MADT (parsing exists, AP extraction needed) +- Per-CPU storage (GDT, IDT, TSS, stack, `gs`-relative) +- INIT-SIPI-SIPI sequence + 16→64-bit AP trampoline +- TLB shootdown IPI infrastructure +- AP joins scheduler runqueue + +--- + +## Revised Dependency Graph + +``` +1.6 CPUID (no deps) + └─► 1.5.2 LAPIC abstraction + └─► 1.4.1 Calibration + └─► 1.4.2 Periodic tick + +1.1.2 VA Allocator (no deps) + └─► 1.1.3 Runtime Mapper + └─► 1.1.4 Stack Allocator + └─► 1.2.4 Framebuffer Driver (also needs 1.2.2) + +1.2.1 Fix driver_data (no deps) + └─► 1.2.2 PCI → DriverManager + └─► 1.2.3 IRQ ownership + +1.1.5 Boot handoff docs (no deps, do anytime) +1.3 TSS/IST (DONE) +1.5.1 APIC mode detection (DONE, minor monitor polish) +``` + +## Suggested Order of Work + +1. **1.6 CPUID** — small, isolated, no hardware risk, unblocks x2APIC +2. **1.5.2 LAPIC abstraction** — unblocks calibration, zero regression risk (xAPIC path unchanged) +3. 
**1.4.1 + 1.4.2** — calibration + scheduler tick; unblocks Phase 2 +4. **1.2.1 Fix driver_data** — isolated, clean break (nothing uses it yet) +5. **1.1.2 VA Allocator** — pure Rust bookkeeping, no hardware +6. **1.1.3 Runtime Mapper** — needs VA alloc + persistent frame alloc (both done) +7. **1.1.4 Stack Allocator** — needs 1.1.2 + 1.1.3 +8. **1.2.2 PCI → DriverManager** — closes the PCI loop, removes hardcoded xHCI init +9. **1.2.3 IRQ ownership** — cleans up handlers.rs +10. **1.2.4 Framebuffer driver** — lowest urgency, can do last or skip to Phase 2 +11. **1.1.5 Boot handoff docs** — anytime, no blockers + +SMP (1.7) waits for Phase 2 scheduler. diff --git a/docs/roadmap.md b/docs/roadmap.md new file mode 100644 index 0000000..4a0c2c4 --- /dev/null +++ b/docs/roadmap.md @@ -0,0 +1,220 @@ +# TheseusOS Roadmap + +**Goal:** A POSIX-compatible enough kernel to compile and run core Unix tools (coreutils, busybox, a shell) without crippling them in the process. + +This document is a living roadmap. It reflects what's been built, what comes next, and the long arc toward a usable POSIX surface. Phases are roughly sequential but some work can happen in parallel. + +--- + +## Phase 0 — Foundation ✅ (Done) + +Everything here is merged and working. This is what the vibe coding experiment has built so far. 
+ +**Boot & UEFI** +- [x] UEFI bootloader (custom, no GRUB/Limine) +- [x] Kernel ELF loaded from ESP +- [x] ExitBootServices + handoff struct to kernel +- [x] Higher-half kernel mapping (0xFFFF800000000000+) +- [x] GDT with kernel code/data segments + +**Memory** +- [x] UEFI memory map ingestion +- [x] Physical frame allocator +- [x] Kernel heap (linked_list_allocator) +- [x] DMA allocator + DMA pool +- [x] Temporary mapping window + +**CPU & Interrupts** +- [x] IDT + NMI handlers +- [x] xAPIC initialized +- [x] APIC timer interrupts firing (BSP) +- [x] ACPI + MADT parsing + +**Drivers** +- [x] Framebuffer (UEFI GOP) +- [x] Serial (debugcon + UART) +- [x] PCI enumeration +- [x] USB xHCI driver (full: rings, MSI/MSI-X, HID boot protocol) +- [x] USB keyboard (HID → ASCII key events) + +**Tooling & DX** +- [x] Debug monitor (serial shell with commands: cpu, memory, pci, usb, tables, io, devices) +- [x] Logging subsystem with verbosity filter +- [x] theseus-qemu runner (profiles, relays, timeout, build-before-run) +- [x] QEMU relay sockets (serial, debugcon, HMP, QMP) +- [x] GDB mailbox protocol + gdb-auto.py automation +- [x] tmux live loop for interactive debugging +- [x] Test framework (bare metal + kernel tests) + +**Documentation** +- [x] Axioms (boot, memory, arch-x86_64, debug) +- [x] Plans (boot-flow, memory, interrupts-and-platform, drivers-and-io, observability, x2apic-prep) +- [x] docs/index.md, docs/map.md, AGENTS.md + +--- + +## Phase 1 — CPU & Platform Hardening + +Before we can do multi-process work, the CPU foundations need to be solid. + +- [ ] **APIC timer calibration** — measure ticks/ms against HPET or PIT; needed for real preemption +- [ ] **x2APIC support** — plan exists (`x2apic-prep.md`), just needs implementation +- [ ] **TSS + IST stacks** — separate stacks for NMI/DF/MCE; required for safe exception handling +- [ ] **CPUID feature abstraction** — centralized feature detection (SSE, AVX, TSC-Deadline, x2APIC, etc.) 
+- [ ] **SMP bring-up** — wake APs via INIT/SIPI, per-CPU GDT/IDT/TSS, IPI infrastructure + +--- + +## Phase 2 — Scheduling & Kernel Threads + +The kernel needs to be able to walk and chew gum at the same time. + +- [ ] **Kernel threads** — switchable execution contexts with their own stacks +- [ ] **Context switching** — save/restore registers (GP + optional SSE), stack pointer swap +- [ ] **Preemptive scheduler** — timer-driven, round-robin to start; pluggable later +- [ ] **Per-CPU runqueues** — one queue per AP; work-stealing later +- [ ] **Mutex / spinlock / wait queues** — synchronization primitives that interact with the scheduler +- [ ] **Idle tasks** — per-CPU idle threads (HLT loop) + +--- + +## Phase 3 — User-Mode & Address Spaces + +This is where the kernel becomes an OS rather than a fancy bootloader. + +- [ ] **Per-process page tables** — each process gets its own CR3; kernel mapped in upper half of all +- [ ] **User address space layout** — conventional ELF layout: text/data/BSS/heap/stack below ~0x7FFFFFFFFFFF +- [ ] **Ring 3 entry** — SYSCALL/SYSRET setup (IA32_STAR, IA32_LSTAR, IA32_FMASK) +- [ ] **User-mode stack** — set up at exec time; stack guard page +- [ ] **SMEP/SMAP** — enable supervisor mode execution/access protection (CR4) + +--- + +## Phase 4 — System Calls (POSIX Surface — Core) + +The syscall interface is the contract. Start minimal, add as tools demand it. 
+ +**Process lifecycle** +- [ ] `exit` / `exit_group` +- [ ] `fork` (or `clone` as the primitive) +- [ ] `exec` / `execve` — load a new ELF into the current address space +- [ ] `wait` / `waitpid` +- [ ] `getpid` / `getppid` + +**I/O fundamentals** +- [ ] `read` / `write` +- [ ] `open` / `close` +- [ ] `dup` / `dup2` +- [ ] File descriptor table per-process (stdin/stdout/stderr wired at init) + +**Memory** +- [ ] `mmap` (anonymous first — needed by malloc) +- [ ] `munmap` +- [ ] `brk` (optional if mmap anonymous is solid) + +**Signals (minimal)** +- [ ] `kill` +- [ ] `signal` / `sigaction` +- [ ] `SIGKILL`, `SIGTERM`, `SIGSEGV`, `SIGCHLD` + +--- + +## Phase 5 — VFS & Filesystems + +No OS is useful without a filesystem. Start in-memory, add real storage later. + +- [ ] **VFS layer** — inode/dentry abstraction; pluggable backends +- [ ] **tmpfs / ramfs** — memory-backed FS; used for initrd and /tmp +- [ ] **devfs / /dev stubs** — `/dev/null`, `/dev/zero`, `/dev/tty`, `/dev/console` +- [ ] **procfs stubs** — `/proc/self`, `/proc/self/maps`, `/proc/self/exe` (enough for musl) +- [ ] **FAT32 driver** — re-use bootloader knowledge; gives access to ESP / disk images +- [ ] **Pipes** — `pipe()` syscall; anonymous pipe between processes +- [ ] **ext2** — read-only first; gives access to a standard Linux disk image format + +--- + +## Phase 6 — ELF Loader & Init + +- [ ] **ELF64 static loader** — parse PT_LOAD segments, map into user address space, jump to entry +- [ ] **Program interpreter field** — detect and reject dynamic ELFs with a clear error (until dynamic linking is ready) +- [ ] **Auxiliary vector (auxv)** — pass AT_PHDR, AT_ENTRY, AT_PAGESZ etc. to new process +- [ ] **Init process (PID 1)** — statically linked, minimal; brings up /dev, mounts tmpfs, execs shell +- [ ] **Dynamic linker** (stretch) — load interpreter, resolve shared libs; needed for non-musl-static binaries + +--- + +## Phase 7 — libc Port + +Port a libc so we can compile tools against it. 
+ +- [ ] **musl libc** — preferred: clean, static-linking-first, small syscall surface, actively ported to new kernels +- [ ] **Syscall compatibility pass** — audit musl's syscall usage; implement or stub everything it needs +- [ ] **Toolchain** — cross-compiler targeting `x86_64-theseus` (custom target JSON, sysroot) +- [ ] **newlib** (alternative) — simpler but less complete; good fallback if musl is painful + +--- + +## Phase 8 — Core Userspace Tools + +This is the "close enough to POSIX" milestone. + +- [ ] **Busybox** — single binary with sh, ls, cat, grep, echo, cp, mv, mkdir, etc. Compile against musl. +- [ ] **dash** — minimal POSIX shell; lighter than bash, easier to port +- [ ] **coreutils** (stretch) — GNU or uutils-coreutils (Rust); richer than busybox but more syscall surface +- [ ] **Self-hosting build** — can we build a simple C program inside TheseusOS itself? + +--- + +## Phase 9 — Network Stack + +Optional for the core POSIX goal but needed for anything actually useful. + +- [ ] **virtio-net driver** — QEMU virtio NIC; simplest possible NIC to implement +- [ ] **e1000 driver** — alternative; well-documented, real hardware target +- [ ] **TCP/IP stack** — port smoltcp (Rust, no_std-friendly) or lwIP +- [ ] **BSD socket API** — `socket`, `bind`, `connect`, `listen`, `accept`, `send`, `recv` +- [ ] **DNS stub** — enough for `getaddrinfo` to work + +--- + +## Phase 10 — Polish & Deeper POSIX Compliance + +The long tail. Most tools will work after Phase 8; this phase makes them work *well*. 
+ +- [ ] **pthreads** — POSIX threads (`clone` with CLONE_THREAD, per-thread TLS via `arch_prctl`) +- [ ] **TLS (Thread-Local Storage)** — `arch_prctl(ARCH_SET_FS)`, FS.base MSR +- [ ] **mmap file-backed** — map files directly into address space +- [ ] **Proper signals** — signal masks, `sigprocmask`, `SA_RESTART`, sigaltstack +- [ ] **Terminal emulation** — proper TTY/PTY (`/dev/tty`, `tcgetattr`/`tcsetattr`); needed by interactive shells +- [ ] **`/proc` expansion** — `/proc/cpuinfo`, `/proc/meminfo`, `/proc/net/...` +- [ ] **`/sys` stubs** — minimal sysfs enough for tools that probe it +- [ ] **User/group IDs** — UID/GID, `getuid`, `setuid` etc. (even if always root for now) + +--- + +## Open Questions / Risks + +- **Fork vs spawn:** True `fork()` (copy-on-write) is expensive to implement correctly. Many modern minimal OSes implement `posix_spawn` as the primitive and fake `fork+exec`. Worth deciding early. +- **Dynamic linking:** Static musl gets you far. Dynamic linking is a lot of work (dynamic linker, GOT/PLT, shared lib loading). Probably defer until after Phase 8. +- **SMP complexity:** Multi-core makes everything harder (TLB shootdowns, per-CPU state, lock contention). Can defer AP bring-up until after the scheduler is solid on BSP. +- **Storage:** Need a real disk image (QEMU `-drive`) for anything beyond ramfs. FAT32 from the ESP is a natural first target. +- **Capability / security model:** Even a simple UID=0-only model needs to be decided early or it'll be painful to retrofit. 
+ +--- + +## Rough Timeline Sense + +Not a commitment to dates — just a feeling for scale: + +| Phase | Effort | +|-------|--------| +| 1 — CPU hardening | Small-medium (x2APIC plan exists, SMP is the hard part) | +| 2 — Scheduling | Medium (context switch + preemption is fiddly) | +| 3 — User-mode | Medium (mostly CPU plumbing) | +| 4 — Syscalls | Medium-large (lots of ground to cover, but well-documented) | +| 5 — VFS | Large (abstraction design matters a lot here) | +| 6 — ELF loader | Small-medium (static ELF is actually not that bad) | +| 7 — libc port | Medium (musl is cooperative; toolchain setup is the annoying part) | +| 8 — Core tools | Small if libc works (mostly build system wrangling) | +| 9 — Network | Large | +| 10 — Polish | Ongoing forever | diff --git a/scripts/gdb-auto.py b/scripts/gdb-auto.py new file mode 100755 index 0000000..a3555a4 --- /dev/null +++ b/scripts/gdb-auto.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python3 +""" +gdb-auto.py — One-command GDB debug session for TheseusOS. + +Usage: + python3 scripts/gdb-auto.py [options] + make debug-auto # interactive (default) + make debug-auto-ci # non-interactive CI mode + +Workflow: + 1. Starts QEMU running (no -S) with a TCP GDB stub on localhost:1251, + keeping it alive in a tmux pane (required: tmux session named 'theseus', + created automatically if absent). + 2. Spawns GDB via pexpect and sources debug.gdb. + 3. Connects to QEMU and runs 'theseus-auto', which: + a. Sets a hardware watchpoint on the debug mailbox sentinel at 0x7008. + b. Issues continue — UEFI boots, efi_main writes its runtime address + to 0x7000 then writes magic 0xDEADBEEFCAFEF00D to 0x7008. + c. Watchpoint fires. theseus-auto reads the address, calls theseus-load + with correct per-section deltas (computed from BOOTX64.SYM). + d. Returns to GDB prompt stopped inside efi_main with full Rust symbols. + 4. In interactive mode: hands off to pexpect.interact() for live GDB use. 
+ In --no-interactive mode: prints RIP + backtrace and exits (CI-friendly). + +Key properties: + - Single QEMU run, no probe-then-restart. + - Address captured from the running binary — correct every boot regardless + of UEFI load address variation. + - Hard timeout on every wait — never hangs silently. + - Ctrl-C via pexpect.sendcontrol reliably interrupts the remote target. + +Requirements: + pip install --break-system-packages pexpect + Kernel built with debug mailbox support (see shared/src/constants.rs::debug_mailbox + and bootloader/src/main.rs efi_main entry). + +tmux: + QEMU runs in a tmux pane so it survives across bwrap sandbox exec sessions. + Default session: 'theseus', pane 0 = QEMU, pane 1 = GDB output. + Override with --tmux, --qemu-pane, --gdb-pane. +""" + +import argparse +import os +import subprocess +import sys +import time + +try: + import pexpect +except ImportError: + sys.exit( + "ERROR: pexpect not installed.\n" + "Run: pip install --break-system-packages pexpect" + ) + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- +WORKSPACE = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +DEBUGCON_LOG = "/tmp/theseus-gdb-auto-debugcon.log" +GDB_SOCKET = "/tmp/theseus-gdb-auto.sock" # unused: kept for reference +GDB_TARGET = "localhost:1251" # TCP port — survives system_reset +GDB_SCRIPT = os.path.join(WORKSPACE, "debug.gdb") +SYMBOL_FILE = os.path.join(WORKSPACE, "build", "BOOTX64.SYM") +OVMF_CODE = os.path.join(WORKSPACE, "OVMF", "OVMF_CODE.fd") +OVMF_VARS = os.path.join(WORKSPACE, "build", "OVMF_VARS.fd") +DISK_IMG = os.path.join(WORKSPACE, "build", "disk.img") + +GDB_PROMPT_RE = r"\(gdb\)" + +DEFAULT_TMUX = "theseus" +DEFAULT_TIMEOUT_BOOT = 120 # seconds to wait for mailbox + breakpoint +DEFAULT_QEMU_PANE = 0 +DEFAULT_GDB_PANE = 1 + + +# --------------------------------------------------------------------------- +# 
QEMU command builder +# --------------------------------------------------------------------------- +def qemu_cmd(paused: bool, gdb_target: str, debugcon_log: str) -> list[str]: + cmd = [ + "qemu-system-x86_64", + "-machine", "q35,accel=kvm:tcg,kernel-irqchip=split", + "-cpu", "max", + "-smp", "4", + "-m", "2G", + "-drive", f"if=pflash,format=raw,readonly=on,file={OVMF_CODE}", + "-drive", f"if=pflash,format=raw,file={OVMF_VARS}", + "-device", "isa-debug-exit,iobase=0xf4,iosize=0x04", + "-device", "isa-debugcon,chardev=debugcon", + "-chardev", f"file,id=debugcon,path={debugcon_log}", + "-display", "none", + "-drive", f"if=none,id=nvme0,file={DISK_IMG},format=raw", + "-device", "nvme,drive=nvme0,serial=deadbeef", + "-device", "pcie-root-port,id=rp0,slot=0,chassis=1", + "-device", "pcie-root-port,id=rp1,slot=1,chassis=2", + "-device", "pcie-root-port,id=rp2,slot=2,chassis=3", + "-device", "virtio-gpu-pci,bus=rp0", + "-device", "qemu-xhci,id=xhci0", + "-device", "usb-kbd,bus=xhci0.0", + "-device", "usb-mouse,bus=xhci0.0", + "-device", "virtio-net-pci,id=nic0,bus=rp2", + "-nic", "none", + # GDB stub via TCP — survives system_reset (unlike unix sockets) + "-gdb", f"tcp::{gdb_target.split(':')[-1]}", + ] + if paused: + cmd.append("-S") + else: + cmd.append("-no-reboot") + return cmd + + +# --------------------------------------------------------------------------- +# tmux helpers +# --------------------------------------------------------------------------- +def tmux_send(session: str, pane: int, text: str): + subprocess.run( + ["tmux", "send-keys", "-t", f"{session}:0.{pane}", text, "Enter"], + check=True, + ) + + +def tmux_capture(session: str, pane: int) -> str: + r = subprocess.run( + ["tmux", "capture-pane", "-t", f"{session}:0.{pane}", "-p"], + capture_output=True, text=True, + ) + return r.stdout + + +def tmux_kill_pane_process(session: str, pane: int): + """Send Ctrl-C to whatever is running in the pane.""" + subprocess.run( + ["tmux", "send-keys", "-t", 
f"{session}:0.{pane}", "C-c"], + check=False, + ) + time.sleep(0.5) + + +def ensure_tmux_session(session: str): + r = subprocess.run(["tmux", "has-session", "-t", session], + capture_output=True) + if r.returncode != 0: + subprocess.run( + ["tmux", "new-session", "-d", "-s", session, "-x", "220", "-y", "50"], + check=True, + ) + # Create second pane + subprocess.run( + ["tmux", "split-window", "-h", "-t", session], + check=True, + ) + print(f"[gdb-auto] Created tmux session '{session}' with 2 panes") + else: + # Ensure at least 2 panes exist + r2 = subprocess.run( + ["tmux", "list-panes", "-t", session], + capture_output=True, text=True, + ) + if r2.stdout.count("\n") < 2: + subprocess.run( + ["tmux", "split-window", "-h", "-t", session], + check=False, + ) + + +# --------------------------------------------------------------------------- +# Main debug session — single QEMU run, mailbox watchpoint approach +# --------------------------------------------------------------------------- +def run_debug_session( + session: str, + qemu_pane: int, + gdb_pane: int, + timeout_boot: int, + interactive: bool, +): + # Clean up stale socket + try: + os.unlink(GDB_SOCKET) + except FileNotFoundError: + pass + try: + os.unlink(DEBUGCON_LOG) + except FileNotFoundError: + pass + + # Start QEMU running (not paused) — the mailbox watchpoint will halt it + # automatically when efi_main writes the sentinel. No -S needed. 
+ cmd = " ".join(qemu_cmd(paused=False, gdb_target=GDB_TARGET, + debugcon_log=DEBUGCON_LOG)) + print(f"[gdb-auto] Starting QEMU (running, watchpoint will halt at efi_main)...") + tmux_kill_pane_process(session, qemu_pane) + time.sleep(1) + tmux_send(session, qemu_pane, f"cd {WORKSPACE} && {cmd}") + + # Wait for GDB TCP port to be ready + import socket as _socket + host, port = GDB_TARGET.rsplit(":", 1) + deadline = time.time() + 15 + while time.time() < deadline: + try: + s = _socket.create_connection((host, int(port)), timeout=1) + s.close() + break + except (ConnectionRefusedError, OSError): + time.sleep(0.5) + else: + sys.exit(f"ERROR: QEMU GDB port {GDB_TARGET} never opened — is QEMU starting correctly?") + + print(f"[gdb-auto] GDB socket ready. Spawning GDB (pexpect)...") + + child = pexpect.spawn( + "gdb", + cwd=WORKSPACE, + encoding=None, + timeout=30, + logfile=open("/tmp/gdb-auto-raw.log", "wb"), + ) + + def gdb_cmd(cmd: str, timeout: int = 15) -> str: + child.sendline(cmd.encode()) + child.expect(GDB_PROMPT_RE.encode(), timeout=timeout) + out = child.before.decode(errors="replace").strip() + return out + + def gdb_print(cmd: str, timeout: int = 15): + out = gdb_cmd(cmd, timeout=timeout) + if out: + lines = [l for l in out.splitlines() + if l.strip() and l.strip() != cmd.strip()] + for l in lines: + print(f" {l}") + return out + + try: + child.expect(GDB_PROMPT_RE.encode(), timeout=15) + print("[gdb-auto] GDB started") + + gdb_cmd("set pagination off") + gdb_cmd("set confirm off") + gdb_cmd("set architecture i386:x86-64") + gdb_cmd("set demangle-style rust") + gdb_cmd(f"symbol-file {SYMBOL_FILE}") + + out = gdb_cmd(f"source {GDB_SCRIPT}", timeout=20) + for line in out.splitlines(): + if any(tok in line for tok in ("Δ", "image_base", "efi_main link")): + print(f" {line}") + + print(f"[gdb-auto] Connecting to QEMU ({GDB_TARGET})...") + # Tell theseus-auto which target to use for post-reset reconnect + gdb_cmd(f'python 
gdb.set_convenience_variable("_gdb_target", "{GDB_TARGET}")') + gdb_cmd(f"target remote {GDB_TARGET}", timeout=15) + + rip_check = gdb_cmd("info registers rip") + rip = next((l for l in rip_check.splitlines() if "rip" in l), "") + if "0xfff0" in rip: + print(f"[gdb-auto] ✅ Confirmed halted at reset vector (rip=0xfff0)") + else: + print(f"[gdb-auto] ⚠️ Unexpected RIP after connect: {rip}") + + # Run theseus-auto — fully automated sequence: + # 1. Sets hw watchpoint on mailbox sentinel (0x7008) + # 2. Continues → UEFI boots → efi_main writes mailbox → watchpoint fires + # 3. Reads runtime address from 0x7000, calls theseus-load + # 4. Issues monitor system_reset, reconnects, continues + # 5. UEFI reboots → efi_main runs again → hits the sw breakpoint + print(f"[gdb-auto] Running theseus-auto (timeout {timeout_boot}s)...") + print(f"[gdb-auto] Watching mailbox sentinel at 0x7008 for " + f"magic 0xDEADBEEFCAFEF00D...") + + child.sendline(b"theseus-auto") + + # theseus-auto internally calls gdb.execute("continue") twice, each + # of which blocks until GDB stops. The pexpect expect() here waits for + # the final (gdb) prompt that appears after the efi_main breakpoint hit. + # The full timeout covers both the first boot (mailbox write) and the + # second boot (breakpoint hit) so multiply by 2 for safety. 
+ idx = child.expect( + [GDB_PROMPT_RE.encode(), pexpect.TIMEOUT, pexpect.EOF], + timeout=timeout_boot * 2, + ) + output = child.before.decode(errors="replace") + + if idx == 1: + print(f"[gdb-auto] ⏰ Timeout ({timeout_boot*2}s) waiting for theseus-auto") + print(f"[gdb-auto] Check: was the kernel built with debug mailbox support?") + print(f"[gdb-auto] Check: does UEFI reach efi_main within the timeout?") + child.sendcontrol("c") + try: + child.expect(GDB_PROMPT_RE.encode(), timeout=10) + except Exception: + pass + gdb_print("info registers rip") + gdb_cmd("quit") + return + elif idx == 2: + print("[gdb-auto] ❌ GDB exited unexpectedly (EOF)") + return + + # idx == 0: theseus-auto completed and returned a prompt + print(f"[gdb-auto] theseus-auto output:") + for line in output.splitlines(): + if line.strip(): + print(f" {line}") + + if "Breakpoint" in output and "efi_main" in output and "failed to reconnect" not in output: + print(f"[gdb-auto] ✅ BREAKPOINT HIT at efi_main!") + elif "failed to reconnect" in output: + print(f"[gdb-auto] ⚠️ Reconnect after reset failed — see output above") + elif "mailbox fired" in output: + print(f"[gdb-auto] ✅ Mailbox fired — address captured") + else: + print(f"[gdb-auto] ⚠️ theseus-auto completed but breakpoint status unclear") + + if interactive: + print() + print("[gdb-auto] ─────────────────────────────────────────────────") + print("[gdb-auto] Dropping into interactive GDB.") + print("[gdb-auto] Symbols loaded, stopped at efi_main.") + print(f"[gdb-auto] Ctrl-C to interrupt, 'q' to quit.") + print("[gdb-auto] ─────────────────────────────────────────────────") + child.interact() + else: + gdb_print("info registers rip") + gdb_print("backtrace 5") + gdb_cmd("quit") + + except pexpect.exceptions.TIMEOUT as e: + print(f"[gdb-auto] ❌ Unexpected pexpect timeout: {e}") + sys.exit(1) + except pexpect.exceptions.EOF: + pass # clean GDB exit + except KeyboardInterrupt: + print("\n[gdb-auto] Interrupted.") + try: + child.sendcontrol("c") 
+ except Exception: + pass + + +# --------------------------------------------------------------------------- +# CLI: main() parses options, checks build artifacts, runs the debug session +# --------------------------------------------------------------------------- +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--tmux", default=DEFAULT_TMUX, metavar="SESSION", + help=f"tmux session name (default: {DEFAULT_TMUX})", + ) + parser.add_argument( + "--qemu-pane", type=int, default=DEFAULT_QEMU_PANE, metavar="N", + help=f"tmux pane index for QEMU (default: {DEFAULT_QEMU_PANE})", + ) + parser.add_argument( + "--gdb-pane", type=int, default=DEFAULT_GDB_PANE, metavar="N", + help=f"tmux pane index for GDB output (default: {DEFAULT_GDB_PANE})", + ) + parser.add_argument( + "--timeout-boot", type=int, default=DEFAULT_TIMEOUT_BOOT, metavar="SECS", + help=f"Max seconds to wait for mailbox watchpoint + breakpoint " + f"(default: {DEFAULT_TIMEOUT_BOOT})", + ) + parser.add_argument( + "--no-interactive", action="store_true", + help="Run non-interactively: check breakpoint then quit (CI mode)", + ) + args = parser.parse_args() + + # Exit early, with a hint to run 'make all', if any required artifact is missing + for path in (SYMBOL_FILE, GDB_SCRIPT, OVMF_CODE, OVMF_VARS, DISK_IMG): + if not os.path.exists(path): + sys.exit(f"ERROR: required file not found: {path}\n" + f"Run 'make all' first.") + + ensure_tmux_session(args.tmux) + + run_debug_session( + session=args.tmux, + qemu_pane=args.qemu_pane, + gdb_pane=args.gdb_pane, + timeout_boot=args.timeout_boot, + interactive=not args.no_interactive, + ) + + +if __name__ == "__main__": + main() diff --git a/shared/src/constants.rs b/shared/src/constants.rs index 41060aa..4f03fd3 100644 --- a/shared/src/constants.rs +++ b/shared/src/constants.rs @@ -112,6 +112,39 @@ pub mod uefi { pub const ACPI_RSDP_SIGNATURE: &[u8; 8] = b"RSD PTR "; } +/// GDB Debug Mailbox +/// +/// A fixed low-memory page used to communicate the runtime `efi_main` address 
+/// to GDB without requiring a probe-then-restart workflow. +/// +/// Layout (at `debug_mailbox::PHYS`): +/// offset +0x00 u64 Runtime virtual address of `efi_main` (written on entry) +/// offset +0x08 u64 Sentinel magic: `debug_mailbox::MAGIC` (written after addr) +/// +/// GDB watches the sentinel location for the magic value. When it fires, it +/// reads the address at +0x00 and calls `theseus-load` automatically. +/// +/// The page at `debug_mailbox::PHYS` is requested via UEFI `AllocateType::Address` +/// before writing so the firmware records ownership; allocation failure is ignored. +pub mod debug_mailbox { + /// Physical address of the debug mailbox page. + /// + /// 0x7000 sits in the "conventional memory" gap below 0x10000 that OVMF + /// leaves as `EfiConventionalMemory`. We request it explicitly via UEFI + /// before writing (best-effort) to avoid aliasing with firmware data structures. + pub const PHYS: u64 = 0x7000; + + /// Byte offset within the mailbox page where the efi_main address lives. + pub const ADDR_OFFSET: u64 = 0x00; + + /// Byte offset within the mailbox page where the sentinel magic lives. + /// Written *after* the address — GDB watches this to know the addr is valid. + pub const MAGIC_OFFSET: u64 = 0x08; + + /// Sentinel value written to `PHYS + MAGIC_OFFSET` after the address. + pub const MAGIC: u64 = 0xDEAD_BEEF_CAFE_F00D; +} + /// Exit Codes pub mod exit_codes { /// QEMU exit code for successful completion