From 8d92edf9f00fd57f66ee7c3b96516ea4fb640d77 Mon Sep 17 00:00:00 2001 From: Pablo Deymonnaz Date: Fri, 13 Feb 2026 16:13:59 -0300 Subject: [PATCH] Add devnet-runner and devnet-log-review Claude skills Add project-level skills for managing local devnets and analyzing their logs. devnet-runner: orchestrates lean-quickstart devnets from the repo root, with validator config management, timed runs, and automatic log collection. devnet-log-review: analyzes devnet logs with scripts for error counting, block production stats, consensus progress, and reference guides for fork analysis, finalization debugging, and error classification. --- .claude/skills/devnet-log-review/SKILL.md | 280 +++++++++++++ .../references/CLIENT_LOG_PATTERNS.md | 163 ++++++++ .../references/ERROR_CLASSIFICATION.md | 103 +++++ .../references/FINALIZATION_DEBUG.md | 247 +++++++++++ .../references/FORK_ANALYSIS.md | 233 +++++++++++ .../devnet-log-review/scripts/analyze-logs.sh | 73 ++++ .../scripts/check-consensus-progress.sh | 86 ++++ .../devnet-log-review/scripts/count-blocks.sh | 91 ++++ .../scripts/count-errors-warnings.sh | 50 +++ .../devnet-log-review/scripts/show-errors.sh | 80 ++++ .claude/skills/devnet-runner/SKILL.md | 395 ++++++++++++++++++ .../devnet-runner/references/clients.md | 129 ++++++ .../scripts/run-devnet-with-timeout.sh | 34 ++ 13 files changed, 1964 insertions(+) create mode 100644 .claude/skills/devnet-log-review/SKILL.md create mode 100644 .claude/skills/devnet-log-review/references/CLIENT_LOG_PATTERNS.md create mode 100644 .claude/skills/devnet-log-review/references/ERROR_CLASSIFICATION.md create mode 100644 .claude/skills/devnet-log-review/references/FINALIZATION_DEBUG.md create mode 100644 .claude/skills/devnet-log-review/references/FORK_ANALYSIS.md create mode 100755 .claude/skills/devnet-log-review/scripts/analyze-logs.sh create mode 100755 .claude/skills/devnet-log-review/scripts/check-consensus-progress.sh create mode 100755 
.claude/skills/devnet-log-review/scripts/count-blocks.sh create mode 100755 .claude/skills/devnet-log-review/scripts/count-errors-warnings.sh create mode 100755 .claude/skills/devnet-log-review/scripts/show-errors.sh create mode 100644 .claude/skills/devnet-runner/SKILL.md create mode 100644 .claude/skills/devnet-runner/references/clients.md create mode 100755 .claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh diff --git a/.claude/skills/devnet-log-review/SKILL.md b/.claude/skills/devnet-log-review/SKILL.md new file mode 100644 index 0000000..f3322a0 --- /dev/null +++ b/.claude/skills/devnet-log-review/SKILL.md @@ -0,0 +1,280 @@ +--- +name: devnet-log-review +description: Review and analyze devnet run results. Use when users want to (1) Analyze devnet logs for errors and warnings, (2) Generate a summary of a devnet run, (3) Identify interoperability issues between clients, (4) Understand consensus progress and block production, (5) Debug forks and finalization issues. +--- + +# Devnet Log Review + +Analyze and summarize devnet run results from lean consensus testing. 
+ +## Quick Start + +**Run the analysis script:** +```bash +# From project root (with logs in current directory) +.claude/skills/devnet-log-review/scripts/analyze-logs.sh + +# Or specify logs directory +.claude/skills/devnet-log-review/scripts/analyze-logs.sh /path/to/logs +``` + +This produces a structured summary with: +- Error/warning counts per node +- Block production statistics +- Consensus progress +- Proposer assignments + +## Log File Locations + +| File | Content | +|------|---------| +| `devnet.log` | Combined output from `spin-node.sh` (genesis generation + all node output) | +| `{client}_{n}.log` | Individual node logs (e.g., `zeam_0.log`, `ream_0.log`, `ethlambda_0.log`) | + +## Analysis Scripts + +| Script | Description | +|--------|-------------| +| `analyze-logs.sh [dir]` | Main entry point - runs all analyses, outputs markdown summary | +| `count-errors-warnings.sh [dir]` | Count errors/warnings per node (excludes benign patterns) | +| `count-blocks.sh [dir]` | Count blocks proposed/processed per node (client-aware) | +| `check-consensus-progress.sh [dir]` | Show last slot reached and proposer assignments | +| `show-errors.sh [-n node] [-l limit] [-w] [dir]` | Display error details for investigation | + +**Usage examples:** +```bash +# Just count errors/warnings +.claude/skills/devnet-log-review/scripts/count-errors-warnings.sh + +# Show errors for specific node +.claude/skills/devnet-log-review/scripts/show-errors.sh -n zeam_0 + +# Show errors and warnings with limit +.claude/skills/devnet-log-review/scripts/show-errors.sh -w -l 50 +``` + +## Common Investigation Patterns + +### Tracing Slot-by-Slot Flow + +When investigating issues, trace the complete flow for a specific slot using structured logging fields (`slot=X`). + +**Note:** Logs contain ANSI color codes. 
Strip them first: + +```bash +# Strip ANSI codes and grep for a specific slot +sed 's/\x1b\[[0-9;]*m//g' devnet.log | grep -E "slot=3[^0-9]|slot=3$" + +# For double-digit slots +sed 's/\x1b\[[0-9;]*m//g' devnet.log | grep -E "slot=12[^0-9]|slot=12$" +``` + +Structured logging fields follow `key=value` format: +- `slot=N` - Slot number +- `validator_id=N` - Validator index +- `validator=N` - Validator index (in gossipsub messages) +- `proposer=N` - Block proposer index +- `err=...` - Error message + +### Comparing Clients at Specific Slots + +```bash +# Extract block hashes for specific slots across all clients +for slot in 1 2 3 4 5; do + echo "=== Slot $slot ===" + grep -h "slot=$slot[^0-9]\|@ $slot[^0-9]" *.log | grep -oE "0x[a-f0-9]{8}" | sort -u +done + +# Check which client has which head at a specific slot +grep -h "head_slot=18\|Head Slot: 18" *.log + +# Compare finalization across clients +grep -h "finalized.*slot\|Finalized block.*@" *.log | tail -20 +``` + +### Finding Validators + +Each validator proposes blocks when `slot % validator_count == validator_id`. + +```bash +# ethlambda - explicit validator_id in logs +grep "We are the proposer" ethlambda_0.log | head -3 +# Output: We are the proposer for this slot slot=5 validator_id=5 + +# zeam - proposer field in attestation logs +grep "packing proposer attestation" zeam_0.log | head -3 +# Output: packing proposer attestation for slot=6 proposer=0 + +# Generic approach - validator_id = slot % validator_count +``` + +## Analysis Areas + +### Fork Analysis + +When clients disagree on which blocks are valid, the network splits into forks. + +**Quick check for forks:** +```bash +# Compare block hashes at same slot across clients +grep -h "slot=4[^0-9]" *.log | grep -oE "block_root=0x[a-f0-9]{16}" | sort -u + +# If you see different hashes → fork exists! 
+``` + +**Identifying rejected blocks:** +```bash +# Check signature verification failures +grep -i "signature.*failed\|invalid signature" *.log | head -20 + +# ethlambda +grep "Failed to process block" ethlambda_0.log + +# qlean +grep "Invalid signatures for block" qlean_0.log + +# lantern +grep "signature verification failed" lantern_0.log +``` + +**See [references/FORK_ANALYSIS.md](references/FORK_ANALYSIS.md) for:** +- Understanding fork types (canonical, orphan, invalid) +- Tracing parent-child relationships +- Building fork structure diagrams +- Determining which validators are on which fork + +### Finalization Debugging + +Finalization should advance every 6-12 slots. If it stalls, investigate: + +```bash +# Check finalization progress +grep "finalized_slot=" ethlambda_0.log | tail -20 + +# If finalized_slot stays same for 50+ slots → finalization stalled +``` + +**Finalization requires >2/3 supermajority:** +- 6 validators → need 5 votes minimum +- 9 validators → need 7 votes minimum + +**See [references/FINALIZATION_DEBUG.md](references/FINALIZATION_DEBUG.md) for:** +- Common causes of finalization stalls +- Validator participation calculations +- Justification chain analysis +- Step-by-step debugging guide + +### Error Classification + +**See [references/ERROR_CLASSIFICATION.md](references/ERROR_CLASSIFICATION.md) for:** +- Critical errors (genesis mismatch, panics, database corruption) +- Expected/benign messages (TODOs, HandshakeTimedOut to unconfigured nodes) +- Medium severity issues (encoding mismatches, missing blocks) +- State transition errors + +### Client Log Patterns + +Different clients have different log formats and key patterns. 
+ +**See [references/CLIENT_LOG_PATTERNS.md](references/CLIENT_LOG_PATTERNS.md) for:** +- Log format for each client (zeam, ream, ethlambda, grandine, lantern, qlean) +- Key log patterns per client +- Block counting methods +- ANSI color code handling + +## Block Proposal Flow (ethlambda) + +A healthy block proposal follows this sequence: + +1. `We are the proposer for this slot` - Node detects it's the proposer +2. `TODO precompute poseidons in parallel + SIMD` - XMSS aggregate proof starts +3. `packed_pcs_commit` - Proof commitment +4. `Logup data` - Logup protocol data +5. `AIR proof{table=poseidon16}` / `AIR proof{table=poseidon24}` - AIR proofs +6. `Published block` - Block successfully built and published +7. `Published block to gossipsub` - Block broadcast to network + +## Summary Report Format + +Generate concise summaries (20 lines or less) in this structure: + +```markdown +## Devnet Log Summary + +**Run:** {N} {client} nodes (`{image}`) | {M} slots ({range}) + +| Node | Validator | Blocks Proposed | Errors | Warnings | Status | +|------|-----------|-----------------|--------|----------|--------| +| {node_name} | {id} | {count} (slots {list}) | {n} | {n} | {emoji} | + +**Issues:** +- {issue 1} +- {issue 2} + +**{emoji} {RESULT}** - {one-line explanation} +``` + +### Status Emoji Guide + +| Emoji | Meaning | When to Use | +|-------|---------|-------------| +| 🟢 | Healthy | No errors, blocks processed successfully | +| 🟡 | Warning | Minor issues but consensus working | +| 🔴 | Failed | Critical errors, consensus broken, or blocks failing validation | + +### Result Line Examples + +- `🟢 PASSED` - All nodes healthy, consensus achieved +- `🟡 PASSED WITH WARNINGS` - Consensus working but minor issues detected +- `🔴 FAILED` - Consensus broken: {reason} + +### Key Rules + +1. Keep summary under 20 lines +2. Use table for per-node status +3. Status should reflect whether that node's blocks pass validation (🔴 if not) +4. End with single-line result with emoji +5. 
Don't list "what's working" - focus on issues + +## Manual Investigation Commands + +Use these when scripts don't provide enough detail: + +```bash +# Find which validators proposed blocks +grep -h "proposer\|We are the proposer" *.log | head -20 + +# Check peer connections +grep -h "peer connected\|Connection established" *.log | head -20 + +# Check attestations +grep -i "attestation" *.log | head -50 + +# Search for specific error patterns +grep -i "genesis mismatch\|panic\|fatal" *.log + +# Track attestations to unknown blocks (indicates forks) +grep "Unknown.*block:" ethlambda_0.log | grep -oE "0x[a-f0-9]{64}" | sort | uniq -c | sort -rn + +# Check which validators are on invalid fork +grep "rejected vote" lantern_0.log | grep -oE "validator=[0-9]+" | sort | uniq -c +``` + +## Detailed References + +For in-depth analysis, see these specialized guides: + +- **[FORK_ANALYSIS.md](references/FORK_ANALYSIS.md)** - Comprehensive guide to identifying and analyzing blockchain forks, tracing parent-child relationships, building fork structure diagrams, and determining consensus disagreements +- **[FINALIZATION_DEBUG.md](references/FINALIZATION_DEBUG.md)** - Debugging finalization stalls, validator participation calculations, justification chain analysis, and threshold math +- **[CLIENT_LOG_PATTERNS.md](references/CLIENT_LOG_PATTERNS.md)** - Log formats and key patterns for all clients (zeam, ream, ethlambda, grandine, lantern, qlean), including block counting methods +- **[ERROR_CLASSIFICATION.md](references/ERROR_CLASSIFICATION.md)** - Error types, severity levels, expected vs. critical errors, and interoperability issues + +## Progressive Disclosure + +This skill uses progressive disclosure to keep context usage efficient: + +1. **Start here** (SKILL.md) - Quick start workflow and common patterns +2. **Detailed references** (references/*.md) - Deep dives into specific analysis areas +3. 
**Scripts** (scripts/) - Automated analysis tools + +Load detailed references only when needed for specific investigations. diff --git a/.claude/skills/devnet-log-review/references/CLIENT_LOG_PATTERNS.md b/.claude/skills/devnet-log-review/references/CLIENT_LOG_PATTERNS.md new file mode 100644 index 0000000..80c5dc0 --- /dev/null +++ b/.claude/skills/devnet-log-review/references/CLIENT_LOG_PATTERNS.md @@ -0,0 +1,163 @@ +# Client-Specific Log Patterns + +Reference guide for log formats and key patterns across different lean consensus clients. + +## zeam (Zig) + +**Log format:** `[timestamp] [level] (zeam): [module] message` + +**Key characteristics:** +- Color codes in output (ANSI escape sequences) +- Key modules: `[node]`, `[network]`, `[consensus]` + +**Common patterns:** +``` +[validator] packing proposer attestation for slot=X proposer=Y +[database] initializing RocksDB +[node] failed to load latest finalized state from database: error.NoFinalizedStateFound +``` + +## ream (Rust) + +**Log format:** `timestamp LEVEL module: message` + +**Key characteristics:** +- Uses tracing crate format +- Key modules: `ream_p2p::network::lean`, `ream_blockchain` + +**Common patterns:** +``` +ream_p2p::network::lean: Connected to peer: PeerId("...") +ream_blockchain: Processing block slot=X +``` + +## ethlambda (Rust) + +**Log format:** `timestamp LEVEL module: message` + +**Key modules:** +- `ethlambda` +- `ethlambda_blockchain` +- `ethlambda_p2p` +- `ethlambda_p2p::gossipsub` + +**Key patterns:** + +### Block Proposal +``` +ethlambda_blockchain: We are the proposer for this slot slot=X validator_id=Y +ethlambda_blockchain: Published block slot=X validator_id=Y +ethlambda_p2p: Published block to gossipsub slot=X proposer=Y +``` + +### Attestations +``` +ethlambda_blockchain: Published attestation slot=X validator_id=Y +ethlambda_p2p::gossipsub::handler: Received new attestation from gossipsub, sending for processing slot=X validator=Y +ethlambda_blockchain: Skipping 
attestation for proposer slot=X (expected: proposers don't attest to their own slot) +``` + +### Block Processing +``` +ethlambda_p2p::gossipsub::handler: Received new block from gossipsub, sending for processing slot=X +ethlambda_blockchain::store: Processed new block slot=X block_root=0x... state_root=0x... +ethlambda_blockchain: Block processed successfully slot=X +``` + +### Errors +``` +ethlambda_blockchain: Failed to process block slot=X err=Proposer signature verification failed +ethlambda_blockchain: Failed to build block slot=X err=... +ethlambda_blockchain: Block parent missing, storing as pending slot=X parent_root=0x... block_root=0x... +ethlambda_blockchain: Failed to process gossiped attestation err=Unknown head block: 0x... +``` + +### Counting Blocks +Each block proposal generates TWO "Published block" log lines: +1. `ethlambda_blockchain: Published block slot=X validator_id=Y` (block built) +2. `ethlambda_p2p: Published block to gossipsub slot=X proposer=Y` (block broadcast) + +To count accurately: +```bash +# Count only blockchain module's log (one per block) +sed 's/\x1b\[[0-9;]*m//g' ethlambda_0.log | grep "ethlambda_blockchain: Published block" | wc -l + +# Or count "Published block to gossipsub" (also one per block) +sed 's/\x1b\[[0-9;]*m//g' ethlambda_0.log | grep "Published block to gossipsub" | wc -l +``` + +### Attestation Math +Each validator attests to all slots except slots where they're the proposer. With round-robin and N validators over S slots, each validator publishes approximately `S - (S/N)` attestations. + +## grandine (Rust) + +**Log format:** `timestamp LEVEL module: message` + +**Key modules:** +- `validator` +- `block_producer` +- `validator_config` + +**Key patterns:** +``` +CHAIN STATUS: Current Slot: X | Head Slot: Y | Behind: Z +Head Block Root: 0xabc... +Using parent root for block proposal parent_root=0x... 
+Finalized Slot: X +``` + +**Checking chain head:** +```bash +grep "CHAIN STATUS\|Head Block Root" grandine_0.log +``` + +## lantern (Rust) + +**Log format:** `timestamp LEVEL [module] message` + +**Key characteristics:** +- Brackets around module names: `[state]`, `[gossip]`, `[network]` + +**Key patterns:** +``` +[state] imported block slot=X new_head_slot=Y head_root=0x... +[gossip] rejected vote validator=X slot=Y head=0x... reason=unknown head +[gossip] received block slot=X proposer=Y root=0x... source=gossip +[state] signature verification failed slot=X root=0x... +``` + +## qlean (C++) + +**Log format:** `date time log-level module message` + +**Key characteristics:** +- No colons separating fields, spaces only +- Date format: `YY.MM.DD HH:MM:SS.microseconds` + +**Key patterns:** +``` +BlockStorage Add slot-to-hash for 0x... @ X +Networking Received block 0x... @ X parent=0x... from peer=... +BlockStorage Added block 0x... @ X as child of abc1…2345 +ForkChoice Invalid signatures for block 0x... @ X +BlockTree Finalized block 0x... @ X +ForkChoice 🔒 Finalized block: 0x... @ X +Networking ❌ Error importing block=0x... @ X: Invalid attestation +``` + +**Checking parent relationships:** +```bash +grep "Received block.*parent=" qlean_0.log +grep "Added block.*as child of" qlean_0.log +``` + +## ANSI Color Code Handling + +Many clients output ANSI escape sequences for terminal colors. Strip them before grepping: + +```bash +# Strip ANSI codes +sed 's/\x1b\[[0-9;]*m//g' logfile.log | grep pattern +``` + +Without stripping, patterns may not match correctly. diff --git a/.claude/skills/devnet-log-review/references/ERROR_CLASSIFICATION.md b/.claude/skills/devnet-log-review/references/ERROR_CLASSIFICATION.md new file mode 100644 index 0000000..c660191 --- /dev/null +++ b/.claude/skills/devnet-log-review/references/ERROR_CLASSIFICATION.md @@ -0,0 +1,103 @@ +# Error Classification Guide + +Reference for categorizing and understanding errors in devnet logs. 
+ +## Critical Errors + +Errors that indicate serious problems requiring immediate attention. + +| Pattern | Meaning | Action | +|---------|---------|--------| +| `genesis mismatch` | Nodes have different genesis configurations | Check genesis.json consistency across nodes | +| `panic` / `fatal` | Client crash | Check stack trace, file bug report | +| `database corruption` | Data directory corrupted | Clear data directory and restart | +| `OutOfMemory` in block deserialization | Block format incompatibility between clients | Check SSZ schema versions | +| `xmss_aggregate.rs panic` | Missing signature aggregation prover files | Ensure prover files are in correct location | + +## Expected/Benign Messages + +Messages that look like errors but are actually normal or harmless. + +| Pattern | Meaning | Why It's OK | +|---------|---------|-------------| +| `Error response from daemon: manifest unknown` | Docker image tag not found in remote registry | Docker falls back to local image; only an issue if no local image exists | +| `failed to load latest finalized state from database: error.NoFinalizedStateFound` | Fresh start, no previous state | Normal for new devnet runs | +| `HandshakeTimedOut` to ports of unconfigured nodes | Connection attempt to node that doesn't exist | Expected when validator config has fewer nodes than the network expects | +| `TODO precompute poseidons in parallel + SIMD` | Performance optimization not yet implemented | Code TODOs, not runtime errors | +| `TODO optimize open_columns when no shifted F columns` | AIR proof optimization not yet implemented | Code TODOs, not runtime errors | + +## Medium Severity + +Issues that may indicate problems but don't immediately break consensus. 
+ +| Pattern | Meaning | Action | +|---------|---------|--------| +| `Failed to decode snappy-framed RPC request` | Protocol/encoding mismatch between clients | Check libp2p versions and snappy compression settings | +| `No callback found for request_id` | Response received for unknown request | May indicate internal state tracking issue | +| `UnexpectedEof` | Incomplete message received | Check network stability and message size limits | +| `Proposer signature verification failed` | Block has invalid proposer signature | Check if block is genuinely invalid or validation bug | +| `Invalid signatures for block` | Block has invalid attestation signatures | Check XMSS signature aggregation | +| `signature verification failed` | Generic signature validation failure | Check which signature type failed | +| `Unknown head block` | Attestation references block client doesn't have | May indicate fork or missing block | +| `Unknown target block` | Attestation target block not found | May indicate fork or missing block | +| `Block parent missing` | Received block but parent not available | Client will try to fetch parent | + +## Connection Timeouts + +Connection timeouts to specific ports usually mean the node for that port was never started. + +**Identifying the node:** +Check the `validator-config.yaml` file in the network directory: +- `local-devnet/genesis/validator-config.yaml` +- `ansible-devnet/genesis/validator-config.yaml` + +Each node entry has an `enrFields.quic` port. + +**If you see HandshakeTimedOut to certain ports but those nodes were never started, this is expected.** + +## State Transition Errors + +### State Root Mismatch During Proposal + +If you see this pattern: +``` +We are the proposer for this slot slot=N validator_id=X +... 
+Failed to process block slot=N err=State transition failed: state root mismatch +Published block slot=N validator_id=X +``` + +This indicates a **block building bug**, not a consensus issue: +- The proposer builds a block with one state root in the header +- When verifying its own block, it computes a different state root +- The block is published anyway (bug: should not publish invalid blocks) +- Other nodes will also fail to process it with the same mismatch + +**Key diagnostic:** If all nodes compute the **same** state root (but different from the block header), the state transition is deterministic - the bug is in how the block header's state root is computed during block building. + +## Interoperability Issues + +When analyzing multi-client devnets, watch for: + +1. **Status exchange failures** - clients failing to exchange status messages +2. **Block/attestation propagation** - messages not reaching all clients +3. **Encoding mismatches** - snappy/SSZ encoding differences +4. **Timing issues** - slot timing drift between clients +5. **Block format incompatibility** - SSZ schema differences causing deserialization failures (look for `OutOfMemory` errors) +6. **Stale containers** - containers from previous runs causing genesis mismatch (look for `UnknownSourceBlock`) +7. 
**Signature validation disagreements** - clients disagree on signature validity (indicates bug in proposer or validator) + +## Searching for Errors + +```bash +# Generic error search +grep -i "error\|ERROR" *.log | grep -v "no callback\|manifest unknown" | head -50 + +# Search for specific patterns +grep -i "genesis mismatch\|panic\|fatal" *.log + +# Client-specific error patterns +grep "Failed to process block" ethlambda_0.log +grep "Invalid signatures" qlean_0.log +grep "signature verification failed" lantern_0.log +``` diff --git a/.claude/skills/devnet-log-review/references/FINALIZATION_DEBUG.md b/.claude/skills/devnet-log-review/references/FINALIZATION_DEBUG.md new file mode 100644 index 0000000..d5bc4dd --- /dev/null +++ b/.claude/skills/devnet-log-review/references/FINALIZATION_DEBUG.md @@ -0,0 +1,247 @@ +# Finalization Debugging Guide + +Guide for diagnosing and debugging finalization issues in devnet runs. + +## What is Finalization? + +Finalization is the process by which slots become irreversible in the blockchain. In the lean consensus protocol (3SF-mini), finalization requires: +- >2/3 supermajority of validators attesting +- Proper justification chain (slots justified at specific intervals) + +## Checking Finalization Progress + +```bash +# Track finalization over time for each client +grep -h "finalized.*slot\|Finalized block.*@" *.log | tail -50 + +# ethlambda specific +grep "finalized_slot=" ethlambda_0.log | tail -20 + +# qlean specific +grep "Finalized block" qlean_0.log | tail -20 + +# grandine specific +grep "Finalized Slot:" grandine_0.log | tail -20 +``` + +**Expected pattern:** Finalization should advance roughly every 6-12 slots (depending on 3SF-mini rules). + +**Stall indicator:** Finalized slot stays the same for 50+ slots while head slot continues advancing. 
+ +## Example of Healthy Finalization + +``` +Slot 0: finalized_slot=0 +Slot 6: finalized_slot=0 (waiting for justification) +Slot 12: finalized_slot=6 (slot 6 finalized) +Slot 18: finalized_slot=12 (slot 12 finalized) +Slot 24: finalized_slot=18 (slot 18 finalized) +``` + +## Example of Finalization Stall + +``` +Slot 0: finalized_slot=0 +Slot 6: finalized_slot=0 +Slot 12: finalized_slot=6 +Slot 18: finalized_slot=12 +Slot 24: finalized_slot=18 ← finalized +Slot 30: finalized_slot=18 ← STUCK +Slot 50: finalized_slot=18 ← STILL STUCK +Slot 100: finalized_slot=18 ← NOT ADVANCING +``` + +## Common Causes of Finalization Stalls + +### 1. Insufficient Validator Participation + +**Requirement:** Need **>2/3 supermajority** to finalize +- With 6 validators: need >4 votes = **at least 5 votes** +- With 9 validators: need >6 votes = **at least 7 votes** + +If validators are on different forks, neither fork may reach >2/3. + +```bash +# Count how many validators are active (attesting) +grep "validator=" *.log | grep -oE "validator=[0-9]+" | sort -u + +# Check which validators are on which fork (by head block they attest to) +grep "head=0x" lantern_0.log | grep "validator=" | tail -30 +``` + +### 2. Validators on Invalid Fork + +If N validators follow an invalid fork, only (total - N) validators contribute to canonical chain. 
+ +**Example:** 6 validators, 1 on invalid fork +- Total: 6 validators +- Honest: 5 validators on canonical fork +- Threshold: >4 votes, so need 5 votes +- Available: 5 honest votes +- **Should finalize!** 5 > 4 ✓ + +**Example:** 6 validators, 2 on invalid fork +- Total: 6 validators +- Honest: 4 validators on canonical fork +- Threshold: >4 votes, so need 5 votes +- Available: 4 honest votes +- **Cannot finalize!** 4 ≯ 4 ✗ + +```bash +# Find which validators are following invalid blocks +grep "rejected vote" lantern_0.log | grep -oE "validator=[0-9]+" | sort | uniq -c + +# If validator 4 keeps getting rejected, validator 4 is on wrong fork +``` + +### 3. Missing Attestations + +Client fails to process attestations from certain validators. + +```bash +# Check for attestation processing failures +grep "Failed to process.*attestation" ethlambda_0.log | tail -30 + +# Common reasons: +# - "Unknown head block" → validator attesting to block this client doesn't have +# - "Unknown target block" → validator attesting to invalid/orphan fork blocks +``` + +**Impact:** +- Missing attestations reduce effective vote count +- May prevent reaching >2/3 threshold even if enough validators are on canonical fork + +### 4. Justification Chain Broken + +3SF-mini requires justified slots at specific intervals: +- Delta ≤ 5 from finalized slot +- Perfect squares (9, 16, 25, 36...) +- Pronic numbers (6, 12, 20, 30...) + +Missing blocks or attestations can break justification chain. 
+ +```bash +# Check justification progress (ethlambda specific) +grep "latest_justified\|justified.*slot" ethlambda_0.log | tail -30 + +# Look for gaps in justified slots +``` + +## Finalization Math + +Given: +- `N` = total validators +- `N_honest` = validators on canonical fork +- `N_invalid` = validators on invalid/wrong fork +- Threshold = **> 2N/3** votes needed (strictly greater than 2/3) + +### Examples + +**6 validators, 1 on invalid fork:** +- Total: 6 validators +- Honest: 5 validators on canonical fork +- Threshold: > 2×6/3 = > 4, so need **at least 5 votes** +- Available honest votes: 5 +- **Should finalize!** 5 > 4 ✓ + +**6 validators, 2 on invalid fork:** +- Total: 6 validators +- Honest: 4 validators on canonical fork +- Threshold: > 4, so need **at least 5 votes** +- Available honest votes: 4 +- **Cannot finalize!** 4 ≯ 4 ✗ (exactly 2/3 is not enough) + +**6 validators, 1 crashed + 1 on invalid fork:** +- Total: 6 validators +- Honest: 4 validators on canonical fork +- Threshold: > 4, so need **at least 5 votes** +- Available honest votes: 4 +- **Cannot finalize!** Network stuck until validators come back or rejoin canonical fork + +## Debugging Steps + +### Step 1: Verify Validator Count and Status + +```bash +# Count total validators +grep -h "validator=" *.log | grep -oE "validator=[0-9]+" | sort -u | wc -l + +# Check which nodes are proposing blocks (active validators) +grep -h "We are the proposer\|Using parent root" *.log | head -30 +``` + +### Step 2: Check Fork Structure + +```bash +# See if clients have different heads +grep -h "head_slot=30\|Head Slot: 30" *.log + +# Compare block hashes at recent slots +for slot in 28 29 30 31 32; do + echo "=== Slot $slot ===" + grep -h "slot=$slot[^0-9]\|@ $slot[^0-9]" *.log | grep -oE "0x[a-f0-9]{8}" | sort -u +done +``` + +### Step 3: Count Attestations + +```bash +# Count attestations received per slot (ethlambda) +grep "Received new attestation.*slot=30" ethlambda_0.log | wc -l + +# Expected: N-1 
attestations per slot (all validators except proposer) +# With 6 validators: expect 5 attestations per slot +``` + +### Step 4: Check for Processing Failures + +```bash +# Look for attestation processing failures +grep "Failed to process.*attestation" ethlambda_0.log | tail -50 + +# Group by error type +grep "Failed to process.*attestation" ethlambda_0.log | \ + grep -oE "err=.*" | sort | uniq -c +``` + +### Step 5: Verify Threshold Calculation + +```bash +# Calculate if finalization should be possible +echo "Total validators: 6" +echo "Threshold: > 2×6/3 = > 4, need 5 votes" +echo "Validators on canonical fork: ?" # Count from logs +echo "Can finalize: yes if ≥5, no if ≤4" +``` + +## Known Bugs + +### Bug: Waiting for All Validators + +**Symptom:** Finalization stalls even with >2/3 validators on canonical fork + +**Cause:** Finalization logic waits for attestations from ALL validators instead of just >2/3 + +**Example:** 6 validators, 1 on invalid fork +- Available: 5 votes from honest validators +- Threshold: need 5 votes +- Bug behavior: waits for 6th validator (on invalid fork) which will never attest +- Expected behavior: should finalize with 5 votes + +**Detection:** +```bash +# Check if stalled client has enough attestations +# If yes but finalization stalled → possible bug +``` + +### Bug: Off-by-One in Threshold + +**Symptom:** Finalization succeeds with exactly 2/3 of the votes even though the protocol requires strictly more than 2/3 + +**Cause:** Using `>=` instead of `>` in threshold check + +**Detection:** Check whether finalization succeeds with exactly 4 of 6 votes; the protocol requires more than 4 (at least 5) + +## Additional Resources + +See [FORK_ANALYSIS.md](FORK_ANALYSIS.md) for fork detection and [ERROR_CLASSIFICATION.md](ERROR_CLASSIFICATION.md) for common error patterns. 
diff --git a/.claude/skills/devnet-log-review/references/FORK_ANALYSIS.md b/.claude/skills/devnet-log-review/references/FORK_ANALYSIS.md new file mode 100644 index 0000000..a3ca5a8 --- /dev/null +++ b/.claude/skills/devnet-log-review/references/FORK_ANALYSIS.md @@ -0,0 +1,233 @@ +# Fork Analysis Guide + +Comprehensive guide to identifying and analyzing blockchain forks in devnet runs. + +## Understanding Forks + +**Fork Types:** +1. **Canonical Fork** - The main chain that the honest majority follows +2. **Orphan Fork** - Valid blocks that lost a fork choice race (e.g., two blocks proposed for same slot) +3. **Invalid Fork** - Chain built on blocks with validation failures (signature errors, state errors, etc.) + +**Key Insight:** Blocks don't just have slot numbers - they have **parent relationships**. A fork occurs when two or more blocks (at the same or different slots) reference the same parent block, so the chain splits into competing branches. + +## Tracing Parent-Child Relationships + +To understand forks, map out the blockchain DAG (Directed Acyclic Graph) by tracking which block is the parent of each new block. + +### qlean - Explicit Parent Logging + +```bash +# qlean logs parent relationships when receiving blocks +grep "Received block.*parent=" qlean_0.log | head -20 +# Output: Received block 0xabc...123 @ 3 parent=0xdef...456 from peer=... +# Meaning: slot 3 block (0xabc...123) builds on parent (0xdef...456) + +# Also check "Added block" logs +grep "Added block.*as child of" qlean_0.log | head -20 +# Output: Added block 0xabc...123 @ 3 as child of def4…5678 +``` + +### ethlambda - Pending Blocks + +```bash +# When ethlambda receives a block with unknown parent: +grep "Block parent missing" ethlambda_0.log +# Output: Block parent missing, storing as pending slot=8 parent_root=0x6cc163e6... block_root=0x16d1daad... +# Meaning: slot 8 block depends on parent 0x6cc163e6... 
which ethlambda doesn't have + +# Check processed blocks +grep "Processed new block" ethlambda_0.log | head -20 +# Shows which blocks were successfully validated and added to chain +``` + +### lantern - Import Logs + +```bash +grep "imported block" lantern_0.log | head -20 +# Output: imported block slot=3 new_head_slot=3 head_root=0x0c3dd6a5... +``` + +### zeam - Block Processing + +```bash +sed 's/\x1b\[[0-9;]*m//g' zeam_0.log | grep "processing block\|imported block" | head -20 +``` + +## Building the Fork Structure + +### Step 1: Map Canonical Chain + +Start from genesis and follow the longest/heaviest chain: + +```bash +# For each client, extract processed blocks in order +grep "Processed new block\|imported block\|Added block" CLIENT.log | \ + grep -oE "slot=[0-9]+|block_root=0x[a-f0-9]{8}" | \ + paste - - | head -30 + +# Compare block hashes at each slot across clients +# If clients have different hashes at same slot → fork! +``` + +### Step 2: Identify Rejected Blocks + +```bash +# Find blocks rejected by signature verification +grep -i "signature.*failed\|invalid signature" *.log + +# ethlambda +grep "Failed to process block" ethlambda_0.log +# Output: Failed to process block slot=4 err=Proposer signature verification failed + +# qlean +grep "Invalid signatures for block" qlean_0.log +# Output: Invalid signatures for block 0xa829bac5... @ 4 + +# lantern +grep "signature verification failed" lantern_0.log +# Output: signature verification failed slot=4 root=0xa829bac5... +``` + +### Step 3: Track Attestations to Unknown Blocks + +Attestations reference blocks by hash. If a client receives attestations for an unknown block, it indicates a fork: + +```bash +# ethlambda logs "Unknown head block" or "Unknown target block" +grep "Unknown.*block:" ethlambda_0.log | head -20 +# Output: Failed to process gossiped attestation err=Unknown head block: 0xa829bac5... 
+ +# Count attestations per unknown block +grep "Unknown.*block:" ethlambda_0.log | grep -oE "0x[a-f0-9]{64}" | sort | uniq -c | sort -rn +# Output: 48 0x66adc5361a72c49aab91f28c3350734f6224e674fc39518416f2ef932f9523ae +# 12 0xa829bac56f6b98fbe16ed02cde4166a0a0df2e68c68e64afa4fce43bbe1992b3 +# Many attestations for the same unknown block → multiple validators on that fork +``` + +### Step 4: Determine Which Validators Are on Which Fork + +```bash +# Check who is attesting to rejected blocks +grep "rejected vote" lantern_0.log | grep "validator=" | head -20 +# Output: rejected vote validator=4 slot=5 head=0xa829bac5... reason=unknown head +# Meaning: validator 4 is attesting to the rejected block at slot 4 + +# Check validator's own head +grep "head.*slot\|Head Block Root" grandine_0.log | head -10 +# If grandine's head is the rejected block, grandine is on the invalid fork +``` + +## Fork Structure Diagram Format + +When you identify forks, document them in ASCII: + +``` + GENESIS (slot 0) + 0xc8849d39... + │ + ┌─────────────────┴─────────────────┐ + │ │ + SLOT 1 █ SLOT 4 ✗ + 0xcbe3c545... 0xa829bac5... + ┌─────────────────┐ (INVALID - rejected + │ CANONICAL (A) │ by 3/4 clients) + │ Clients: │ │ + │ ✓ ethlambda │ SLOT 10 ⚠ + │ ✓ zeam │ 0xf8dae5ee... + │ ✓ lantern │ (invalid fork, only + │ ✓ qlean │ grandine follows) + └─────────────────┘ + │ + SLOT 3 █ + 0x0c3dd6a5... + │ + SLOT 5 █ + 0xd0fd6225... + │ + (continues...) + +Legend: + █ = Canonical block ✗ = Rejected block ⚠ = Block on invalid fork +``` + +## Key Questions to Answer + +1. **Which block(s) were rejected and why?** (signature errors, state errors, etc.) +2. **Which validators accepted the rejected block?** (check their heads) +3. **How many validators are on each fork?** (count unique attestations per fork) +4. 
**Can the canonical fork finalize without the validators on invalid fork?** (need >2/3 supermajority) + +## Signature Verification Disagreements + +If clients disagree on signature validity, determine consensus: + +```bash +# Count how many clients rejected vs accepted a specific block +BLOCK_HASH="0xa829bac56f6b98fbe16ed02cde4166a0a0df2e68c68e64afa4fce43bbe1992b3" + +echo "=== Clients that rejected $BLOCK_HASH ===" +grep -l "signature.*failed.*$BLOCK_HASH\|Invalid signatures.*$BLOCK_HASH" *.log + +echo "=== Clients that accepted $BLOCK_HASH ===" +grep -l "Processed.*$BLOCK_HASH\|imported.*$BLOCK_HASH" *.log + +# If 3/4 clients reject → the block is genuinely invalid, bug in proposer +# If 1/4 clients reject → possible bug in that client's validation +``` + +### Root Cause Determination + +- If **majority rejects** with signature errors → **proposer has bug** (failed to sign properly) +- If **minority rejects** with signature errors → **validator has bug** (incorrect validation) +- If **different blocks at same slot** → fork choice race (benign, resolved by fork choice) + +## Comparing Block Hashes Across Slots + +```bash +# Extract block hashes for specific slots (comparing across clients) +for slot in 1 2 3 4 5; do + echo "=== Slot $slot ===" + grep -h "slot=$slot[^0-9]\|@ $slot[^0-9]" *.log | grep -oE "0x[a-f0-9]{8}" | sort -u +done + +# Check which client has which head at a specific slot +grep -h "head_slot=18\|Head Slot: 18" *.log + +# Compare finalization across clients +grep -h "finalized.*slot\|Finalized block.*@" *.log | tail -20 +``` + +## Validator ID Detection + +Each validator proposes blocks when `slot % validator_count == validator_id`. + +### Finding Validator IDs from Logs + +```bash +# ethlambda - explicit validator_id +grep "We are the proposer" ethlambda_0.log | head -3 +# Output: We are the proposer for this slot slot=5 validator_id=5 +# Pattern: validator_id=5 proposes at slots 5,11,17,23... 
(every 6th if 6 validators) + +# zeam - proposer field +grep "packing proposer attestation" zeam_0.log | head -3 +# Output: packing proposer attestation for slot=6 proposer=0 +# Pattern: proposer=0 proposes at slots 0,6,12,18... + +# grandine - check proposal slots +grep "Using parent root for block proposal" grandine_0.log +# If it proposes at slots 4,10,16,22... then validator_id=4 + +# Generic - validator_id = slot % validator_count +``` + +### Verify Validator Count + +```bash +# Count unique validators from attestations +grep -h "validator=" *.log | grep -oE "validator=[0-9]+" | sort -u | wc -l + +# Or check genesis configuration +grep "GENESIS_VALIDATORS" genesis/genesis.json | jq '. | length' +``` diff --git a/.claude/skills/devnet-log-review/scripts/analyze-logs.sh b/.claude/skills/devnet-log-review/scripts/analyze-logs.sh new file mode 100755 index 0000000..f66d3a8 --- /dev/null +++ b/.claude/skills/devnet-log-review/scripts/analyze-logs.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# analyze-logs.sh - Main entry point for devnet log analysis +# +# Usage: analyze-logs.sh [log_dir] +# log_dir: Directory containing *.log files (default: current directory) +# +# Output: Complete analysis summary in markdown format +# Exit codes: 0 = healthy, 1 = warnings (also returned when no .log files are found), 2 = failed + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +log_dir="${1:-.}" + +# Check if log files exist +shopt -s nullglob +log_files=("$log_dir"/*.log) +if [[ ${#log_files[@]} -eq 0 ]]; then + echo "No .log files found in $log_dir" >&2 + exit 1 +fi + +# Count node log files (excluding devnet.log); a plain assignment is used because ((node_count++)) returns status 1 when the counter is 0, which aborts the script under set -e +node_count=0 +for f in "${log_files[@]}"; do + node=$(basename "$f" .log) + if [[ "$node" != "devnet" ]]; then + node_count=$((node_count + 1)) + fi +done + +echo "## Devnet Log Analysis" +echo "" +echo "**Log directory:** $log_dir" +echo "**Node logs found:** $node_count" +echo "" + +echo "### Errors and Warnings" +echo "" +"$SCRIPT_DIR/count-errors-warnings.sh" "$log_dir" +echo "" + +echo "### Block 
Production" +echo "" +"$SCRIPT_DIR/count-blocks.sh" "$log_dir" +echo "" + +echo "### Consensus Progress" +echo "" +"$SCRIPT_DIR/check-consensus-progress.sh" "$log_dir" +echo "" + +# Calculate overall health +total_errors=0 +for f in "${log_files[@]}"; do + node=$(basename "$f" .log) + if [[ "$node" != "devnet" ]]; then + errors=$(grep -i "error" "$f" 2>/dev/null | grep -cvE "manifest unknown|NoFinalizedStateFound|HandshakeTimedOut" 2>/dev/null) || errors=0 + total_errors=$((total_errors + errors)) + fi +done + +echo "---" +if [[ $total_errors -eq 0 ]]; then + echo "**Status: HEALTHY** - No errors detected" + exit 0 +elif [[ $total_errors -lt 50 ]]; then + echo "**Status: WARNINGS** - $total_errors total errors detected" + exit 1 +else + echo "**Status: ISSUES** - $total_errors total errors detected (review recommended)" + exit 2 +fi diff --git a/.claude/skills/devnet-log-review/scripts/check-consensus-progress.sh b/.claude/skills/devnet-log-review/scripts/check-consensus-progress.sh new file mode 100755 index 0000000..1e95868 --- /dev/null +++ b/.claude/skills/devnet-log-review/scripts/check-consensus-progress.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# check-consensus-progress.sh - Show consensus progress per node +# +# Usage: check-consensus-progress.sh [log_dir] +# log_dir: Directory containing *.log files (default: current directory) +# +# Output: Last slot reached per node and proposer slot assignments + +set -euo pipefail + +log_dir="${1:-.}" + +# Strip ANSI escape codes from input +strip_ansi() { + sed 's/\x1b\[[0-9;]*m//g' +} + +# Check if log files exist +shopt -s nullglob +log_files=("$log_dir"/*.log) +if [[ ${#log_files[@]} -eq 0 ]]; then + echo "No .log files found in $log_dir" >&2 + exit 1 +fi + +echo "=== Last Slot Reached ===" +printf "%-20s %12s\n" "Node" "Last Slot" +printf "%-20s %12s\n" "----" "---------" + +for f in "${log_files[@]}"; do + node=$(basename "$f" .log) + + # Skip combined devnet.log + if [[ "$node" == "devnet" ]]; then + continue + fi + + 
# Extract last slot number from log (handles slot=N, slot: N, Slot N, @ N formats); '|| true' absorbs the pipefail failure when nothing matches so the N/A fallback below is used + last_slot=$(strip_ansi < "$f" | grep -oE "slot[=: ][0-9]+|Slot [0-9]+|@ [0-9]+" | grep -oE "[0-9]+" | sort -n | tail -1 || true) + + if [[ -z "$last_slot" ]]; then + last_slot="N/A" + fi + + printf "%-20s %12s\n" "$node" "$last_slot" +done + +echo "" +echo "=== Proposer Slots ===" +echo "(Slots where each node was the proposer)" +echo "" + +for f in "${log_files[@]}"; do + node=$(basename "$f" .log) + client="${node%_*}" + + # Skip combined devnet.log + if [[ "$node" == "devnet" ]]; then + continue + fi + + # Extract proposed slots based on client ('|| true' because grep exits 1 when a node proposed nothing, which would abort the script under set -e with pipefail) + case "$client" in + zeam) + slots=$(strip_ansi < "$f" | grep "produced block for slot" | grep -oE "slot=[0-9]+" | cut -d= -f2 | tr '\n' ',' | sed 's/,$//') || true + ;; + ream) + slots=$(strip_ansi < "$f" | grep "Proposing block by Validator" | grep -oE "slot=[0-9]+" | cut -d= -f2 | tr '\n' ',' | sed 's/,$//') || true + ;; + qlean) + slots=$(strip_ansi < "$f" | grep "Produced block" | grep -oE "@ [0-9]+" | grep -oE "[0-9]+" | tr '\n' ',' | sed 's/,$//') || true + ;; + ethlambda) + slots=$(strip_ansi < "$f" | grep "Published block to gossipsub" | grep -oE "slot=[0-9]+" | cut -d= -f2 | tr '\n' ',' | sed 's/,$//') || true + ;; + *) + slots="" + ;; + esac + + if [[ -n "$slots" ]]; then + echo "$node: slots $slots" + else + echo "$node: (no blocks proposed)" + fi +done diff --git a/.claude/skills/devnet-log-review/scripts/count-blocks.sh b/.claude/skills/devnet-log-review/scripts/count-blocks.sh new file mode 100755 index 0000000..5daa0af --- /dev/null +++ b/.claude/skills/devnet-log-review/scripts/count-blocks.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# count-blocks.sh - Count blocks proposed and processed per node +# +# Usage: count-blocks.sh [log_dir] +# log_dir: Directory containing *.log files (default: current directory) +# +# Output: Table with node name, blocks proposed, blocks processed +# Handles client-specific log patterns (zeam, ream, qlean, lantern, ethlambda, lighthouse, grandine) + 
+set -uo pipefail + +log_dir="${1:-.}" + +# Strip ANSI escape codes from input +strip_ansi() { + sed 's/\x1b\[[0-9;]*m//g' +} + +# Safe count function that always returns a number +count_pattern() { + local file="$1" + local pattern="$2" + local result + result=$(strip_ansi < "$file" | grep -cE "$pattern" 2>/dev/null) || result=0 + echo "${result:-0}" +} + +# Check if log files exist +shopt -s nullglob +log_files=("$log_dir"/*.log) +if [[ ${#log_files[@]} -eq 0 ]]; then + echo "No .log files found in $log_dir" >&2 + exit 1 +fi + +# Print header +printf "%-20s %10s %10s\n" "Node" "Proposed" "Processed" +printf "%-20s %10s %10s\n" "----" "--------" "---------" + +for f in "${log_files[@]}"; do + node=$(basename "$f" .log) + + # Skip devnet.log - it's a combined log + if [[ "$node" == "devnet" ]]; then + continue + fi + + # Extract client name from node name (e.g., "zeam_0" -> "zeam") + client="${node%_*}" + + proposed=0 + processed=0 + + case "$client" in + zeam) + proposed=$(count_pattern "$f" "produced block for slot") + processed=$(count_pattern "$f" "processed block") + ;; + ream) + # ream logs "Proposing block" when attempting + proposed=$(count_pattern "$f" "Proposing block by Validator") + processed=$(count_pattern "$f" "Processing block built") + ;; + qlean) + # qlean uses "Produced block" or "Gossiped block" + proposed=$(count_pattern "$f" "Produced block|Gossiped block") + processed=$(count_pattern "$f" "Imported block") + ;; + lantern) + # Lantern logs lowercase "published block" for proposals + proposed=$(count_pattern "$f" "[Pp]roduced block|[Gg]ossiped block|[Pp]ublished block") + processed=$(count_pattern "$f" "[Ii]mported block") + ;; + ethlambda) + # ethlambda logs "Published block to gossipsub" once per block + proposed=$(count_pattern "$f" "Published block to gossipsub") + processed=$(count_pattern "$f" "Processed new block") + ;; + lighthouse|grandine) + proposed=$(count_pattern "$f" "[Pp]roduced block|[Pp]ublished block") + 
processed=$(count_pattern "$f" "[Pp]rocessed block|[Ii]mported block") + ;; + *) + # Unknown client - try generic patterns + proposed=$(count_pattern "$f" "[Pp]roduced block|[Pp]ublished block|[Gg]ossiped block") + processed=$(count_pattern "$f" "[Pp]rocessed block|[Ii]mported block") + ;; + esac + + printf "%-20s %10d %10d\n" "$node" "$proposed" "$processed" +done diff --git a/.claude/skills/devnet-log-review/scripts/count-errors-warnings.sh b/.claude/skills/devnet-log-review/scripts/count-errors-warnings.sh new file mode 100755 index 0000000..5bdfc99 --- /dev/null +++ b/.claude/skills/devnet-log-review/scripts/count-errors-warnings.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# count-errors-warnings.sh - Count errors and warnings per node log file +# +# Usage: count-errors-warnings.sh [log_dir] +# log_dir: Directory containing *.log files (default: current directory) +# +# Output: Table with node name, error count, warning count +# Excludes benign patterns like "manifest unknown", "NoFinalizedStateFound", "HandshakeTimedOut", "TODO" + +set -uo pipefail + +log_dir="${1:-.}" + +# Benign patterns to exclude from counts +BENIGN_ERRORS="manifest unknown|NoFinalizedStateFound|HandshakeTimedOut" +BENIGN_WARNINGS="TODO" + +# Safe count function +count_filtered() { + local file="$1" + local pattern="$2" + local exclude="$3" + local result + result=$(grep -i "$pattern" "$file" 2>/dev/null | grep -cvE "$exclude" 2>/dev/null) || result=0 + echo "${result:-0}" +} + +# Check if log files exist +shopt -s nullglob +log_files=("$log_dir"/*.log) +if [[ ${#log_files[@]} -eq 0 ]]; then + echo "No .log files found in $log_dir" >&2 + exit 1 +fi + +# Print header +printf "%-20s %8s %8s\n" "Node" "Errors" "Warnings" +printf "%-20s %8s %8s\n" "----" "------" "--------" + +for f in "${log_files[@]}"; do + node=$(basename "$f" .log) + + # Count errors excluding benign patterns + errors=$(count_filtered "$f" "error" "$BENIGN_ERRORS") + + # Count warnings excluding benign patterns + warnings=$(count_filtered "$f" "warn" 
"$BENIGN_WARNINGS") + + printf "%-20s %8d %8d\n" "$node" "$errors" "$warnings" +done diff --git a/.claude/skills/devnet-log-review/scripts/show-errors.sh b/.claude/skills/devnet-log-review/scripts/show-errors.sh new file mode 100755 index 0000000..fcc1a18 --- /dev/null +++ b/.claude/skills/devnet-log-review/scripts/show-errors.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# show-errors.sh - Display error details for investigation +# +# Usage: show-errors.sh [options] [log_dir] +# -n NODE Filter to specific node (e.g., "zeam_0") +# -l LIMIT Limit number of errors shown per file (default: 20) +# -w Also show warnings +# log_dir Directory containing *.log files (default: current directory) +# +# Output: Error messages from log files, stripped of ANSI codes + +set -euo pipefail + +# Defaults +node_filter="" +limit=20 +show_warnings=false +log_dir="." + +# Parse options +while getopts "n:l:w" opt; do + case $opt in + n) node_filter="$OPTARG" ;; + l) limit="$OPTARG" ;; + w) show_warnings=true ;; + *) echo "Usage: $0 [-n node] [-l limit] [-w] [log_dir]" >&2; exit 1 ;; + esac +done +shift $((OPTIND-1)) + +# Remaining argument is log_dir +if [[ $# -gt 0 ]]; then + log_dir="$1" +fi + +# Strip ANSI escape codes from input +strip_ansi() { + sed 's/\x1b\[[0-9;]*m//g' +} + +# Build file pattern (file name only; the directory is prepended, quoted, when the glob is expanded) +if [[ -n "$node_filter" ]]; then + pattern="${node_filter}.log" +else + pattern="*.log" +fi + +# Check if log files exist ($pattern is deliberately unquoted so the glob expands; $log_dir is quoted so directories containing spaces work) +shopt -s nullglob +log_files=("$log_dir"/$pattern) +if [[ ${#log_files[@]} -eq 0 ]]; then + echo "No matching .log files found" >&2 + exit 1 +fi + +for f in "${log_files[@]}"; do + node=$(basename "$f" .log) + + # Skip combined devnet.log unless specifically requested + if [[ "$node" == "devnet" && -z "$node_filter" ]]; then + continue + fi + + echo "=== $node ===" + + # Show errors (grep -c already prints 0 when nothing matches; '|| true' keeps set -e/pipefail from aborting on no match or when head closes the pipe early) + error_count=$(strip_ansi < "$f" | grep -ci "error" || true) + echo "Errors ($error_count total, showing first $limit):" + strip_ansi < "$f" | grep -i "error" | head -n "$limit" || true + + # 
Optionally show warnings + if $show_warnings; then + echo "" + warning_count=$(strip_ansi < "$f" | grep -ci "warn" || true) + echo "Warnings ($warning_count total, showing first $limit):" + strip_ansi < "$f" | grep -i "warn" | head -n "$limit" || true + fi + + echo "" +done diff --git a/.claude/skills/devnet-runner/SKILL.md b/.claude/skills/devnet-runner/SKILL.md new file mode 100644 index 0000000..ca8ac50 --- /dev/null +++ b/.claude/skills/devnet-runner/SKILL.md @@ -0,0 +1,395 @@ +--- +name: devnet-runner +description: Manage local development networks for lean consensus testing. Use when users want to (1) Configure a devnet with validator nodes, (2) Start/stop devnet nodes, (3) Regenerate genesis files, (4) Collect and dump node logs to files, (5) Troubleshoot devnet issues. +--- + +# Devnet Runner + +Manage local development networks for lean consensus testing. + +## Prerequisites + +The `lean-quickstart` directory must exist at the repo root. If missing: +```bash +make lean-quickstart +``` + +## Default Behavior + +When starting a devnet, **always**: +1. **Update validator config** - Edit `lean-quickstart/local-devnet/genesis/validator-config.yaml` to include ONLY the nodes that will run. Remove entries for nodes that won't be started (unless the user explicitly asks to keep them). This is critical because validator indices are assigned to ALL nodes in the config - if a node is in the config but not running, its validators will miss their proposer slots. +2. **Update client image tags** - If the user specifies a tag (e.g., "use devnet1 tag"), edit the relevant `lean-quickstart/client-cmds/{client}-cmd.sh` file to update the `node_docker` image tag. +3. **Use run-devnet-with-timeout.sh** - This script runs all nodes in the config with a timeout, dumps logs, then stops them. Do NOT use `--node ` to select nodes - this does not reassign validators. +4. Run for **20 slots** unless the user specifies otherwise +5. 
The script automatically dumps all node logs to `.log` files in the repo root and stops the nodes when the timeout expires + +**Important:** Only use `--node ` (e.g., `--node zeam_0,ream_0`) if the user explicitly requests it. This flag starts only the specified nodes but does NOT reassign their validators, causing missed slots. + +This ensures consistent test runs, clean logs without spurious warnings, and captured output for debugging. + +## Timing Calculation + +Total timeout = startup buffer + genesis offset + (slots × 4 seconds) + +| Component | Local Mode | Ansible Mode | +|-----------|------------|--------------| +| Startup buffer | 10s | 10s | +| Genesis offset | 30s | 360s | +| Per slot | 4s | 4s | + +**Examples (local mode):** +- 20 slots: 10 + 30 + (20 × 4) = **120s** +- 50 slots: 10 + 30 + (50 × 4) = **240s** +- 100 slots: 10 + 30 + (100 × 4) = **440s** + +## Quick Start (Default Workflow) + +**Step 1: Configure nodes** - Edit `lean-quickstart/local-devnet/genesis/validator-config.yaml` to keep only the nodes you want to run. Remove all other validator entries. This is critical because validator indices are assigned based on all nodes in the config - if a node is in the config but not running, its validators will miss their slots. + +**Step 2: Update image tags (if needed)** - Edit `lean-quickstart/client-cmds/{client}-cmd.sh` to change the Docker image tag in `node_docker`. 
+ +**Step 3: Run the devnet** +```bash +# Start devnet with fresh genesis, capture logs directly (20 slots = 120s) +.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh 120 + +# Stop any remaining nodes (cleanup) +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --stop 2>/dev/null || true +``` + +## Manual Commands + +All `spin-node.sh` commands must be run from within `lean-quickstart/`: + +```bash +# Stop all nodes +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --stop + +# Run for custom duration (e.g., 50 slots = 240s with genesis offset) +.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh 240 + +# Start without timeout (press Ctrl+C to stop) +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --generateGenesis +``` + +### Using --node to Select Specific Nodes (Advanced) + +**WARNING:** Only use `--node ` if the user explicitly requests it. This flag does NOT reassign validators - nodes not selected will still have validators assigned to them in the genesis, causing missed slots. + +```bash +# Only use if explicitly requested by user +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node zeam_0,ream_0 +``` + +For normal operation, always modify `validator-config.yaml` to include only the nodes you want, then use `run-devnet-with-timeout.sh` or `--node all`. + +## Command-Line Flags + +| Flag | Description | +|------|-------------| +| `--node ` | **Required.** Node(s) to start. Use `all` to start all nodes in config. **Note:** Using specific node names (e.g., `--node zeam_0,ream_0`) does NOT reassign validators - use only if explicitly requested | +| `--generateGenesis` | Regenerate genesis files. 
Implies `--cleanData` | +| `--cleanData` | Clean data directories before starting | +| `--stop` | Stop running nodes instead of starting them | +| `--forceKeyGen` | Force regeneration of hash-sig validator keys | +| `--validatorConfig ` | Custom config path (default: `$NETWORK_DIR/genesis/validator-config.yaml`) | +| `--dockerWithSudo` | Run docker commands with `sudo` | + +## Changing Docker Image Tags + +To use a specific tag for certain clients, edit the `lean-quickstart/client-cmds/{client}-cmd.sh` files before running. + +**Example:** Change zeam from `devnet1` to `local`: +```bash +# In lean-quickstart/client-cmds/zeam-cmd.sh, find: +node_docker="--security-opt seccomp=unconfined blockblaz/zeam:devnet1 node \ + +# Change to: +node_docker="--security-opt seccomp=unconfined blockblaz/zeam:local node \ +``` + +**Current default tags:** +| Client | Image | Default Tag | +|--------|-------|-------------| +| zeam | blockblaz/zeam | devnet1 | +| ream | ghcr.io/reamlabs/ream | latest | +| ethlambda | ghcr.io/lambdaclass/ethlambda | local | +| qlean | qdrvm/qlean-mini | 3a96a1f | +| lantern | piertwo/lantern | v0.0.1 | +| lighthouse | hopinheimer/lighthouse | latest | +| grandine | sifrai/lean | unstable | + +## Configuration Workflow + +### Validator Config File Structure + +The config file is at `lean-quickstart/local-devnet/genesis/validator-config.yaml`. This is the **single source of truth** for all node configurations. + +**Important:** Only include clients that will actually run in the devnet. If a configured validator is offline from the start, it will miss its proposer slots and affect consensus progress. Only include offline validators if you specifically want to test behavior with missing nodes. 
+ +**Full schema:** +```yaml +shuffle: roundrobin # Proposer selection algorithm (roundrobin = deterministic turns) +deployment_mode: local # 'local' (localhost) or 'ansible' (remote servers) + +config: + activeEpoch: 18 # Log2 of active signing epochs for hash-sig keys (2^18) + keyType: "hash-sig" # Post-quantum signature scheme + +validators: + - name: "zeam_0" # Node identifier: <client>_<index> + privkey: "bdf953adc..." # 64-char hex P2P private key (libp2p identity) + enrFields: + ip: "127.0.0.1" # Node IP (127.0.0.1 for local, real IP for ansible) + quic: 9001 # QUIC/UDP port for P2P communication + metricsPort: 8081 # Prometheus metrics endpoint port + count: 1 # Number of validator indices assigned to this node +``` + +**Field reference:** + +| Field | Required | Description | +|-------|----------|-------------| +| `shuffle` | Yes | Proposer selection algorithm. Use `roundrobin` for deterministic turn-based proposing | +| `deployment_mode` | Yes | `local` or `ansible` - determines genesis time offset and config directory | +| `config.activeEpoch` | Yes | Exponent for hash-sig active epochs (e.g., 18 means 2^18 signatures per period) | +| `config.keyType` | Yes | Always `hash-sig` for post-quantum support | +| `name` | Yes | Format: `<client>_<index>`. Client name determines which `client-cmds/*.sh` script runs | +| `privkey` | Yes | 32-byte hex string (64 chars). Used for P2P identity and ENR generation | +| `enrFields.ip` | Yes | IP address. Use `127.0.0.1` for local, real IPs for ansible | +| `enrFields.quic` | Yes | QUIC port. Must be unique per node in local mode | +| `metricsPort` | Yes | Prometheus metrics port. Must be unique per node in local mode | +| `count` | Yes | Number of validator indices. Sum of all counts = total validators | + +### Adding a New Validator Node + +1. **Choose a unique node name** following the `<client>_<index>` convention: + ``` + zeam_0, zeam_1, ream_0, qlean_0, lantern_0, lighthouse_0, grandine_0, ethlambda_0 + ``` + +2. 
**Generate a P2P private key** (64-char hex): + ```bash + openssl rand -hex 32 + ``` + +3. **Assign unique ports** (for local mode): + - QUIC: 9001, 9002, 9003... (increment for each node) + - Metrics: 8081, 8082, 8083... (increment for each node) + +4. **Add the entry to `lean-quickstart/local-devnet/genesis/validator-config.yaml`:** + ```yaml + validators: + # ... existing nodes ... + + - name: "newclient_0" + privkey: "" + enrFields: + ip: "127.0.0.1" # Use real IP for ansible + quic: 9008 # Next available port + metricsPort: 8088 # Next available port + count: 1 + ``` + +5. **Regenerate genesis with new keys:** + ```bash + cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --generateGenesis --forceKeyGen + ``` + +### Removing a Validator Node + +1. **Delete the node entry** from `lean-quickstart/local-devnet/genesis/validator-config.yaml` + +2. **Regenerate genesis** (required because genesis state must reflect new validator set): + ```bash + cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --generateGenesis + ``` + Note: `--forceKeyGen` is NOT needed when removing - existing keys for remaining indices are reused. + +### Port Allocation Guide (Local Mode) + +When running multiple nodes locally, each needs unique ports: + +| Node | QUIC Port | Metrics Port | +|------|-----------|--------------| +| zeam_0 | 9001 | 8081 | +| ream_0 | 9002 | 8082 | +| qlean_0 | 9003 | 8083 | +| lantern_0 | 9004 | 8084 | +| lighthouse_0 | 9005 | 8085 | +| grandine_0 | 9006 | 8086 | +| ethlambda_0 | 9007 | 8087 | + +For **ansible mode**, all nodes can use the same ports (9001, 8081) since they run on different machines. 
+ +### Local vs Ansible Deployment + +| Aspect | Local | Ansible | +|--------|-------|---------| +| Config file | `lean-quickstart/local-devnet/genesis/validator-config.yaml` | `lean-quickstart/ansible-devnet/genesis/validator-config.yaml` | +| `deployment_mode` | `local` | `ansible` | +| IP addresses | `127.0.0.1` for all | Real server IPs | +| Ports | Must be unique per node | Same port, different machines | +| Genesis offset | +30 seconds | +360 seconds | + +## Node Lifecycle Commands + +### Start Nodes + +**Preferred method:** Use `run-devnet-with-timeout.sh` after configuring `validator-config.yaml`: +```bash +# Edit lean-quickstart/local-devnet/genesis/validator-config.yaml to include only nodes you want, then: +.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh 120 +``` + +**Alternative (no timeout):** +```bash +# All nodes in config +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all + +# Fresh start with new genesis +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --generateGenesis +``` + +**Advanced (only if explicitly requested):** Start specific nodes without modifying config. Note: validators will NOT be reassigned. +```bash +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node zeam_0,ream_0 +``` + +### Stop Nodes +```bash +# Via script +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --stop + +# Or press Ctrl+C in the terminal running spin-node.sh +``` + +### Clean and Restart +```bash +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --cleanData +``` + +## Log Collection + +### View Live Logs +```bash +docker logs zeam_0 # View current logs +docker logs -f zeam_0 # Follow/stream logs +``` + +### Dump Logs to Files + +**Automatic:** When using `run-devnet-with-timeout.sh`, logs are automatically dumped to `.log` files in the repo root before stopping. 
+ +**Single node (manual):** +```bash +docker logs zeam_0 > zeam_0.log 2>&1 +``` + +**All running nodes (manual):** +```bash +for node in $(docker ps --format '{{.Names}}' | grep -E '^(zeam|ream|qlean|lantern|lighthouse|grandine|ethlambda)_'); do + docker logs "$node" > "${node}.log" 2>&1 +done +``` + +**Follow and save simultaneously:** +```bash +docker logs -f zeam_0 2>&1 | tee zeam_0.log +``` + +**With timestamps:** +```bash +docker logs -t zeam_0 > zeam_0.log 2>&1 +``` + +### Data Directory Logs + +Client-specific data and file-based logs are stored at: +``` +lean-quickstart/local-devnet/data// +``` +Example: `lean-quickstart/local-devnet/data/zeam_0/` + +## Common Troubleshooting + +### Nodes Won't Start + +1. Check if containers are already running: + ```bash + docker ps | grep -E 'zeam|ream|qlean|lantern|lighthouse|grandine|ethlambda' + ``` +2. Stop existing nodes first: + ```bash + cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --stop + ``` + +### Nodes Not Finding Peers + +1. Verify all nodes are using the same genesis: + ```bash + cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --generateGenesis + ``` +2. Check `nodes.yaml` was generated with correct ENR records + +### Genesis Mismatch Errors + +Regenerate genesis for all nodes: +```bash +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --generateGenesis --forceKeyGen +``` + +### Port Conflicts + +Check if ports are in use: +```bash +lsof -i :9001 # Check QUIC port +lsof -i :8081 # Check metrics port +``` + +Update ports in `lean-quickstart/local-devnet/genesis/validator-config.yaml` if needed. 
+ +### Docker Permission Issues + +Run with sudo: +```bash +cd lean-quickstart && NETWORK_DIR=local-devnet ./spin-node.sh --node all --dockerWithSudo +``` + +### Stale Containers Cause Genesis Mismatch + +If you see `UnknownSourceBlock` or `OutOfMemory` deserialization errors, a container from a previous run may still be running with old genesis. + +**Fix:** Always clean up before starting a new devnet: +```bash +docker rm -f zeam_0 ethlambda_0 ream_0 qlean_0 lantern_0 grandine_0 2>/dev/null +``` + +Or use `run-devnet-with-timeout.sh` which handles cleanup automatically. + +### Time-Based Stop + +Use the `run-devnet-with-timeout.sh` script for timed runs. Remember to include genesis offset (30s local, 360s ansible) + startup buffer (10s): + +```bash +# 20 slots: 10 + 30 + 80 = 120s +.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh 120 + +# 50 slots: 10 + 30 + 200 = 240s +.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh 240 + +# 100 slots: 10 + 30 + 400 = 440s +.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh 440 +``` + +**Formula:** duration = 10 + 30 + (slots × 4) seconds (local mode) + +## Scripts + +| Script | Description | +|--------|-------------| +| `scripts/run-devnet-with-timeout.sh ` | Run devnet for specified duration, dump logs to repo root, then stop | + +## Reference + +See `references/clients.md` for client-specific details (images, ports, configurations). diff --git a/.claude/skills/devnet-runner/references/clients.md b/.claude/skills/devnet-runner/references/clients.md new file mode 100644 index 0000000..bce5f3a --- /dev/null +++ b/.claude/skills/devnet-runner/references/clients.md @@ -0,0 +1,129 @@ +# Client Reference + +Supported lean consensus clients and their configurations. 
+ +## Supported Clients + +| Client | Description | +|--------|-------------| +| zeam | Zig-based lean consensus client | +| ream | Rust-based lean consensus client | +| qlean | QEMU/Kagome lean implementation | +| lantern | PierTwo's lean consensus client | +| lighthouse | Rust Ethereum consensus client (lean fork) | +| grandine | High-performance consensus client | +| ethlambda | LambdaClass Rust implementation | + +## Docker Images + +Images are defined in `client-cmds/{client}-cmd.sh`. Edit the `node_docker` variable to change image/tag. + +| Client | Default Image | +|--------|---------------| +| zeam | `blockblaz/zeam:devnet1` | +| ream | `ghcr.io/reamlabs/ream:latest` | +| qlean | `qdrvm/qlean-mini:3a96a1f` | +| lantern | `piertwo/lantern:v0.0.1` | +| lighthouse | `hopinheimer/lighthouse:latest` | +| grandine | `sifrai/lean:unstable` | +| ethlambda | `ghcr.io/lambdaclass/ethlambda:local` | + +## Default Ports + +Ports are configured per-node in `validator-config.yaml`. Typical port assignments: + +| Node | QUIC Port | Metrics Port | +|------|-----------|--------------| +| *_0 | 9001 | 8081 | +| *_1 | 9002 | 8082 | +| *_2 | 9003 | 8083 | + +**Note:** Adjust ports to avoid conflicts when running multiple nodes. 
+ +## Client-Specific Configuration Notes + +### zeam + +- Image: `blockblaz/zeam:devnet1` +- Native Zig implementation +- Command file: `client-cmds/zeam-cmd.sh` + +### ream + +- Image: `ghcr.io/reamlabs/ream:latest` +- Rust implementation by Ream Labs +- Command file: `client-cmds/ream-cmd.sh` + +### qlean + +- Image: `qdrvm/qlean-mini:3a96a1f` +- Uses specific commit hash for stability +- Command file: `client-cmds/qlean-cmd.sh` + +### lantern + +- Image: `piertwo/lantern:v0.0.1` +- PierTwo implementation +- Command file: `client-cmds/lantern-cmd.sh` + +### lighthouse + +- Image: `hopinheimer/lighthouse:latest` +- Fork of the standard Rust Lighthouse client +- Command file: `client-cmds/lighthouse-cmd.sh` + +### grandine + +- Image: `sifrai/lean:unstable` +- High-performance client by Sifrai +- Command file: `client-cmds/grandine-cmd.sh` + +### ethlambda + +- Image: `ghcr.io/lambdaclass/ethlambda:local` +- Rust implementation by LambdaClass +- Command file: `client-cmds/ethlambda-cmd.sh` + +## Changing Docker Images + +To use a different image or tag: + +1. **Temporary (single run):** Use `--tag` flag: + ```bash + NETWORK_DIR=local-devnet ./spin-node.sh --node zeam_0 --tag dev + ``` + +2. 
**Permanent:** Edit `client-cmds/{client}-cmd.sh` and modify `node_docker`: + ```bash + node_docker="your-registry/image:tag" + ``` + +## Known Issues & Compatibility + +### zeam + +| Issue | Image Tags Affected | Description | +|-------|---------------------|-------------| +| CLI flag change | devnet2+ | Uses `--api-port` instead of `--metrics_port` for metrics endpoint | +| XMSS prover crash | devnet2 | Missing prover setup files cause panic when producing blocks with signature aggregation | +| Block format incompatibility | devnet2 ↔ ethlambda:local | Cannot deserialize blocks from ethlambda - OutOfMemory error | + +### ethlambda + +| Issue | Image Tags Affected | Description | +|-------|---------------------|-------------| +| Manifest unknown warning | local | Docker shows "manifest unknown" but falls back to local image - can be ignored | +| NoPeersSubscribedToTopic | all | Expected warning when no peers are connected to gossipsub topics | + +## Environment Variables Available to Clients + +These are set by `spin-node.sh` and available in client command scripts: + +| Variable | Description | +|----------|-------------| +| `$item` | Node name (e.g., `zeam_0`) | +| `$configDir` | Genesis config directory path | +| `$dataDir` | Data directory path | +| `$quicPort` | QUIC port from config | +| `$metricsPort` | Metrics port from config | +| `$privkey` | P2P private key | diff --git a/.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh b/.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh new file mode 100755 index 0000000..b30e8ac --- /dev/null +++ b/.claude/skills/devnet-runner/scripts/run-devnet-with-timeout.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Run devnet for a specified number of seconds, dump logs before stopping +# +# Usage: ./run-devnet-with-timeout.sh +# Must be run from the ethlambda repo root (where lean-quickstart/ is) + +if [ -z "$1" ]; then + echo "Usage: $0 " + exit 1 +fi + +REPO_ROOT="$(cd "$(dirname 
"${BASH_SOURCE[0]}")/../../../.." && pwd)" +QUICKSTART_DIR="$REPO_ROOT/lean-quickstart" + +if [ ! -d "$QUICKSTART_DIR" ]; then + echo "Error: lean-quickstart not found at $QUICKSTART_DIR" + echo "Run 'make lean-quickstart' first to clone it." + exit 1 +fi + +cd "$QUICKSTART_DIR" +NETWORK_DIR=local-devnet ./spin-node.sh --node all --generateGenesis 2>&1 | tee "$REPO_ROOT/devnet.log" & +PID=$! +sleep "$1" + +# Dump logs from all running node containers before stopping +echo "Dumping node logs..." +for node in $(docker ps --format '{{.Names}}' | grep -E '^(zeam|ream|qlean|lantern|lighthouse|grandine|ethlambda)_'); do + docker logs "$node" > "$REPO_ROOT/${node}.log" 2>&1 + echo " Dumped ${node}.log" +done + +kill $PID 2>/dev/null +wait $PID 2>/dev/null