From 9129203bb6820d786b05a30402790df5013920e3 Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Mon, 23 Feb 2026 18:38:42 -0500 Subject: [PATCH 1/5] Add content pipeline skills and publish-analysis command - blog-pipeline-skill: results.json schema, template syntax, policyengine.py simulation patterns, chart generation, source tracking, and SEO output - us-household-analysis-skill: US household-level impact patterns including calculate_household_impact(), situation dicts, income sweeps, reform comparisons, and results.json generation for case studies - uk-household-analysis-skill: UK household-level impact patterns including benefit unit structure, would_claim flags, UC taper analysis, regional variation, and Scottish tax differences - publish-analysis command: 9-phase workflow from topic to published post with pre-flight checklist, verification table, and error handling - Register all new skills/command in analysis-tools, content, and complete plugins Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/marketplace.json | 30 +- commands/publish-analysis.md | 348 +++++++++++ skills/content/blog-pipeline-skill/SKILL.md | 524 ++++++++++++++++ .../uk-household-analysis-skill/SKILL.md | 584 ++++++++++++++++++ .../us-household-analysis-skill/SKILL.md | 458 ++++++++++++++ 5 files changed, 1936 insertions(+), 8 deletions(-) create mode 100644 commands/publish-analysis.md create mode 100644 skills/content/blog-pipeline-skill/SKILL.md create mode 100644 skills/content/uk-household-analysis-skill/SKILL.md create mode 100644 skills/content/us-household-analysis-skill/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 51ce065..d77681c 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -161,13 +161,16 @@ "description": "Policy analysis and research - impact studies, dashboards, notebooks, and visualizations", "source": "./", "category": "analysis", - "version": "3.9.0", - "keywords": ["analysis", 
"research", "policy", "impact", "streamlit", "plotly", "notebooks"], + "version": "3.10.0", + "keywords": ["analysis", "research", "policy", "impact", "streamlit", "plotly", "notebooks", "blog", "pipeline"], "author": { "name": "PolicyEngine", "url": "https://github.com/PolicyEngine" }, "license": "MIT", + "commands": [ + "./commands/publish-analysis.md" + ], "skills": [ "./skills/documentation/policyengine-user-guide-skill", "./skills/tools-and-apis/policyengine-python-client-skill", @@ -180,7 +183,10 @@ "./skills/data-science/microdf-skill", "./skills/documentation/policyengine-design-skill", "./skills/documentation/policyengine-writing-skill", - "./skills/documentation/policyengine-research-lookup-skill" + "./skills/documentation/policyengine-research-lookup-skill", + "./skills/content/blog-pipeline-skill", + "./skills/content/us-household-analysis-skill", + "./skills/content/uk-household-analysis-skill" ] }, { @@ -212,11 +218,11 @@ }, { "name": "content", - "description": "Content generation - social images and posts from blog articles", + "description": "Content pipeline - blog post analysis, social images, and distribution from policy reforms", "source": "./", "category": "marketing", - "version": "3.4.0", - "keywords": ["content", "social", "marketing", "images"], + "version": "3.10.0", + "keywords": ["content", "social", "marketing", "images", "blog", "pipeline", "analysis"], "author": { "name": "PolicyEngine", "url": "https://github.com/PolicyEngine" @@ -226,10 +232,14 @@ "./agents/content/content-orchestrator.md" ], "commands": [ - "./commands/generate-content.md" + "./commands/generate-content.md", + "./commands/publish-analysis.md" ], "skills": [ - "./skills/content/content-generation-skill" + "./skills/content/content-generation-skill", + "./skills/content/blog-pipeline-skill", + "./skills/content/us-household-analysis-skill", + "./skills/content/uk-household-analysis-skill" ] }, { @@ -283,6 +293,7 @@ "./commands/create-pr.md", 
"./commands/encode-policy.md", "./commands/generate-content.md", + "./commands/publish-analysis.md", "./commands/review-pr.md", "./commands/fix-pr.md", "./commands/new-tool.md" @@ -323,6 +334,9 @@ "./skills/documentation/policyengine-research-lookup-skill", "./skills/documentation/policyengine-plugin-maintenance-skill", "./skills/content/content-generation-skill", + "./skills/content/blog-pipeline-skill", + "./skills/content/us-household-analysis-skill", + "./skills/content/uk-household-analysis-skill", "./skills/technical-patterns/seo-checklist-skill" ] } diff --git a/commands/publish-analysis.md b/commands/publish-analysis.md new file mode 100644 index 0000000..1b66dbe --- /dev/null +++ b/commands/publish-analysis.md @@ -0,0 +1,348 @@ +--- +name: publish-analysis +description: End-to-end blog post pipeline - from research question to published, distributed post with traceable numbers +arguments: + - name: topic + description: Research question, reform description, or bill reference (e.g., "SALT cap repeal" or "HR 1234") + required: true + - name: country + description: Country code (us or uk) + default: "us" + - name: year + description: Analysis year + default: "2026" +--- + +# Publish Analysis: $ARGUMENTS + +Generate a complete, SEO-optimized blog post from a policy reform — all numbers traceable to code, zero hard-coded values. Uses `policyengine.py` for local simulation. 
+ +## Prerequisites + +Load these skills before starting: +- `blog-pipeline` — results.json schema, template syntax, policyengine.py patterns +- `policyengine-writing-skill` — neutral tone, active voice, PE style +- `policyengine-analysis-skill` — simulation patterns, chart types +- `content-generation-skill` — social images and copy + +--- + +## Pre-Flight Checklist + +Before starting: +- [ ] I will use `policyengine.py` for all simulations (not API, not policyengine-us directly) +- [ ] I will generate results.json with `source_line` and `source_url` for every value +- [ ] I will use `{{}}` template references — zero hard-coded numbers in the blog post +- [ ] I will follow policyengine-writing-skill for neutral tone and active voice +- [ ] I will generate descriptive alt text with 2-3 key data points for every chart +- [ ] I will ask the user before creating any GitHub repos or PRs + +--- + +## Key Rules + +1. **Zero hard-coded values**: Every number in the blog post comes from results.json via `{{}}` templates +2. **Every number is traceable**: `source_line` and `source_url` in results.json point to the exact code +3. **Neutral language**: Describe what policies do, not whether they are good or bad (see policyengine-writing-skill) +4. **No iframes**: Charts are static `<img>` tags from GitHub Pages with descriptive alt text +5. **Active voice**: "The reform reduces poverty by 3.2%" not "Poverty is reduced by 3.2%" +6. **Quantitative precision**: "$15.2 billion" not "significant cost" +7. **Sentence case headings**: "Budgetary impact" not "Budgetary Impact" +8. **Show calculations**: Spell out how derived values are computed + +--- + +## Phase 1: Define the Reform + +1. **Parse the topic** — identify what policy change to analyze +2. **Ask clarifying questions** if needed: + - What specific parameters change? + - What is the baseline (current law, TCJA extension, etc.)? + - What year to analyze? + - US or UK? +3. 
**Identify the PE parameter paths** for the reform: + ```python + from policyengine.tax_benefit_models.{country} import {country}_latest + # Search parameter names matching the topic + ``` + +--- + +## Phase 2: Create Analysis Repo + +1. **Create a new GitHub repo** under PolicyEngine org: + ```bash + gh repo create PolicyEngine/{topic-slug} --public --clone + cd {topic-slug} + ``` + +2. **Create the repo structure:** + ``` + analysis.py # Full simulation + results.json generation + results.json # Generated by analysis.py + charts/ # Generated by analysis.py + requirements.txt # policyengine, plotly, kaleido + README.md # How to reproduce + .github/workflows/pages.yml # Chart deployment to GitHub Pages + ``` + +3. **Write requirements.txt:** + ``` + policyengine>=0.1.0 + plotly>=5.15.0 + kaleido>=0.2.1 + ``` + +4. **Write .github/workflows/pages.yml** for auto-deploying charts to GitHub Pages. + +5. **Enable GitHub Pages** on the repo (Settings > Pages > GitHub Actions). + +--- + +## Phase 3: Write and Run analysis.py + +Write a Python script using `policyengine.py` that: +1. Defines the reform using `Policy`, `Parameter`, `ParameterValue` +2. Loads the dataset (`PolicyEngineUSDataset` or `PolicyEngineUKDataset`) +3. Runs baseline and reform simulations via `Simulation.run()` +4. Computes all outputs using built-in classes (`DecileImpact`, `Poverty`, `Aggregate`, `ChangeAggregate`) +5. Generates chart PNGs using Plotly with PE brand colors +6. 
Writes results.json with source line tracking via `inspect` + +### analysis.py structure: + +```python +import json, os, inspect, datetime +from policyengine.core import Simulation, Policy, Parameter, ParameterValue +from policyengine.tax_benefit_models.us import PolicyEngineUSDataset, us_latest +from policyengine.outputs.aggregate import Aggregate, AggregateType +from policyengine.outputs.decile_impact import DecileImpact +from policyengine.outputs.poverty import Poverty +from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType +import plotly.graph_objects as go +import plotly.io as pio + +REPO = "PolicyEngine/{topic-slug}" +YEAR = 2026 + +# 1. Define reform +param = Parameter(name="...", tax_benefit_model_version=us_latest, data_type=float) +pv = ParameterValue(parameter=param, start_date=datetime.date(2026,1,1), + end_date=datetime.date(2026,12,31), value=NEW_VALUE) +policy = Policy(name="Reform Name", parameter_values=[pv]) + +# 2. Load dataset +dataset = PolicyEngineUSDataset(name="enhanced_cps_2024", + filepath="path/to/enhanced_cps_2024.h5", year=YEAR) + +# 3. Run simulations +baseline = Simulation(dataset=dataset, tax_benefit_model_version=us_latest) +baseline.run() + +reform = Simulation(dataset=dataset, tax_benefit_model_version=us_latest, policy=policy) +reform.run() + +# 4. Extract results using built-in output classes +decile = DecileImpact(baseline_simulation=baseline, reform_simulation=reform, + variable="household_net_income") +decile.run() + +poverty = Poverty(baseline_simulation=baseline, reform_simulation=reform) +poverty.run() + +# 5. Direct variable access for custom analysis +baseline_net = baseline.output_dataset.data.household["household_net_income"] +reform_net = reform.output_dataset.data.household["household_net_income"] +weights = baseline.output_dataset.data.household["household_weight"] + +# 6. Generate charts +fig = go.Figure(...) 
+os.makedirs("charts", exist_ok=True) +pio.write_image(fig, "charts/distributional.png", width=1200, height=600, scale=2) + +# 7. Build results.json with source tracking +results = { + "metadata": {"title": "...", "repo": REPO, ...}, + "values": { + "budget_impact": { + "value": budget_impact, + "display": format_currency(budget_impact), + "source_line": inspect.currentframe().f_lineno, + "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", + } + }, + "tables": {...}, + "charts": {...}, +} +with open("results.json", "w") as f: + json.dump(results, f, indent=2) +``` + +### Run the script: +```bash +pip install -r requirements.txt +python analysis.py +``` + +### Verify outputs: +- `results.json` exists with all expected keys +- `charts/*.png` files exist and look correct +- All `source_line` values point to real code lines + +--- + +## Phase 4: Write Blog Post + +1. **Draft the blog post** as a markdown file using `{{}}` template references: + + ```markdown + The [reform name] would [verb] {{budget_impact}} per year. + + ## Budgetary impact + + {{table:distributional}} + + {{chart:distributional}} + + ## Household examples + + {{table:household_examples}} + + ## Poverty impact + + The reform would change the poverty rate by {{poverty_change}}. + + ## Methodology + + This analysis uses PolicyEngine's microsimulation model with the + Enhanced CPS 2024 dataset. All calculations are open source and + reproducible. [View the analysis code](https://github.com/REPO). + ``` + +2. **Follow the writing skill rules:** + + **✅ Correct (neutral, active, quantitative):** + ``` + Repealing the SALT cap costs {{budget_impact}} in {{year}}. + The top income decile receives {{top_decile_share}} of total benefits. + ``` + + **❌ Wrong (value judgments, passive, vague):** + ``` + Repealing the SALT cap would unfortunately cost a significant amount. + The wealthiest households receive a disproportionate share of benefits. + ``` + +3. 
**Write chart alt text** — descriptive, includes key data points, 1-3 sentences. + +4. **Validate all references** — every `{{name}}` in the markdown must exist in results.json. + +--- + +## Phase 5: Create Posts Entry + +Add an entry to `posts.json` in policyengine-app-v2: + +```json +{ + "title": "...", + "description": "...", + "date": "YYYY-MM-DD", + "tags": ["{country}", "policy"], + "authors": ["..."], + "filename": "{topic-slug}.md", + "image": "{topic-slug}.png", + "analysis_repo": "PolicyEngine/{topic-slug}" +} +``` + +The `analysis_repo` field triggers the resolve-posts build step. + +--- + +## Phase 6: Generate Social Content + +Use the content-generation skill to create: + +1. **Social sharing image** (1200x630) using the social-image template +2. **Twitter/X post** — key finding + image + link +3. **LinkedIn post** — more context, professional tone + +Social copy must follow the same neutral tone as the blog post: + +**✅ Correct:** +``` +Repealing the SALT cap would cost $15.2 billion in 2026. +The top income decile receives 42% of total benefits. + +Full analysis: [link] +``` + +**❌ Wrong:** +``` +BREAKING: SALT cap repeal is a massive giveaway to the wealthy! +This shocking analysis reveals who really benefits. +``` + +--- + +## Phase 7: Create PRs + +### Analysis repo +```bash +cd {topic-slug} +git add . +git commit -m "Add {topic} analysis with results.json and charts" +git push origin main +``` + +### Blog post PR (policyengine-app-v2) +Use `/create-pr` command for proper PR creation with CI check waiting. 
+ +--- + +## Phase 8: Verify + +Before marking as done, run through this checklist: + +| Check | How to verify | +|-------|---------------| +| All `{{}}` refs resolve | Search markdown for `{{` — each must match a key in results.json | +| Charts load | curl each GitHub Pages chart URL — expect 200 | +| Alt text is descriptive | Each alt starts with chart type and includes 2-3 data points | +| No hard-coded numbers | Search markdown for raw digits — each should be inside `{{}}` | +| Neutral language | No "unfortunately", "significant", "massive", "dramatic" | +| Active voice | No "is reduced by", "are projected by" | +| Sentence case headings | No title case in H2/H3 headers | +| Source links work | `source_url` values return 200, point to correct lines | +| Methodology section | Specifies model version, dataset, year, and assumptions | + +--- + +## Phase 9: Distribution Checklist + +After merge and deploy: + +- [ ] Post to Twitter/X with key finding + image +- [ ] Post to LinkedIn with key finding + image +- [ ] Send to newsletter list (if applicable) +- [ ] Direct outreach to bill sponsors (if bill analysis) +- [ ] Pitch to relevant reporters +- [ ] Log in CRM +- [ ] Confirm GA4 events firing + +--- + +## Error Handling + +| Problem | Cause | Fix | +|---------|-------|-----| +| Dataset not found | HDF5 file not available locally | Download from HuggingFace: `hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5` | +| Memory issues | Microsimulation loads ~60k households | Ensure 8GB+ RAM available. 
Use `simulation.ensure()` for caching | +| Chart generation fails | kaleido not installed | `pip install kaleido` or note in results.json that charts need manual generation | +| Unresolvable `{{ref}}` | Key mismatch between markdown and results.json | Fix spelling or add missing key to results.json | +| Stale source lines | Code changed after generating results.json | Re-run analysis.py to regenerate results.json | + +--- + +Start by parsing the topic, then proceed through all phases. diff --git a/skills/content/blog-pipeline-skill/SKILL.md b/skills/content/blog-pipeline-skill/SKILL.md new file mode 100644 index 0000000..ffabef0 --- /dev/null +++ b/skills/content/blog-pipeline-skill/SKILL.md @@ -0,0 +1,524 @@ +--- +name: blog-pipeline +description: End-to-end blog post pipeline - results.json schema, template syntax, policyengine.py local simulation, chart generation, and SEO-optimized publishing +--- + +# Blog Post Pipeline + +How to produce a fully traceable, SEO-optimized blog post from a policy reform using `policyengine.py` for local simulation. + +## For Users + +### What This Pipeline Does + +Every PolicyEngine blog post follows a strict pipeline: + +1. **Agent runs simulations locally** using `policyengine.py` — full microsimulation, all variables accessible +2. **Agent generates results.json** with every value, table, and chart traceable to its source +3. **Agent writes a blog post** using `{{value}}` template references — zero hard-coded numbers +4. **Build step resolves templates** at deploy time by fetching results.json from GitHub +5. **Output is fully indexable** — text with source links, `<img>` charts with alt text, HTML tables + +### Core Principles + +1. **Zero hard-coded values** — every number comes from results.json +2. **Every number is traceable** — click any number to see the code line that produced it +3. **No iframes** — charts are static `<img>` from GitHub Pages with descriptive alt text +4. 
**No computation in posts** — blog posts are a presentation layer only +5. **Auto-updating on deploy** — resolve-posts fetches latest results.json automatically +6. **Neutral language** — active voice, quantitative precision, no value judgments (see policyengine-writing-skill) + +--- + +## For Analysts + +### results.json Schema + +Every analysis produces a `results.json` file. This is the contract between the analysis and the blog post. + +```json +{ + "metadata": { + "title": "SALT Cap Repeal Impact Analysis", + "repo": "PolicyEngine/salt-cap-analysis", + "commit": "a1b2c3d", + "generated_at": "2026-02-23T14:30:00Z", + "policyengine_version": "0.1.0", + "dataset": "enhanced_cps_2024", + "country_id": "us", + "year": 2026 + }, + "values": { + "budget_impact": { + "value": -15200000000, + "display": "$15.2 billion", + "source_line": 47, + "source_url": "https://github.com/PolicyEngine/salt-cap-analysis/blob/main/analysis.py#L47" + } + }, + "tables": { + "distributional": { + "title": "Distributional Impact by Income Decile", + "headers": ["Decile", "Avg Change", "% Affected"], + "rows": [["Bottom 10%", "+$340", "12%"], ["Top 10%", "+$8,200", "89%"]], + "source_line": 82, + "source_url": "https://github.com/PolicyEngine/salt-cap-analysis/blob/main/analysis.py#L82" + } + }, + "charts": { + "distributional": { + "url": "https://PolicyEngine.github.io/salt-cap-analysis/charts/distributional.png", + "alt": "Bar chart showing SALT cap repeal benefits by income decile. 
Top 10% gains $8,200 on average.", + "width": 1200, + "height": 600, + "source_line": 105, + "source_url": "https://github.com/PolicyEngine/salt-cap-analysis/blob/main/analysis.py#L105" + } + } +} +``` + +### Required Fields + +| Section | Field | Type | Purpose | +|---------|-------|------|---------| +| metadata | `repo` | string | GitHub org/repo for source links | +| metadata | `commit` | string | Exact commit for reproducibility | +| metadata | `policyengine_version` | string | Package version used | +| metadata | `dataset` | string | Dataset name (e.g., `enhanced_cps_2024`) | +| metadata | `year` | number | Analysis year | +| values.* | `value` | number | Raw numeric value | +| values.* | `display` | string | Formatted display text (e.g., "$15.2 billion") | +| values.* | `source_line` | number | Line number in analysis.py | +| values.* | `source_url` | string | Full GitHub URL to source line | +| tables.* | `headers` | string[] | Column headers | +| tables.* | `rows` | string[][] | Row data (pre-formatted) | +| charts.* | `url` | string | GitHub Pages URL to PNG | +| charts.* | `alt` | string | Descriptive alt text with data points | + +--- + +### Template Syntax + +Blog posts use `{{}}` references that are resolved at build time: + +| Pattern | Resolves To | Example | +|---------|-------------|---------| +| `{{value_name}}` | Linked display text | `{{budget_impact}}` → `[$15.2 billion](source_url)` | +| `{{table:name}}` | Markdown table with caption | `{{table:distributional}}` → full table | +| `{{chart:name}}` | `![alt](url)` image | `{{chart:distributional}}` → image with alt text | + +**✅ Correct template usage:** +```markdown +The reform would cost {{budget_impact}} per year. + +{{table:distributional}} + +{{chart:distributional}} + +The poverty rate changes by {{poverty_change}}. +``` + +**❌ Wrong — hard-coded numbers:** +```markdown +The reform would cost $15.2 billion per year. 
+ +| Decile | Avg Change | +| Bottom 10% | +$340 | + +![chart](https://example.com/chart.png) + +The poverty rate falls by 3.2%. +``` + +Every number in the blog post must come from results.json. If you type a raw number into the markdown, it will not have a source link and cannot be traced or auto-updated. + +--- + +### Using policyengine.py + +The agent writes and runs Python using `policyengine.py` for all simulations. This runs locally — no API calls, full variable access. + +**Install:** +```bash +pip install policyengine +``` + +**Define a reform:** +```python +from policyengine.core import Simulation, Policy, Parameter, ParameterValue +from policyengine.tax_benefit_models.us import PolicyEngineUSDataset, us_latest +import datetime + +# Create reform: e.g., remove SALT cap +param = Parameter( + name="gov.irs.deductions.itemized.salt_and_real_estate.cap", + tax_benefit_model_version=us_latest, + data_type=float, +) +pv = ParameterValue( + parameter=param, + start_date=datetime.date(2026, 1, 1), + end_date=datetime.date(2026, 12, 31), + value=0, # Remove cap +) +policy = Policy(name="SALT Cap Repeal", parameter_values=[pv]) +``` + +**Run baseline + reform microsimulations:** +```python +# Load dataset +dataset = PolicyEngineUSDataset( + name="enhanced_cps_2024", + filepath="path/to/enhanced_cps_2024.h5", + year=2026, +) + +# Baseline +baseline_sim = Simulation(dataset=dataset, tax_benefit_model_version=us_latest) +baseline_sim.run() + +# Reform +reform_sim = Simulation(dataset=dataset, tax_benefit_model_version=us_latest, policy=policy) +reform_sim.run() +``` + +**Access any variable directly:** +```python +# Household-level results +baseline_net = baseline_sim.output_dataset.data.household["household_net_income"] +reform_net = reform_sim.output_dataset.data.household["household_net_income"] +change = reform_net - baseline_net + +# Tax-unit-level +baseline_tax = baseline_sim.output_dataset.data.tax_unit["income_tax"] +reform_tax = 
reform_sim.output_dataset.data.tax_unit["income_tax"] + +# Access weights for proper aggregation +weights = baseline_sim.output_dataset.data.household["household_weight"] +``` + +**Built-in distributional analysis:** +```python +from policyengine.outputs.decile_impact import DecileImpact +from policyengine.outputs.poverty import Poverty +from policyengine.outputs.inequality import Inequality +from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType + +# Decile impact +decile = DecileImpact( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, + variable="household_net_income", +) +decile.run() + +# Poverty +poverty = Poverty( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, +) +poverty.run() + +# Winners/losers count +winners = ChangeAggregate( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, + variable="household_net_income", + aggregate_type=ChangeAggregateType.COUNT, + change_geq=1, +) +winners.run() +``` + +**Budget impact:** +```python +from policyengine.outputs.aggregate import Aggregate, AggregateType + +baseline_revenue = Aggregate( + simulation=baseline_sim, + variable="household_tax", + aggregate_type=AggregateType.SUM, +) +baseline_revenue.run() + +reform_revenue = Aggregate( + simulation=reform_sim, + variable="household_tax", + aggregate_type=AggregateType.SUM, +) +reform_revenue.run() + +budget_impact = reform_revenue.result - baseline_revenue.result +``` + +--- + +### Chart Generation + +Generate chart PNGs using Plotly with PolicyEngine brand styling: + +```python +import plotly.graph_objects as go +import plotly.io as pio + +TEAL = "#39C6C0" +BLUE = "#2C6496" + +fig = go.Figure() +fig.add_trace(go.Bar( + x=[f"Decile {i}" for i in range(1, 11)], + y=decile_values, + marker_color=[TEAL if v >= 0 else "#DC2626" for v in decile_values], +)) +fig.update_layout( + template="plotly_white", + font=dict(family="Inter, sans-serif"), + xaxis_title="Income Decile", + 
yaxis_title="Average Annual Change ($)", +) + +os.makedirs("charts", exist_ok=True) +pio.write_image(fig, "charts/distributional.png", width=1200, height=600, scale=2) +``` + +Charts are deployed to GitHub Pages via a GitHub Actions workflow in the analysis repo. + +--- + +### Alt Text for Charts + +Every chart needs descriptive alt text that includes key data points. Critical for SEO and accessibility. + +**✅ Correct (descriptive, includes data):** +``` +Bar chart showing SALT cap repeal benefits by income decile. +Top decile gains $8,200 average. Bottom decile gains $340 average. +Middle deciles gain $500-$1,200. 89% of top-decile households affected. +``` + +**❌ Wrong (vague, no data):** +``` +Chart showing distributional impact of the reform. +``` +``` +distributional.png +``` +``` +Impact by income group. +``` + +Alt text should: +- Start with the chart type ("Bar chart showing...", "Line chart of...") +- Include 2-3 key data points with actual numbers +- Mention the most significant finding +- Be 1-3 sentences + +--- + +### Blog Post Writing Rules + +Blog posts generated through this pipeline must follow the policyengine-writing-skill. 
Key rules: + +#### Neutral tone + +**✅ Correct (neutral — describes what policies do):** +``` +The reform reduces poverty by 3.2% and raises inequality by 0.16% +The top income decile receives 42% of total benefits +``` + +**❌ Wrong (value judgments):** +``` +The reform successfully reduces poverty but unfortunately raises inequality +The wealthiest households receive a disproportionate share of benefits +``` + +#### Active voice with specific numbers + +**✅ Correct:** +``` +The bill lowers the top rate from 5.9% to 5.4% +Repealing the SALT cap costs $15.2 billion in 2026 +``` + +**❌ Wrong:** +``` +The top rate is lowered by the bill +Repealing the SALT cap significantly increases the deficit +``` + +#### Sentence case headings + +**✅ Correct:** +``` +## Budgetary impact +## Distributional analysis +## Poverty and inequality +``` + +**❌ Wrong:** +``` +## Budgetary Impact +## Distributional Analysis +## Poverty and Inequality +``` + +#### Show calculations explicitly + +**✅ Correct:** +``` +The reform costs $15.2 billion per year: $18.4 billion in reduced income tax +revenue, partially offset by $3.2 billion in higher payroll tax collections. +``` + +**❌ Wrong:** +``` +The reform has a significant budgetary impact. 
+``` + +--- + +### Source Tracking + +Every value in results.json must include `source_line` and `source_url` pointing to the exact line in analysis.py that computed it: + +```python +import inspect + +line = inspect.currentframe().f_lineno +budget_impact = reform_revenue.result - baseline_revenue.result + +results["values"]["budget_impact"] = { + "value": budget_impact, + "display": format_currency(budget_impact), + "source_line": line, + "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", +} +``` + +**✅ Correct — every value traceable:** +```json +{ + "budget_impact": { + "value": -15200000000, + "display": "$15.2 billion", + "source_line": 47, + "source_url": "https://github.com/PolicyEngine/salt-cap/blob/main/analysis.py#L47" + } +} +``` + +**❌ Wrong — value without source:** +```json +{ + "budget_impact": { + "value": -15200000000, + "display": "$15.2 billion" + } +} +``` + +--- + +## For Contributors + +### Analysis Repo Structure + +``` +analysis-repo/ +├── analysis.py # Main script — policyengine.py simulations, charts, results.json +├── results.json # Generated output — the contract +├── charts/ # Generated PNGs — deployed to GitHub Pages +│ ├── distributional.png +│ └── household_impact.png +├── requirements.txt # policyengine, plotly, kaleido +├── README.md # How to reproduce +└── .github/workflows/ + └── pages.yml # Auto-deploy charts to GitHub Pages on push +``` + +### Blog Post in policyengine-app-v2 + +``` +app/src/data/posts/ +├── posts.json # Add entry with analysis_repo field +└── articles/ + └── salt-cap-analysis.md # Blog post with {{}} template refs +``` + +**posts.json entry:** +```json +{ + "title": "SALT Cap Repeal Would Cost $15 Billion", + "description": "Analysis of repealing the SALT deduction cap...", + "date": "2026-02-23", + "tags": ["us", "policy", "featured"], + "authors": ["max-ghenis"], + "filename": "salt-cap-analysis.md", + "image": "salt-cap-analysis.png", + "analysis_repo": 
"PolicyEngine/salt-cap-analysis" +} +``` + +The `analysis_repo` field triggers the resolve-posts build step to fetch results.json and resolve all `{{}}` templates before Vite builds the site. + +### resolve-posts Build Step + +Runs automatically before Vite build: + +1. Reads posts.json → finds posts with `analysis_repo` field +2. Fetches `results.json` from `raw.githubusercontent.com/{repo}/main/results.json` +3. Reads the markdown template file +4. Resolves `{{value}}` → `[display](source_url)` (linked text) +5. Resolves `{{table:name}}` → markdown table with caption and source link +6. Resolves `{{chart:name}}` → `![alt](github_pages_url)` image +7. Writes resolved markdown back to the articles directory +8. Vite builds the site with all values populated + +### SEO Output + +The resolved blog post produces: +- **Text with source links** — every number is a clickable link to the code +- **Charts as ``** — from GitHub Pages with descriptive alt text (fully indexable) +- **Data tables in HTML** — eligible for Google featured snippets +- **JSON-LD Article schema** — served by middleware to crawlers +- **OG tags** — for social media sharing previews + +--- + +## Pipeline Checklist + +Before publishing, verify: + +- [ ] Every `{{}}` ref in the markdown exists in results.json +- [ ] Every value in results.json has `source_line` and `source_url` +- [ ] Charts load from GitHub Pages URLs +- [ ] Alt text is descriptive with 2-3 key data points +- [ ] No hard-coded numbers in the markdown (search for raw digits) +- [ ] Neutral language — no value judgments (see policyengine-writing-skill) +- [ ] Active voice throughout +- [ ] Sentence case headings +- [ ] Methodology section specifies model version, dataset, and assumptions +- [ ] Source links point to real code lines (not stale line numbers) + +--- + +## Common Issues + +| Problem | Cause | Fix | +|---------|-------|-----| +| `{{name}}` appears literally in published post | Key missing from results.json | Add the key to 
results.json or fix the spelling | +| Source link points to wrong line | Code changed after results.json was generated | Re-run analysis.py to regenerate results.json | +| Chart 404 on GitHub Pages | Pages workflow hasn't run | Push to main to trigger the pages.yml workflow | +| Numbers don't match between text and tables | Template refs point to different values | Each number should reference one canonical value in results.json | +| Alt text says "chart" with no data | Generic placeholder | Rewrite to include chart type and 2-3 key data points | + +--- + +## Resources + +- policyengine.py repo: See policyengine-python-client-skill +- Writing skill: See policyengine-writing-skill for tone and style +- Content generation skill: See content-generation-skill for social images +- Analysis skill: See policyengine-analysis-skill for simulation patterns diff --git a/skills/content/uk-household-analysis-skill/SKILL.md b/skills/content/uk-household-analysis-skill/SKILL.md new file mode 100644 index 0000000..5e1c7fc --- /dev/null +++ b/skills/content/uk-household-analysis-skill/SKILL.md @@ -0,0 +1,584 @@ +--- +name: uk-household-analysis +description: Household-level impact analysis patterns for UK policy reforms - define households, calculate tax/benefit changes, generate results.json +--- + +# UK Household Analysis + +Patterns for analyzing how UK policy reforms affect specific household types. Use this skill when a blog post needs household-level case studies (e.g., "a single parent earning £30,000 sees a £520 increase in Universal Credit"). 
+ +## When to Use This Skill + +- Blog posts showing how a reform affects representative households +- Calculators that let users enter their own household details +- Case studies comparing reform impacts across family types, income levels, or regions +- Budget constraint / marginal tax rate analysis across an income range + +For population-level microsimulation (deciles, poverty rates, aggregate budget impact), see blog-pipeline-skill instead. + +--- + +## Household Structure + +A UK household in PolicyEngine has 3 entity groups. This is simpler than the US structure, which also has tax units, SPM units, families, and marital units. + +| Entity | Purpose | Key variables | +|--------|---------|---------------| +| `household` | Physical dwelling | `region`, `rent`, `council_tax`, `tenure_type`, `hbai_household_net_income` | +| `benunit` | Benefit unit (means-testing unit) | `universal_credit`, `child_benefit`, `would_claim_uc` | +| `person` | Individual | `age`, `employment_income`, `income_tax`, `national_insurance` | + +### Benefit Unit vs Household + +A household can contain multiple benefit units. A benefit unit is typically: +- A single adult, or +- A couple (married or cohabiting), plus +- Any dependent children + +This matters for means-tested benefits like Universal Credit, which are assessed per benefit unit, not per household. + +### The `would_claim_*` Flags + +UK benefits are not automatic. 
You must set `would_claim_*` flags to `True` for benefits to be calculated: + +| Flag | Benefit | Default | +|------|---------|---------| +| `would_claim_uc` | Universal Credit | False | +| `would_claim_child_benefit` | Child Benefit | False | +| `would_claim_WTC` | Working Tax Credit (legacy) | False | +| `would_claim_CTC` | Child Tax Credit (legacy) | False | +| `would_claim_HB` | Housing Benefit (legacy) | False | +| `would_claim_IS` | Income Support (legacy) | False | +| `would_claim_JSA` | Jobseeker's Allowance | False | +| `would_claim_PC` | Pension Credit | False | + +**✅ Correct — benefits will be calculated:** +```python +benunit={"would_claim_uc": True, "would_claim_child_benefit": True} +``` + +**❌ Wrong — benefits will be zero even if eligible:** +```python +benunit={} # No would_claim flags set +``` + +--- + +## Approach 1: Single Household (calculate_household_impact) + +```python +from policyengine.tax_benefit_models.uk import ( + UKHouseholdInput, + calculate_household_impact, +) + +# Single adult earning £35,000 +household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 35_000}], + household={ + "region": "NORTH_WEST", + "tenure_type": "RENT_PRIVATELY", + "rent": 9_600, # £800/month + "council_tax": 1_400, + }, + benunit={ + "would_claim_uc": True, + }, + year=2026, +) +result = calculate_household_impact(household) + +net_income = result.household["hbai_household_net_income"] +income_tax = result.person[0]["income_tax"] +ni = result.person[0]["national_insurance"] +uc = result.benunit[0]["universal_credit"] +``` + +### Common Household Types + +**Single adult, no children, renting:** +```python +UKHouseholdInput( + people=[{"age": 30, "employment_income": 35_000}], + household={ + "region": "LONDON", + "tenure_type": "RENT_PRIVATELY", + "rent": 15_600, # £1,300/month + "council_tax": 1_800, + }, + benunit={"would_claim_uc": True}, + year=2026, +) +``` + +**Single parent, 2 children, renting:** +```python +UKHouseholdInput( + 
people=[ + {"age": 35, "employment_income": 25_000}, + {"age": 8}, + {"age": 5}, + ], + household={ + "region": "NORTH_WEST", + "tenure_type": "RENT_PRIVATELY", + "rent": 7_200, # £600/month + "council_tax": 1_200, + }, + benunit={ + "would_claim_uc": True, + "would_claim_child_benefit": True, + }, + year=2026, +) +``` + +**Couple, 2 children, homeowner:** +```python +UKHouseholdInput( + people=[ + {"age": 40, "employment_income": 50_000}, + {"age": 38, "employment_income": 25_000}, + {"age": 10}, + {"age": 7}, + ], + household={ + "region": "SOUTH_EAST", + "tenure_type": "OWNER_OCCUPIED", + "rent": 0, + "council_tax": 2_400, + }, + benunit={ + "would_claim_child_benefit": True, + }, + year=2026, +) +``` + +**Pensioner, renting:** +```python +UKHouseholdInput( + people=[{"age": 70, "state_pension": 10_600}], + household={ + "region": "WEST_MIDLANDS", + "tenure_type": "RENT_PRIVATELY", + "rent": 6_000, + "council_tax": 1_200, + }, + benunit={ + "would_claim_PC": True, + }, + year=2026, +) +``` + +**Disabled adult claiming UC with LCWRA:** +```python +UKHouseholdInput( + people=[ + { + "age": 45, + "employment_income": 10_000, + "is_disabled_for_benefits": True, + "uc_limited_capability_for_WRA": True, + } + ], + household={ + "region": "NORTH_EAST", + "tenure_type": "RENT_PRIVATELY", + "rent": 6_000, + "council_tax": 1_000, + }, + benunit={"would_claim_uc": True}, + year=2026, +) +``` + +--- + +## Approach 2: Situation Dict (Simulation) + +The pattern used by existing UK analysis repos. 
+ +```python +from policyengine_uk import Simulation + +situation = { + "people": { + "adult": { + "age": {"2026": 35}, + "employment_income": {"2026": 30_000}, + }, + "child1": {"age": {"2026": 8}}, + "child2": {"age": {"2026": 5}}, + }, + "benunits": { + "benunit": { + "members": ["adult", "child1", "child2"], + "would_claim_uc": {"2026": True}, + "would_claim_child_benefit": {"2026": True}, + } + }, + "households": { + "household": { + "members": ["adult", "child1", "child2"], + "region": {"2026": "NORTH_WEST"}, + "tenure_type": {"2026": "RENT_PRIVATELY"}, + "rent": {"2026": 7_200}, + "council_tax": {"2026": 1_200}, + } + }, +} + +sim = Simulation(situation=situation) +net_income = sim.calculate("hbai_household_net_income", "2026") +income_tax = sim.calculate("income_tax", "2026") +uc = sim.calculate("universal_credit", "2026") +child_benefit = sim.calculate("child_benefit", "2026") +``` + +### Income Sweep with Axes + +```python +situation = { + "people": { + "adult": {"age": {"2026": 35}}, + "child1": {"age": {"2026": 8}}, + }, + "benunits": { + "benunit": { + "members": ["adult", "child1"], + "would_claim_uc": {"2026": True}, + "would_claim_child_benefit": {"2026": True}, + } + }, + "households": { + "household": { + "members": ["adult", "child1"], + "region": {"2026": "LONDON"}, + "tenure_type": {"2026": "RENT_PRIVATELY"}, + "rent": {"2026": 12_000}, + "council_tax": {"2026": 1_600}, + } + }, + "axes": [[{ + "name": "employment_income", + "count": 501, + "min": 0, + "max": 100_000, + "period": "2026", + }]], +} + +sim = Simulation(situation=situation) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("hbai_household_net_income", "2026") +``` + +--- + +## Approach 3: Reform Comparison + +### With calculate_household_impact + +```python +from policyengine.core import Policy, Parameter, ParameterValue +from policyengine.tax_benefit_models.uk import ( + UKHouseholdInput, + calculate_household_impact, + uk_latest, +) +import 
datetime + +# Reform: increase UC standard allowance +param = Parameter( + name="gov.dwp.universal_credit.standard_allowance.amount.single.over_25", + tax_benefit_model_version=uk_latest, + data_type=float, +) +pv = ParameterValue( + parameter=param, + start_date=datetime.date(2026, 1, 1), + end_date=datetime.date(2026, 12, 31), + value=500, # Monthly amount +) +policy = Policy(name="UC Increase", parameter_values=[pv]) + +household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 15_000}], + household={ + "region": "NORTH_WEST", + "tenure_type": "RENT_PRIVATELY", + "rent": 7_200, + "council_tax": 1_000, + }, + benunit={"would_claim_uc": True}, + year=2026, +) + +baseline = calculate_household_impact(household) +reform = calculate_household_impact(household, policy=policy) + +change = ( + reform.household["hbai_household_net_income"] + - baseline.household["hbai_household_net_income"] +) +``` + +### With Simulation + Situation Dict + +```python +reform_dict = { + "gov.dwp.universal_credit.standard_allowance.amount.single.over_25": { + "2026-01-01.2026-12-31": 500 + } +} + +sim_baseline = Simulation(situation=situation) +sim_reform = Simulation(situation=situation, reform=reform_dict) + +baseline_net = sim_baseline.calculate("hbai_household_net_income", "2026") +reform_net = sim_reform.calculate("hbai_household_net_income", "2026") +change = reform_net - baseline_net +``` + +--- + +## Impact Types + +### Net income change + +```python +baseline_net = baseline.household["hbai_household_net_income"] +reform_net = reform.household["hbai_household_net_income"] +change = reform_net - baseline_net +``` + +### Tax and benefit component breakdown + +```python +components = { + "Income tax": reform.person[0]["income_tax"] - baseline.person[0]["income_tax"], + "National Insurance": reform.person[0]["national_insurance"] - baseline.person[0]["national_insurance"], + "Universal Credit": reform.benunit[0]["universal_credit"] - 
baseline.benunit[0]["universal_credit"], + "Child Benefit": reform.benunit[0]["child_benefit"] - baseline.benunit[0]["child_benefit"], + "Council Tax Benefit": reform.benunit[0].get("council_tax_benefit", 0) - baseline.benunit[0].get("council_tax_benefit", 0), +} +``` + +### Effective marginal tax rate + +```python +sim = Simulation(situation=situation_with_axes) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("hbai_household_net_income", "2026") + +marginal_rates = 1 - np.diff(net_incomes) / np.diff(incomes) +``` + +### UC taper and benefit withdrawal + +Universal Credit tapers at 55p per £1 of net earnings above the work allowance. This creates effective marginal rates above the statutory tax rate. + +```python +sim = Simulation(situation=situation_with_axes) +incomes = sim.calculate("employment_income", "2026") +uc = sim.calculate("universal_credit", "2026") + +# Show where UC phases out +import plotly.graph_objects as go +fig = go.Figure() +fig.add_trace(go.Scatter(x=incomes, y=uc, name="Universal Credit")) +fig.update_layout( + xaxis_title="Employment income (£)", + yaxis_title="Universal Credit (£/year)", +) +``` + +--- + +## UK Regions + +The `region` field affects housing costs, council tax, and some benefit rates. + +| Region code | Region | +|-------------|--------| +| `NORTH_EAST` | North East | +| `NORTH_WEST` | North West | +| `YORKSHIRE` | Yorkshire and the Humber | +| `EAST_MIDLANDS` | East Midlands | +| `WEST_MIDLANDS` | West Midlands | +| `EAST_OF_ENGLAND` | East of England | +| `LONDON` | London | +| `SOUTH_EAST` | South East | +| `SOUTH_WEST` | South West | +| `WALES` | Wales | +| `SCOTLAND` | Scotland | +| `NORTHERN_IRELAND` | Northern Ireland | + +Scotland has different income tax rates. Northern Ireland has some separate benefit provisions. 
+ +--- + +## Generating results.json for Household Analysis + +```python +import json, inspect + +REPO = "PolicyEngine/uc-increase-analysis" + +households = { + "single_renter": {"income": 15_000, "children": 0, "rent": 7_200}, + "single_parent_2": {"income": 25_000, "children": 2, "rent": 7_200}, + "couple_2_owner": {"income": 75_000, "children": 2, "rent": 0}, +} + +results = { + "metadata": { + "title": "UC Standard Allowance Increase", + "repo": REPO, + "country_id": "uk", + "year": 2026, + }, + "values": {}, + "tables": {}, + "charts": {}, +} + +rows = [] +for name, params in households.items(): + # ... calculate baseline and reform ... + line = inspect.currentframe().f_lineno + change = reform_net - baseline_net + + results["values"][f"{name}_change"] = { + "value": float(change), + "display": f"£{abs(change):,.0f}", + "source_line": line, + "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", + } + rows.append([name, f"£{params['income']:,}", f"£{change:,.0f}"]) + +line = inspect.currentframe().f_lineno +results["tables"]["household_impacts"] = { + "title": "Household impact by family type", + "headers": ["Household", "Income", "Net income change"], + "rows": rows, + "source_line": line, + "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", +} + +with open("results.json", "w") as f: + json.dump(results, f, indent=2) +``` + +--- + +## Writing UK Household Case Studies + +Follow policyengine-writing-skill for all blog post text. + +**✅ Correct (specific, neutral, active):** +``` +A single parent of two children earning £25,000 and renting privately +sees a £520 annual increase in Universal Credit, raising household +net income from £28,400 to £28,920. +``` + +**❌ Wrong (vague, value judgment):** +``` +Working families on Universal Credit see welcome increases to their +income under the reform. 
+``` + +**✅ Correct (shows calculation):** +``` +The UC standard allowance rises from £393 to £500 per month, a £107 +monthly increase (£1,284 per year). After the 55% taper on her £15,000 +net earnings above the work allowance, she retains £520 of the increase. +``` + +**❌ Wrong (hides calculation):** +``` +Claimants receive a boost to their monthly Universal Credit payments. +``` + +**✅ Correct (acknowledges UK specifics):** +``` +A Scottish taxpayer earning £35,000 pays income tax at Scotland's +intermediate rate of 21%, compared to 20% in England. The reform +increases their net income by £340, compared to £380 for an equivalent +English taxpayer. +``` + +**❌ Wrong (ignores devolution):** +``` +All UK taxpayers earning £35,000 see the same impact from the reform. +``` + +--- + +## Common UK Variables + +| Variable | Entity | Description | +|----------|--------|-------------| +| `hbai_household_net_income` | household | Household net income (HBAI definition) | +| `household_net_income` | household | Household net income | +| `income_tax` | person | Income tax liability | +| `national_insurance` | person | National Insurance contributions | +| `universal_credit` | benunit | Universal Credit entitlement | +| `child_benefit` | benunit | Child Benefit | +| `working_tax_credit` | benunit | Working Tax Credit (legacy) | +| `child_tax_credit` | benunit | Child Tax Credit (legacy) | +| `housing_benefit` | benunit | Housing Benefit (legacy) | +| `pension_credit` | benunit | Pension Credit | +| `council_tax_benefit` | benunit | Council Tax Reduction | +| `employment_income` | person | Employment income | +| `self_employment_income` | person | Self-employment income | +| `state_pension` | person | State Pension | + +--- + +## UK vs US Differences + +| Aspect | UK | US | +|--------|----|----| +| Entity groups | 3 (person, benunit, household) | 6 (person, marital_unit, family, tax_unit, spm_unit, household) | +| Benefits unit | Benefit unit (benunit) | SPM unit for benefits, 
tax unit for credits | +| Benefit claiming | Must set `would_claim_*` = True | Generally automatic | +| Net income variable | `hbai_household_net_income` | `household_net_income` | +| Currency | £ (GBP) | $ (USD) | +| Housing costs | `rent`, `council_tax`, `tenure_type` required | Not required for most analyses | +| Regional variation | `region` — Scotland has different tax rates | `state_code_str` — 50+ state tax systems | +| Key benefit | Universal Credit | EITC, CTC, SNAP | +| Key taper | UC taper (55% of net earnings) | EITC phase-out, benefit cliffs | + +--- + +## Checklist + +Before publishing UK household analysis: + +- [ ] All 3 entity groups defined (person, benunit, household) +- [ ] `would_claim_*` flags set for all relevant benefits +- [ ] `region` specified (Scotland has different tax rates) +- [ ] `tenure_type`, `rent`, and `council_tax` set for housing-related benefits +- [ ] Representative households cover renters, homeowners, pensioners, and families with children +- [ ] Baseline and reform both calculated for the same household +- [ ] Component breakdown shows which taxes/benefits drive the net change +- [ ] All values in results.json with `source_line` and `source_url` +- [ ] Currency formatted as £ (not $ or GBP) +- [ ] Blog text uses `{{}}` template references, not hard-coded numbers +- [ ] Neutral language — no value judgments +- [ ] Scottish/devolved differences noted where relevant +- [ ] Calculations shown explicitly (e.g., "£107/month × 12 = £1,284/year, minus 55% taper...") + +--- + +## Resources + +- blog-pipeline-skill — results.json schema, template syntax, chart generation +- policyengine-writing-skill — neutral tone, active voice, quantitative precision +- policyengine-analysis-skill — population-level patterns, Plotly charts +- policyengine-uk-skill — UK tax/benefit system domain knowledge +- us-household-analysis-skill — US equivalent patterns for comparison diff --git a/skills/content/us-household-analysis-skill/SKILL.md 
b/skills/content/us-household-analysis-skill/SKILL.md new file mode 100644 index 0000000..bdc3669 --- /dev/null +++ b/skills/content/us-household-analysis-skill/SKILL.md @@ -0,0 +1,458 @@ +--- +name: us-household-analysis +description: Household-level impact analysis patterns for US policy reforms - define households, calculate tax/benefit changes, generate results.json +--- + +# US Household Analysis + +Patterns for analyzing how US policy reforms affect specific household types. Use this skill when a blog post needs household-level case studies (e.g., "a single parent earning $50,000 sees a $252 increase"). + +## When to Use This Skill + +- Blog posts showing how a reform affects representative households +- Calculators that let users enter their own household details +- Case studies comparing reform impacts across filing statuses, income levels, or family sizes +- Budget constraint / marginal tax rate analysis across an income range + +For population-level microsimulation (deciles, poverty rates, aggregate budget impact), see blog-pipeline-skill instead. + +--- + +## Household Structure + +A US household in PolicyEngine has 6 entity groups. Every person must belong to one of each: + +| Entity | Purpose | Key variables | +|--------|---------|---------------| +| `household` | Physical dwelling | `state_code_str`, `household_net_income` | +| `tax_unit` | IRS filing unit | `filing_status`, `income_tax`, `eitc`, `ctc` | +| `family` | Census family | `family_id` | +| `spm_unit` | Supplemental Poverty Measure unit | `snap`, `ssi`, `tanf` | +| `marital_unit` | Married/unmarried pair | `marital_unit_id` | +| `person` | Individual | `age`, `employment_income`, `social_security` | + +--- + +## Approach 1: Single Household (calculate_household_impact) + +The simplest way to calculate one household's taxes and benefits. 
+ +```python +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + calculate_household_impact, +) + +# Single filer, no children, $50k income +household = USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True} + ], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code_str": "CA"}, + year=2026, +) +result = calculate_household_impact(household) + +# Access results by entity +net_income = result.household["household_net_income"] +income_tax = result.tax_unit[0]["income_tax"] +eitc = result.tax_unit[0]["eitc"] +snap = result.spm_unit[0]["snap"] +``` + +### Common Household Types + +**Single filer, no children:** +```python +USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True} + ], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code_str": "CA"}, + year=2026, +) +``` + +**Single parent, 2 children:** +```python +USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True}, + {"age": 8, "is_tax_unit_dependent": True}, + {"age": 5, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + household={"state_code_str": "TX"}, + year=2026, +) +``` + +**Married couple, 2 children:** +```python +USHouseholdInput( + people=[ + {"age": 40, "employment_income": 80_000, "is_tax_unit_head": True}, + {"age": 38, "employment_income": 40_000, "is_tax_unit_spouse": True}, + {"age": 10, "is_tax_unit_dependent": True}, + {"age": 7, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "JOINT"}, + household={"state_code_str": "NY"}, + year=2026, +) +``` + +**Senior, retired:** +```python +USHouseholdInput( + people=[ + {"age": 70, "social_security": 24_000, "is_tax_unit_head": True} + ], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code_str": "FL"}, + year=2026, +) +``` + +--- + +## Approach 2: Situation Dict (Simulation) + +The pattern used by existing analysis 
repos. More verbose but supports axes for income sweeps. + +```python +from policyengine_us import Simulation + +situation = { + "people": { + "adult": { + "age": {"2026": 35}, + "employment_income": {"2026": 50_000}, + }, + "child1": {"age": {"2026": 8}}, + "child2": {"age": {"2026": 5}}, + }, + "families": { + "family": {"members": ["adult", "child1", "child2"]} + }, + "tax_units": { + "tax_unit": {"members": ["adult", "child1", "child2"]} + }, + "spm_units": { + "spm_unit": {"members": ["adult", "child1", "child2"]} + }, + "marital_units": { + "marital_unit": {"members": ["adult"]} + }, + "households": { + "household": { + "members": ["adult", "child1", "child2"], + "state_name": {"2026": "TX"}, + } + }, +} + +sim = Simulation(situation=situation) +net_income = sim.calculate("household_net_income", "2026") +income_tax = sim.calculate("income_tax", "2026") +ctc = sim.calculate("ctc", "2026") +``` + +### Income Sweep with Axes + +Calculate impacts across an income range in a single simulation (much faster than looping): + +```python +situation = { + "people": { + "adult": {"age": {"2026": 35}}, + "child1": {"age": {"2026": 8}}, + }, + "families": {"family": {"members": ["adult", "child1"]}}, + "tax_units": {"tax_unit": {"members": ["adult", "child1"]}}, + "spm_units": {"spm_unit": {"members": ["adult", "child1"]}}, + "marital_units": {"marital_unit": {"members": ["adult"]}}, + "households": { + "household": { + "members": ["adult", "child1"], + "state_name": {"2026": "CA"}, + } + }, + "axes": [[{ + "name": "employment_income", + "count": 501, + "min": 0, + "max": 250_000, + "period": "2026", + }]], +} + +sim = Simulation(situation=situation) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("household_net_income", "2026") +``` + +--- + +## Approach 3: Reform Comparison + +Compare baseline vs reform for the same household. 
+ +### With calculate_household_impact + +```python +from policyengine.core import Policy, Parameter, ParameterValue +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + calculate_household_impact, + us_latest, +) +import datetime + +# Define reform +param = Parameter( + name="gov.irs.credits.ctc.amount.base_amount", + tax_benefit_model_version=us_latest, + data_type=float, +) +pv = ParameterValue( + parameter=param, + start_date=datetime.date(2026, 1, 1), + end_date=datetime.date(2026, 12, 31), + value=5000, +) +policy = Policy(name="CTC Expansion", parameter_values=[pv]) + +household = USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True}, + {"age": 8, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + household={"state_code_str": "CA"}, + year=2026, +) + +baseline = calculate_household_impact(household) +reform = calculate_household_impact(household, policy=policy) + +change = reform.household["household_net_income"] - baseline.household["household_net_income"] +``` + +### With Simulation + Situation Dict + +```python +reform_dict = { + "gov.irs.credits.ctc.amount.base_amount": { + "2026-01-01.2026-12-31": 5000 + } +} + +sim_baseline = Simulation(situation=situation) +sim_reform = Simulation(situation=situation, reform=reform_dict) + +baseline_net = sim_baseline.calculate("household_net_income", "2026") +reform_net = sim_reform.calculate("household_net_income", "2026") +change = reform_net - baseline_net +``` + +--- + +## Impact Types + +### Net income change + +The most common household impact. Shows the bottom-line dollar difference. + +```python +baseline_net = baseline.household["household_net_income"] +reform_net = reform.household["household_net_income"] +change = reform_net - baseline_net +``` + +### Tax component breakdown + +Show which specific taxes/credits change and by how much. 
+ +```python +components = { + "Income tax": reform.tax_unit[0]["income_tax"] - baseline.tax_unit[0]["income_tax"], + "Payroll tax": reform.tax_unit[0]["employee_payroll_tax"] - baseline.tax_unit[0]["employee_payroll_tax"], + "EITC": reform.tax_unit[0]["eitc"] - baseline.tax_unit[0]["eitc"], + "CTC": reform.tax_unit[0]["ctc"] - baseline.tax_unit[0]["ctc"], + "SNAP": reform.spm_unit[0]["snap"] - baseline.spm_unit[0]["snap"], +} +``` + +### Effective marginal tax rate + +How much of an additional dollar of earnings the household keeps. + +```python +# Using axes for smooth marginal rate curve +situation_axes = { + # ... household setup ... + "axes": [[{ + "name": "employment_income", + "count": 1001, + "min": 0, + "max": 200_000, + "period": "2026", + }]], +} + +sim = Simulation(situation=situation_axes) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("household_net_income", "2026") + +# Marginal rate = 1 - (change in net income / change in gross income) +marginal_rates = 1 - np.diff(net_incomes) / np.diff(incomes) +``` + +### Benefit eligibility cliff + +Show where benefits phase out sharply. + +```python +sim = Simulation(situation=situation_axes) +incomes = sim.calculate("employment_income", "2026") +snap = sim.calculate("snap", "2026") +medicaid = sim.calculate("medicaid", "2026") +eitc = sim.calculate("eitc", "2026") + +# Plot each benefit against income to show cliffs +``` + +--- + +## Generating results.json for Household Analysis + +Household analyses produce results.json with the same schema as microsimulation analyses, but values come from specific households rather than population aggregates. 
+ +```python +import json, inspect + +REPO = "PolicyEngine/ctc-expansion" + +households = { + "single_no_kids": {"filing": "SINGLE", "income": 50_000, "children": 0}, + "single_parent_2": {"filing": "HEAD_OF_HOUSEHOLD", "income": 50_000, "children": 2}, + "married_2": {"filing": "JOINT", "income": 100_000, "children": 2}, +} + +results = { + "metadata": { + "title": "CTC Expansion Household Impacts", + "repo": REPO, + "year": 2026, + }, + "values": {}, + "tables": {}, + "charts": {}, +} + +rows = [] +for name, params in households.items(): + # ... calculate baseline and reform ... + line = inspect.currentframe().f_lineno + change = reform_net - baseline_net + + results["values"][f"{name}_change"] = { + "value": float(change), + "display": f"${abs(change):,.0f}", + "source_line": line, + "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", + } + rows.append([name, f"${params['income']:,}", f"${change:,.0f}"]) + +line = inspect.currentframe().f_lineno +results["tables"]["household_impacts"] = { + "title": "Household impact by family type", + "headers": ["Household", "Income", "Net income change"], + "rows": rows, + "source_line": line, + "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", +} + +with open("results.json", "w") as f: + json.dump(results, f, indent=2) +``` + +--- + +## Writing Household Case Studies + +Follow policyengine-writing-skill for all blog post text. + +**✅ Correct (specific, neutral, active):** +``` +A single parent of two children earning $50,000 sees a $1,000 increase +in net income: $800 from the expanded CTC and $200 from lower income +tax liability. +``` + +**❌ Wrong (vague, value judgment):** +``` +Working families see significant benefits from the reform, with +substantial increases to their take-home pay. 
+``` + +**✅ Correct (shows calculation):** +``` +A married couple with two children earning $100,000 receives $10,000 +in CTC under the reform, compared to $4,000 under current law — a +$6,000 increase ($3,000 per child × 2 children). +``` + +**❌ Wrong (hides calculation):** +``` +A married couple sees their CTC more than double under the reform. +``` + +--- + +## Common US Variables + +| Variable | Entity | Description | +|----------|--------|-------------| +| `household_net_income` | household | Total income after taxes and benefits | +| `income_tax` | tax_unit | Federal income tax liability | +| `state_income_tax` | tax_unit | State income tax liability | +| `employee_payroll_tax` | tax_unit | Employee-side payroll taxes | +| `eitc` | tax_unit | Earned Income Tax Credit | +| `ctc` | tax_unit | Child Tax Credit | +| `snap` | spm_unit | SNAP (food stamps) benefits | +| `ssi` | spm_unit | Supplemental Security Income | +| `tanf` | spm_unit | Temporary Assistance for Needy Families | +| `medicaid` | person | Medicaid eligibility value | +| `employment_income` | person | Wages and salary | +| `self_employment_income` | person | Self-employment income | +| `social_security` | person | Social Security benefits | + +--- + +## Checklist + +Before publishing household analysis: + +- [ ] All 6 entity groups defined for each household (household, tax_unit, family, spm_unit, marital_unit, person) +- [ ] Filing status matches household composition (SINGLE, JOINT, HEAD_OF_HOUSEHOLD) +- [ ] State specified via `state_code_str` or `state_name` +- [ ] Representative households cover relevant filing statuses and income ranges +- [ ] Baseline and reform both calculated for the same household +- [ ] Component breakdown shows which taxes/benefits drive the net change +- [ ] All values in results.json with `source_line` and `source_url` +- [ ] Blog text uses `{{}}` template references, not hard-coded numbers +- [ ] Neutral language — no value judgments on who "benefits" or "loses" +- 
[ ] Calculations shown explicitly (e.g., "$3,000 per child × 2 = $6,000") + +--- + +## Resources + +- blog-pipeline-skill — results.json schema, template syntax, chart generation +- policyengine-writing-skill — neutral tone, active voice, quantitative precision +- policyengine-analysis-skill — population-level patterns, Plotly charts +- policyengine-us-skill — US tax/benefit system domain knowledge From f24874892e825739d4f7c896d1a1eec882ffe619 Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Mon, 23 Feb 2026 18:49:21 -0500 Subject: [PATCH 2/5] Add standard chart and table catalogs to content pipeline skills - blog-pipeline-skill: 7 standard chart types (decile impact, net income curve, winners/losers, budget over time, poverty comparison, waterfall, marginal rate) with Plotly code examples, alt text patterns, and custom chart guidelines. 5 standard table types (household impact, decile distribution, parameter comparison, budgetary impact, poverty/inequality) with column specs and custom table rules. - us-household-analysis-skill: required/optional chart and table catalogs for US household posts (benefit cliff charts, state comparisons, filing status comparisons, benefit eligibility tables) - uk-household-analysis-skill: required/optional chart and table catalogs for UK household posts (UC taper charts, regional comparisons, Scotland vs England, UC calculation walkthrough tables, revenue by fiscal year) Based on inventory of ~85 US and ~72 UK existing blog posts plus 10 analysis repos. 
Co-Authored-By: Claude Opus 4.6 --- skills/content/blog-pipeline-skill/SKILL.md | 259 +++++++++++++++++- .../uk-household-analysis-skill/SKILL.md | 62 +++++ .../us-household-analysis-skill/SKILL.md | 58 ++++ 3 files changed, 371 insertions(+), 8 deletions(-) diff --git a/skills/content/blog-pipeline-skill/SKILL.md b/skills/content/blog-pipeline-skill/SKILL.md index ffabef0..661449e 100644 --- a/skills/content/blog-pipeline-skill/SKILL.md +++ b/skills/content/blog-pipeline-skill/SKILL.md @@ -254,35 +254,278 @@ budget_impact = reform_revenue.result - baseline_revenue.result --- -### Chart Generation +### Standard Chart Types -Generate chart PNGs using Plotly with PolicyEngine brand styling: +Every analysis should produce charts from this catalog. Pick the ones relevant to the reform. You may also create **custom charts** for topic-specific visualizations — just follow the same Plotly styling and alt text rules. + +#### Chart 1: Decile impact bar chart (required for microsimulation posts) + +Shows average net income change by income decile. The most common chart across both US and UK posts. 
```python import plotly.graph_objects as go import plotly.io as pio +import os TEAL = "#39C6C0" -BLUE = "#2C6496" +RED = "#DC2626" fig = go.Figure() fig.add_trace(go.Bar( x=[f"Decile {i}" for i in range(1, 11)], y=decile_values, - marker_color=[TEAL if v >= 0 else "#DC2626" for v in decile_values], + marker_color=[TEAL if v >= 0 else RED for v in decile_values], + text=[f"${v:,.0f}" for v in decile_values], + textposition="outside", )) fig.update_layout( template="plotly_white", font=dict(family="Inter, sans-serif"), - xaxis_title="Income Decile", - yaxis_title="Average Annual Change ($)", + xaxis_title="Income decile", + yaxis_title="Average annual change ($)", ) os.makedirs("charts", exist_ok=True) -pio.write_image(fig, "charts/distributional.png", width=1200, height=600, scale=2) +pio.write_image(fig, "charts/decile_impact.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Bar chart showing [reform] impact by income decile. Bottom decile [gains/loses] [amount]. Top decile [gains/loses] [amount]." + +#### Chart 2: Household net income curve (required for household posts) + +Shows how net income changes across an earnings range for a specific household type. Build the earnings range with `axes` in the situation dict so the whole sweep runs in one simulation instead of one simulation per income level. + +```python +fig = go.Figure() +fig.add_trace(go.Scatter( + x=incomes, y=baseline_net, mode="lines", + name="Current law", line=dict(color=BLUE, width=2), +)) +fig.add_trace(go.Scatter( + x=incomes, y=reform_net, mode="lines", + name="Reform", line=dict(color=TEAL, width=2), +)) +fig.update_layout( + template="plotly_white", + font=dict(family="Inter, sans-serif"), + xaxis_title="Employment income ($)", + yaxis_title="Household net income ($)", + xaxis_tickformat="$,.0f", + yaxis_tickformat="$,.0f", + legend=dict(x=0.02, y=0.98), +) +pio.write_image(fig, "charts/net_income_curve.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Line chart comparing net income under current law and [reform] for [household type]. 
Reform increases net income by [amount] at [income level], with the largest gain at [peak]." + +#### Chart 3: Winners and losers bar chart + +Shows what percentage of households gain, lose, or are unaffected, often by decile. + +```python +fig = go.Figure() +fig.add_trace(go.Bar( + x=decile_labels, y=pct_gaining, name="Gain", marker_color=TEAL, +)) +fig.add_trace(go.Bar( + x=decile_labels, y=pct_losing, name="Lose", marker_color=RED, +)) +fig.update_layout( + template="plotly_white", + barmode="group", + xaxis_title="Income decile", + yaxis_title="Share of households (%)", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/winners_losers.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Bar chart showing winners and losers by income decile. [X]% of bottom-decile households gain vs [Y]% of top-decile households." + +#### Chart 4: Budgetary impact over time (bar) + +Shows annual cost or revenue impact across a budget window (typically 10 years). + +```python +fig = go.Figure() +fig.add_trace(go.Bar( + x=years, y=annual_costs, + marker_color=BLUE, + text=[f"${v/1e9:.1f}B" for v in annual_costs], + textposition="outside", +)) +fig.update_layout( + template="plotly_white", + xaxis_title="Year", + yaxis_title="Budget impact ($)", + yaxis_tickformat="$,.0f", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/budget_impact.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Bar chart showing annual budget impact from [year] to [year]. Total [10]-year cost: [amount]." + +#### Chart 5: Poverty impact comparison (grouped bar) + +Shows poverty rate changes across demographics (overall, children, seniors, etc.). 
+ +```python +categories = ["Overall", "Children", "Working-age", "Seniors"] +fig = go.Figure() +fig.add_trace(go.Bar( + x=categories, y=baseline_poverty, name="Current law", marker_color="#94A3B8", +)) +fig.add_trace(go.Bar( + x=categories, y=reform_poverty, name="Reform", marker_color=TEAL, +)) +fig.update_layout( + template="plotly_white", + barmode="group", + yaxis_title="Poverty rate (%)", + yaxis_tickformat=".1%", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/poverty_impact.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Grouped bar chart comparing poverty rates under current law and [reform]. Overall poverty [falls/rises] from [X]% to [Y]%. Child poverty [falls/rises] from [X]% to [Y]%." + +#### Chart 6: Waterfall (tax component decomposition) + +Shows how individual reform components add up to the total impact. Used in analysis repos (HR1), underused in blog posts. + +```python +fig = go.Figure(go.Waterfall( + x=["Baseline revenue", "Income tax change", "Payroll tax change", + "Credit expansion", "Reform revenue"], + y=[baseline_rev, income_tax_delta, payroll_delta, credit_delta, 0], + measure=["absolute", "relative", "relative", "relative", "total"], + connector={"line": {"color": "#94A3B8"}}, + increasing={"marker": {"color": TEAL}}, + decreasing={"marker": {"color": RED}}, + totals={"marker": {"color": BLUE}}, + text=[f"${v/1e9:.1f}B" for v in [baseline_rev, income_tax_delta, + payroll_delta, credit_delta, reform_rev]], + textposition="outside", +)) +fig.update_layout( + template="plotly_white", + yaxis_title="Federal revenue ($B)", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/waterfall.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Waterfall chart decomposing budget impact. [Component 1] contributes [amount], [component 2] contributes [amount]. Total reform impact: [amount]." 
+ +#### Chart 7: Marginal tax rate curve (line) + +Shows effective marginal tax rate across an income range, revealing benefit cliffs and taper interactions. + +```python +fig = go.Figure() +fig.add_trace(go.Scatter( + x=incomes[:-1], y=baseline_mtr, mode="lines", + name="Current law", line=dict(color=BLUE, width=2), +)) +fig.add_trace(go.Scatter( + x=incomes[:-1], y=reform_mtr, mode="lines", + name="Reform", line=dict(color=TEAL, width=2), +)) +fig.update_layout( + template="plotly_white", + xaxis_title="Employment income ($)", + yaxis_title="Marginal tax rate (%)", + yaxis_tickformat=".0%", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/marginal_rates.png", width=1200, height=600, scale=2) ``` -Charts are deployed to GitHub Pages via a GitHub Actions workflow in the analysis repo. +**Alt text pattern:** "Line chart showing marginal tax rates under current law and [reform] for [household type]. Reform [reduces/increases] peak marginal rate from [X]% to [Y]% at [income level]." + +#### Custom Charts + +For topic-specific visualizations not covered above (e.g., state comparison maps, benefit phase-in schedules, wealth decile breakdowns, animated time series), follow these rules: + +1. Use the same Plotly styling: `template="plotly_white"`, `font=dict(family="Inter, sans-serif")` +2. Use PE brand colors: `TEAL = "#39C6C0"`, `BLUE = "#2C6496"`, `RED = "#DC2626"`, `GRAY = "#94A3B8"` +3. Save as PNG at 1200x600, scale=2 +4. Write descriptive alt text with chart type and 2-3 key data points +5. Include the chart in results.json with `url`, `alt`, `width`, `height`, `source_line`, `source_url` + +--- + +### Standard Table Types + +Every analysis should produce tables from this catalog where relevant. You may also create **custom tables** — just follow the same formatting rules. + +#### Table 1: Household impact table (required for household posts) + +Shows net income change for representative household types. 
+ +| Household | Income | Filing status | Net income change | +|-----------|--------|---------------|-------------------| +| Single, no children | $40,000 | Single | +$0 | +| Single parent, 2 children | $50,000 | Head of household | +$1,000 | +| Married, 2 children | $100,000 | Joint | +$2,000 | +| Senior, retired | $24,000 | Single | +$0 | + +**UK equivalent:** Replace "Filing status" with "Tenure type" or "Region". Include `would_claim_*` context. + +#### Table 2: Income decile distribution table (required for microsimulation posts) + +Shows average impact, share affected, and share of total benefit by decile. + +| Decile | Avg. change | % affected | Share of total benefit | +|--------|-------------|------------|----------------------| +| 1 (bottom 10%) | +$340 | 12% | 2% | +| ... | ... | ... | ... | +| 10 (top 10%) | +$8,200 | 89% | 42% | + +#### Table 3: Parameter comparison table + +Shows what the reform changes — current law values vs. reform values. + +| Parameter | Current law | Reform | +|-----------|------------|--------| +| CTC base amount | $2,000 | $5,000 | +| Phase-out threshold (single) | $200,000 | $200,000 | +| Phase-out threshold (joint) | $400,000 | $400,000 | +| Refundability | $1,700 | Fully refundable | + +#### Table 4: Budgetary impact by year table + +Shows annual fiscal cost over a budget window. + +| Year | Static cost ($B) | Dynamic cost ($B) | +|------|-----------------|-------------------| +| 2026 | 15.2 | 18.4 | +| 2027 | 15.8 | 19.1 | +| ... | ... | ... | +| 2026-2035 total | 162.0 | 195.0 | + +#### Table 5: Poverty and inequality summary table + +Shows key distributional metrics before and after reform. 
+ +| Metric | Baseline | Reform | Change | +|--------|----------|--------|--------| +| Overall poverty rate (SPM) | 12.4% | 12.0% | -0.4pp | +| Child poverty rate | 13.2% | 11.8% | -1.4pp | +| Gini index | 0.414 | 0.412 | -0.002 | +| Top 10% income share | 31.2% | 30.8% | -0.4pp | + +#### Custom Tables + +For topic-specific tables not covered above (e.g., state-by-state comparisons, methodology comparisons, tax rate schedules, benefit phase-in tables), follow these rules: + +1. Include in results.json under `"tables"` with `headers`, `rows`, `source_line`, `source_url` +2. Pre-format all values as display strings (e.g., "$15.2 billion", "12.4%") +3. Use `{{table:name}}` in the blog post markdown +4. Keep tables under 15 rows — split into multiple tables if larger --- diff --git a/skills/content/uk-household-analysis-skill/SKILL.md b/skills/content/uk-household-analysis-skill/SKILL.md index 5e1c7fc..2df0d8b 100644 --- a/skills/content/uk-household-analysis-skill/SKILL.md +++ b/skills/content/uk-household-analysis-skill/SKILL.md @@ -420,6 +420,68 @@ Scotland has different income tax rates. Northern Ireland has some separate bene --- +## Charts for UK Household Posts + +Produce these charts for household-level analysis. See blog-pipeline-skill for full Plotly styling details and additional chart types. 
+ +### Required charts + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Net income curve** | Baseline vs reform net income across earnings range | Every household post | +| **UC taper chart** | Universal Credit amount vs earnings, showing work allowance and 55% taper | Posts about UC changes | +| **Component breakdown bar** | Which taxes/benefits drive the net income change (income tax, NI, UC, child benefit) | Posts where multiple programs interact | + +### Optional charts (use when relevant) + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Marginal tax rate curve** | Effective MTR showing income tax + NI + UC taper interaction | Posts about taper rates or benefit withdrawal | +| **Regional comparison bar** | Same household across regions (London vs North West vs Scotland) | Posts about regional variation | +| **Scotland vs England comparison** | Same income, different tax systems | Posts about Scottish tax rate changes | +| **Benefit cliff chart** | Individual benefit amounts (UC, child benefit, housing element) vs income | Posts about benefit interactions | +| **Revenue impact time series** | Annual cost across fiscal years (2026-27 to 2030-31) | Posts with multi-year budget windows | + +### Custom charts + +For topic-specific visuals not listed above, follow these rules: +- Use PE brand colors: `TEAL = "#39C6C0"`, `BLUE = "#2C6496"`, `RED = "#DC2626"` +- Plotly with `template="plotly_white"`, `font=dict(family="Inter, sans-serif")` +- Save as PNG at 1200x600, scale=2 +- Format currency as £ throughout (not $ or GBP) +- Write alt text with chart type and 2-3 key data points +- Include in results.json under `"charts"` with `url`, `alt`, `source_line`, `source_url` + +## Tables for UK Household Posts + +### Required tables + +| Table | Columns | When to use | +|-------|---------|-------------| +| **Household impact table** | Household type, Income, Tenure, Region, Net income change | 
Every household post | +| **Component breakdown table** | Component (income tax, NI, UC, child benefit...), Baseline, Reform, Change | Posts where multiple programs interact | + +### Optional tables + +| Table | Columns | When to use | +|-------|---------|-------------| +| **Parameter comparison** | Parameter, Current law, Reform | Posts introducing the reform details | +| **Regional comparison** | Region, Net income change, Key driver | Posts about regional variation | +| **Scotland vs England** | Metric, Scotland, England, Difference | Posts about devolved tax rates | +| **UC calculation walkthrough** | Step (standard allowance, child elements, housing, earnings deduction, taper), Amount | Posts explaining UC mechanics | +| **Revenue impact by year** | Fiscal year, Static cost (£B), Dynamic cost (£B) | Posts with multi-year analysis | +| **Poverty and inequality** | Metric (AHC poverty, BHC poverty, child poverty, Gini), Baseline, Reform, Change | Posts with distributional analysis | + +### Custom tables + +For topic-specific tables, follow these rules: +- Include in results.json with `headers`, `rows`, `source_line`, `source_url` +- Pre-format values as display strings ("£1,200", "12.4%") +- Use `{{table:name}}` in blog post markdown +- Keep under 15 rows + +--- + ## Generating results.json for Household Analysis ```python diff --git a/skills/content/us-household-analysis-skill/SKILL.md b/skills/content/us-household-analysis-skill/SKILL.md index bdc3669..5159193 100644 --- a/skills/content/us-household-analysis-skill/SKILL.md +++ b/skills/content/us-household-analysis-skill/SKILL.md @@ -327,6 +327,64 @@ eitc = sim.calculate("eitc", "2026") --- +## Charts for Household Posts + +Produce these charts for household-level analysis. See blog-pipeline-skill for full Plotly styling details and additional chart types. 
+ +### Required charts + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Net income curve** | Baseline vs reform net income across earnings range | Every household post | +| **Marginal tax rate curve** | Effective MTR across earnings range, baseline vs reform | Posts about tax rate changes or benefit cliffs | +| **Component breakdown bar** | Which taxes/credits drive the net income change | Posts where multiple programs interact | + +### Optional charts (use when relevant) + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Benefit cliff chart** | Individual benefit amounts (EITC, CTC, SNAP) vs income | Posts about benefit interactions or phase-outs | +| **State comparison bar** | Same household, different states | Posts about state-level variation | +| **Filing status comparison** | Same income, different filing statuses | Posts about marriage penalties or filing status effects | +| **Waterfall** | Tax component decomposition for one household | Posts breaking down a complex reform | + +### Custom charts + +For topic-specific visuals not listed above, follow these rules: +- Use PE brand colors: `TEAL = "#39C6C0"`, `BLUE = "#2C6496"`, `RED = "#DC2626"` +- Plotly with `template="plotly_white"`, `font=dict(family="Inter, sans-serif")` +- Save as PNG at 1200x600, scale=2 +- Write alt text with chart type and 2-3 key data points +- Include in results.json under `"charts"` with `url`, `alt`, `source_line`, `source_url` + +## Tables for Household Posts + +### Required tables + +| Table | Columns | When to use | +|-------|---------|-------------| +| **Household impact table** | Household type, Income, Filing status, Net income change | Every household post | +| **Component breakdown table** | Component (income tax, EITC, CTC, SNAP...), Baseline, Reform, Change | Posts where multiple programs interact | + +### Optional tables + +| Table | Columns | When to use | +|-------|---------|-------------| 
+| **Parameter comparison** | Parameter, Current law, Reform | Posts introducing the reform details | +| **State comparison** | State, Net income change, Key driver | Posts about state variation | +| **Income sweep summary** | Income level, Baseline net, Reform net, Change, MTR | Posts with detailed income range analysis | +| **Benefit eligibility** | Income level, EITC, CTC, SNAP, Medicaid, Total benefits | Posts about benefit cliffs | + +### Custom tables + +For topic-specific tables, follow these rules: +- Include in results.json with `headers`, `rows`, `source_line`, `source_url` +- Pre-format values as display strings ("$1,200", "12.4%") +- Use `{{table:name}}` in blog post markdown +- Keep under 15 rows + +--- + ## Generating results.json for Household Analysis Household analyses produce results.json with the same schema as microsimulation analyses, but values come from specific households rather than population aggregates. From d5bfdb2be7cb9db830dc14bae753aeacfab9abd1 Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Mon, 23 Feb 2026 19:51:47 -0500 Subject: [PATCH 3/5] Add analysis-writer, blog-writer, and pipeline-validator agents Three specialist agents for the content pipeline: - analysis-writer: writes analysis.py, runs policyengine.py simulations, produces validated results.json with tracked_value() - blog-writer: writes blog post markdown with {{}} template references, zero hard-coded numbers, neutral tone - pipeline-validator: 9-check read-only validation (schema, refs, hard-coded numbers, language, voice, headings, charts, traceability, structure) Also updates publish-analysis command to spawn these agents sequentially (each depends on the previous), following the same pattern as audit-seo with specialist agents. Registered in content, analysis-tools, and complete plugins. 
Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/marketplace.json | 17 +- agents/content/analysis-writer.md | 131 +++++++++++++++ agents/content/blog-writer.md | 146 +++++++++++++++++ agents/content/pipeline-validator.md | 134 +++++++++++++++ commands/publish-analysis.md | 235 +++++++++------------------ 5 files changed, 504 insertions(+), 159 deletions(-) create mode 100644 agents/content/analysis-writer.md create mode 100644 agents/content/blog-writer.md create mode 100644 agents/content/pipeline-validator.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index d77681c..e3525d5 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -161,13 +161,18 @@ "description": "Policy analysis and research - impact studies, dashboards, notebooks, and visualizations", "source": "./", "category": "analysis", - "version": "3.10.0", + "version": "3.11.0", "keywords": ["analysis", "research", "policy", "impact", "streamlit", "plotly", "notebooks", "blog", "pipeline"], "author": { "name": "PolicyEngine", "url": "https://github.com/PolicyEngine" }, "license": "MIT", + "agents": [ + "./agents/content/analysis-writer.md", + "./agents/content/blog-writer.md", + "./agents/content/pipeline-validator.md" + ], "commands": [ "./commands/publish-analysis.md" ], @@ -221,7 +226,7 @@ "description": "Content pipeline - blog post analysis, social images, and distribution from policy reforms", "source": "./", "category": "marketing", - "version": "3.10.0", + "version": "3.11.0", "keywords": ["content", "social", "marketing", "images", "blog", "pipeline", "analysis"], "author": { "name": "PolicyEngine", @@ -229,7 +234,10 @@ }, "license": "MIT", "agents": [ - "./agents/content/content-orchestrator.md" + "./agents/content/content-orchestrator.md", + "./agents/content/analysis-writer.md", + "./agents/content/blog-writer.md", + "./agents/content/pipeline-validator.md" ], "commands": [ "./commands/generate-content.md", @@ -264,6 +272,9 @@ 
"./agents/app/seo-content-checker.md", "./agents/branch-comparator.md", "./agents/content/content-orchestrator.md", + "./agents/content/analysis-writer.md", + "./agents/content/blog-writer.md", + "./agents/content/pipeline-validator.md", "./agents/country-models/ci-fixer.md", "./agents/country-models/cross-program-validator.md", "./agents/country-models/document-collector.md", diff --git a/agents/content/analysis-writer.md b/agents/content/analysis-writer.md new file mode 100644 index 0000000..2e204e4 --- /dev/null +++ b/agents/content/analysis-writer.md @@ -0,0 +1,131 @@ +--- +name: analysis-writer +description: Writes analysis.py scripts that run policyengine.py simulations, generate charts, and produce validated results.json +tools: + - Read + - Write + - Edit + - Bash + - Glob + - Grep + - Skill +model: sonnet +--- + +# Analysis Writer Agent + +You write analysis.py scripts that use policyengine.py to simulate policy reforms and produce results.json with traceable values, tables, and charts. + +## Required skills + +Load these before starting: +- `blog-pipeline` — results.json schema, chart catalog, policyengine.py patterns +- `us-household-analysis` or `uk-household-analysis` — depending on country +- `policyengine-writing-skill` — neutral language for alt text + +## Inputs + +You receive: +- **Reform definition**: parameter paths, values, year, country +- **Analysis type**: microsimulation (population-level) or household (case studies) or both +- **Repo slug**: directory name within the analyses repo +- **Output directory**: where to write analysis.py, results.json, and charts/ + +## Your workflow + +### 1. Write analysis.py + +The script must: + +**Define the reform:** +```python +from policyengine.core import Policy, Parameter, ParameterValue +param = Parameter(name="...", tax_benefit_model_version=..., data_type=float) +pv = ParameterValue(parameter=param, start_date=..., end_date=..., value=...) 
+policy = Policy(name="...", parameter_values=[pv]) +``` + +**Run simulations:** +- For microsimulation: load dataset, run baseline + reform via `Simulation.run()` +- For household: use `calculate_household_impact()` or situation dicts with axes + +**Compute outputs using built-in classes:** +- `calculate_decile_impacts()` for decile bar charts +- `calculate_us_poverty_rates()` or `calculate_uk_poverty_rates()` +- `Aggregate` for budget impact +- `ChangeAggregate` for winners/losers counts + +**Generate charts using Plotly:** +- Pick from the standard chart catalog (see blog-pipeline skill) +- Use `format_fig()` from `policyengine.utils.plotting` for PE brand styling +- Save as PNG at 1200x600, scale=2 in charts/ directory +- Write descriptive alt text (chart type + 2-3 key data points) + +**Build results.json with source tracking:** +```python +from policyengine.results import ResultsJson, ResultsMetadata, ValueEntry, tracked_value + +# Use tracked_value() for automatic source line capture +results["values"]["budget_impact"] = tracked_value( + value=budget_impact, + display=f"${abs(budget_impact)/1e9:.1f} billion", + repo=REPO, +) + +# Validate with Pydantic schema before writing +validated = ResultsJson(**results) +validated.write("results.json") +``` + +### 2. Run the script + +```bash +pip install -r requirements.txt +python analysis.py +``` + +### 3. 
Verify outputs + +- `results.json` exists and is valid JSON +- All values have `source_line` and `source_url` +- `charts/*.png` files exist +- Source URLs point to real line numbers in the script + +## Chart selection + +Pick charts based on analysis type: + +**Microsimulation posts — required:** +- Decile impact bar chart +- Winners/losers chart + +**Microsimulation posts — optional:** +- Budget impact over time +- Poverty comparison +- Waterfall (component decomposition) + +**Household posts — required:** +- Net income curve (baseline vs reform across earnings) +- Household impact table + +**Household posts — optional:** +- Marginal tax rate curve +- Benefit cliff chart +- Component breakdown bar + +## Rules + +1. **Use `policyengine.results.tracked_value()`** for every value — never write source_line manually +2. **Use `policyengine.results.ResultsJson`** to validate before writing — catches schema errors early +3. **Use `policyengine.utils.plotting.format_fig()`** for chart styling — never set colors/fonts manually +4. **Alt text must include chart type and 2-3 data points** — "Bar chart showing X. Top decile Y. Bottom decile Z." +5. **No hard-coded display values** — derive display strings from computed values using f-strings +6. 
**Pre-format table cell values** as strings — results.json rows contain display-ready text + +## Output + +Return: +- Path to analysis.py +- Path to results.json +- List of chart paths with their alt text +- Any errors encountered during execution diff --git a/agents/content/blog-writer.md b/agents/content/blog-writer.md new file mode 100644 index 0000000..9999260 --- /dev/null +++ b/agents/content/blog-writer.md @@ -0,0 +1,146 @@ +--- +name: blog-writer +description: Writes blog post markdown with {{}} template references from results.json — zero hard-coded numbers, neutral tone +tools: + - Read + - Write + - Edit + - Glob + - Grep + - Skill +model: sonnet +--- + +# Blog Writer Agent + +You write blog post markdown files that reference results.json via `{{}}` templates. Every number in the post comes from results.json — zero hard-coded values. + +## Required skills + +Load these before starting: +- `policyengine-writing-skill` — neutral tone, active voice, sentence case, quantitative precision +- `blog-pipeline` — template syntax, results.json schema, post structure + +## Inputs + +You receive: +- **results.json path**: the validated results file from the analysis-writer agent +- **Reform description**: what the policy does, in plain language +- **Country**: us or uk +- **Output path**: where to write the markdown file + +## Your workflow + +### 1. Read results.json + +Parse the file and inventory all available keys: +- `values.*` — individual numbers available as `{{key}}` +- `tables.*` — tables available as `{{table:key}}` +- `charts.*` — charts available as `{{chart:key}}` + +### 2. 
Write the blog post + +Follow this structure: + +```markdown +# [Title — sentence case] + +[Opening paragraph: who, what, when, with link to PolicyEngine] + +Key results in [year]: +- [Bullet 1 using {{value_ref}}] +- [Bullet 2 using {{value_ref}}] +- [Bullet 3 using {{value_ref}}] + +## The proposal + +[Description of what changes, with parameter comparison table if available] + +{{table:parameters}} + +## Household impacts + +[Case studies for 3-5 representative households] + +{{table:household_impacts}} + +{{chart:net_income_curve}} + +## [Nationwide/Statewide] impacts + +### Budgetary impact + +{{value_ref}} [in context] + +{{chart:budget_impact}} + +### Distributional impact + +{{chart:decile_impact}} + +{{table:decile_distribution}} + +### Poverty and inequality + +{{chart:poverty_impact}} + +{{table:poverty_summary}} + +## Methodology + +This analysis uses PolicyEngine's microsimulation model with the +[dataset] dataset ([year]). All calculations are open source and +reproducible. [View the analysis code](https://github.com/[repo]). +``` + +### 3. Writing rules + +**Neutral tone — describe what policies do, not whether they are good:** + +✅ "The reform reduces poverty by {{poverty_change}}" +❌ "The reform successfully tackles poverty" + +**Active voice with specific numbers:** + +✅ "Repealing the SALT cap costs {{budget_impact}} in {{year}}" +❌ "The deficit is increased by the SALT cap repeal" + +**Sentence case for all headings:** + +✅ `## Budgetary impact` +❌ `## Budgetary Impact` + +**Show calculations explicitly:** + +✅ "The reform costs {{budget_impact}}: {{income_tax_change}} in reduced revenue, offset by {{payroll_change}} in higher collections" +❌ "The reform has a significant budgetary impact" + +**Every number is a `{{}}` reference:** + +✅ `The top decile receives {{top_decile_share}} of total benefits` +❌ `The top decile receives 42% of total benefits` + +### 4. 
Validate references + +After writing, verify: +- Every `{{name}}` in the markdown exists as a key in results.json values +- Every `{{table:name}}` exists in results.json tables +- Every `{{chart:name}}` exists in results.json charts +- No raw numbers appear in the markdown (search for digit patterns outside `{{}}`) + +## Rules + +1. **Zero hard-coded numbers** — if it's a number, it must be a `{{}}` reference +2. **Every heading is sentence case** — only capitalize first word and proper nouns +3. **Active voice throughout** — no passive constructions +4. **Neutral tone** — no "unfortunately", "significant", "dramatic", "suffer", or "benefit" used as a verb (the noun, as in "total benefits", is fine) +5. **Include methodology section** — model version, dataset, year, assumptions, code link +6. **Include key findings bullets** — quantitative, at the top of the post +7. **Use tables before charts** — show the data, then visualize it + +## Output + +Return: +- Path to the markdown file +- List of all `{{}}` references used +- Any references that don't match results.json keys (errors) diff --git a/agents/content/pipeline-validator.md b/agents/content/pipeline-validator.md new file mode 100644 index 0000000..f74b4ee --- /dev/null +++ b/agents/content/pipeline-validator.md @@ -0,0 +1,134 @@ +--- +name: pipeline-validator +description: Validates the full blog pipeline — results.json schema, template references, chart accessibility, neutral language, source traceability +tools: + - Read + - Bash + - Glob + - Grep + - Skill +model: sonnet +--- + +# Pipeline Validator Agent + +You validate the output of the blog post pipeline. You check that results.json is valid, all template references resolve, charts are accessible, language is neutral, and every number is traceable. 
+ +## Required skills + +Load these before starting: +- `blog-pipeline` — results.json schema, template syntax +- `policyengine-writing-skill` — neutral tone rules + +## Inputs + +You receive: +- **results.json path**: the analysis output +- **blog post markdown path**: the written post +- **charts directory**: where chart PNGs live + +## Checks + +Run all checks and report pass/fail for each. + +### Check 1: results.json schema + +Read results.json and verify: +- [ ] `metadata.repo` is present +- [ ] `metadata.title` is present +- [ ] Every entry in `values` has `value`, `display`, `source_line`, `source_url` +- [ ] Every entry in `tables` has `title`, `headers`, `rows`, `source_line`, `source_url` +- [ ] Every table has consistent row widths (same number of columns as headers) +- [ ] Every entry in `charts` has `url`, `alt`, `source_line`, `source_url` +- [ ] Every chart alt text is >= 20 characters and starts with a chart type word + +### Check 2: Template references + +Read the blog post markdown and verify: +- [ ] Every `{{value_name}}` matches a key in results.json `values` +- [ ] Every `{{table:name}}` matches a key in results.json `tables` +- [ ] Every `{{chart:name}}` matches a key in results.json `charts` +- [ ] No unresolved `{{` patterns remain +- [ ] No orphan keys in results.json (values/tables/charts not referenced by the post) + +### Check 3: No hard-coded numbers + +Search the markdown for raw numbers outside `{{}}` references: +- [ ] No dollar amounts (e.g., "$15.2 billion") outside template refs +- [ ] No percentages (e.g., "3.2%") outside template refs +- [ ] Exception: year numbers (2026), section numbering, and methodology references are OK + +### Check 4: Neutral language + +Search the markdown for value-judgment words: +- [ ] No "unfortunately", "fortunately", "hopefully" +- [ ] No "significant", "dramatic", "massive", "enormous" +- [ ] No "benefit" as a verb meaning "help" (the noun is OK) +- [ ] No "suffer", "hurt", "harm" (use "reduces 
net income" instead) +- [ ] No "disproportionate", "unfair", "regressive", "progressive" as value judgments +- [ ] No superlatives without specific comparisons ("largest", "most") + +### Check 5: Active voice + +Search for passive constructions: +- [ ] No "is reduced by", "are projected by", "was proposed by" +- [ ] No "it is estimated that", "it was found that" + +### Check 6: Heading style + +- [ ] All H2 and H3 headings use sentence case (not Title Case) +- [ ] Only first word and proper nouns capitalized + +### Check 7: Chart accessibility + +For each chart in results.json: +- [ ] Alt text starts with chart type ("Bar chart", "Line chart", etc.) +- [ ] Alt text includes at least 2 specific numbers +- [ ] Alt text is 1-3 sentences +- [ ] Chart PNG file exists in the charts directory + +### Check 8: Source traceability + +For each value in results.json: +- [ ] `source_url` contains the repo name from metadata +- [ ] `source_url` ends with `#L{source_line}` +- [ ] `source_line` is a positive integer + +### Check 9: Post structure + +- [ ] Post starts with an H1 title +- [ ] Key findings bullets appear within the first 20 lines +- [ ] Methodology section exists (search for "methodology" or "method" heading) +- [ ] Post links to the analysis repo + +## Report format + +``` +## Pipeline Validation Report + +**Results.json**: ✅ / ❌ ({N} values, {N} tables, {N} charts) +**Template refs**: ✅ / ❌ ({N} resolved, {N} missing, {N} orphaned) +**Hard-coded numbers**: ✅ / ❌ ({N} found) +**Neutral language**: ✅ / ❌ ({N} issues) +**Active voice**: ✅ / ❌ ({N} passive constructions) +**Heading style**: ✅ / ❌ ({N} title-case headings) +**Chart accessibility**: ✅ / ❌ ({N} charts checked) +**Source traceability**: ✅ / ❌ ({N} values checked) +**Post structure**: ✅ / ❌ + +### Issues + +1. **{Category}**: {Description} — Line {N} +2. ... + +### Summary + +{N}/9 checks passed. {Ready to publish / Needs fixes}. +``` + +## Rules + +1. 
**Read-only** — never modify files, only report findings +2. **Be specific** — include line numbers and exact text for every issue +3. **Prioritize** — schema and reference errors are blockers; language issues are warnings +4. **No false positives** — year numbers, methodology text, and proper nouns are not issues diff --git a/commands/publish-analysis.md b/commands/publish-analysis.md index 1b66dbe..cc7b18a 100644 --- a/commands/publish-analysis.md +++ b/commands/publish-analysis.md @@ -20,9 +20,9 @@ Generate a complete, SEO-optimized blog post from a policy reform — all number ## Prerequisites Load these skills before starting: -- `blog-pipeline` — results.json schema, template syntax, policyengine.py patterns +- `blog-pipeline` — results.json schema, template syntax, chart/table catalogs - `policyengine-writing-skill` — neutral tone, active voice, PE style -- `policyengine-analysis-skill` — simulation patterns, chart types +- `us-household-analysis` or `uk-household-analysis` — depending on country - `content-generation-skill` — social images and copy --- @@ -68,178 +68,100 @@ Before starting: --- -## Phase 2: Create Analysis Repo +## Phase 2: Create Analysis Directory -1. **Create a new GitHub repo** under PolicyEngine org: - ```bash - gh repo create PolicyEngine/{topic-slug} --public --clone - cd {topic-slug} - ``` - -2. **Create the repo structure:** - ``` - analysis.py # Full simulation + results.json generation - results.json # Generated by analysis.py - charts/ # Generated by analysis.py - requirements.txt # policyengine, plotly, kaleido - README.md # How to reproduce - .github/workflows/pages.yml # Chart deployment to GitHub Pages - ``` - -3. **Write requirements.txt:** - ``` - policyengine>=0.1.0 - plotly>=5.15.0 - kaleido>=0.2.1 - ``` - -4. **Write .github/workflows/pages.yml** for auto-deploying charts to GitHub Pages. +Create a directory in the analysis-notebooks repo (or a new repo if user prefers): -5. 
**Enable GitHub Pages** on the repo (Settings > Pages > GitHub Actions). - ---- - -## Phase 3: Write and Run analysis.py - -Write a Python script using `policyengine.py` that: -1. Defines the reform using `Policy`, `Parameter`, `ParameterValue` -2. Loads the dataset (`PolicyEngineUSDataset` or `PolicyEngineUKDataset`) -3. Runs baseline and reform simulations via `Simulation.run()` -4. Computes all outputs using built-in classes (`DecileImpact`, `Poverty`, `Aggregate`, `ChangeAggregate`) -5. Generates chart PNGs using Plotly with PE brand colors -6. Writes results.json with source line tracking via `inspect` - -### analysis.py structure: - -```python -import json, os, inspect, datetime -from policyengine.core import Simulation, Policy, Parameter, ParameterValue -from policyengine.tax_benefit_models.us import PolicyEngineUSDataset, us_latest -from policyengine.outputs.aggregate import Aggregate, AggregateType -from policyengine.outputs.decile_impact import DecileImpact -from policyengine.outputs.poverty import Poverty -from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType -import plotly.graph_objects as go -import plotly.io as pio - -REPO = "PolicyEngine/{topic-slug}" -YEAR = 2026 - -# 1. Define reform -param = Parameter(name="...", tax_benefit_model_version=us_latest, data_type=float) -pv = ParameterValue(parameter=param, start_date=datetime.date(2026,1,1), - end_date=datetime.date(2026,12,31), value=NEW_VALUE) -policy = Policy(name="Reform Name", parameter_values=[pv]) - -# 2. Load dataset -dataset = PolicyEngineUSDataset(name="enhanced_cps_2024", - filepath="path/to/enhanced_cps_2024.h5", year=YEAR) - -# 3. Run simulations -baseline = Simulation(dataset=dataset, tax_benefit_model_version=us_latest) -baseline.run() - -reform = Simulation(dataset=dataset, tax_benefit_model_version=us_latest, policy=policy) -reform.run() - -# 4. 
Extract results using built-in output classes -decile = DecileImpact(baseline_simulation=baseline, reform_simulation=reform, - variable="household_net_income") -decile.run() - -poverty = Poverty(baseline_simulation=baseline, reform_simulation=reform) -poverty.run() - -# 5. Direct variable access for custom analysis -baseline_net = baseline.output_dataset.data.household["household_net_income"] -reform_net = reform.output_dataset.data.household["household_net_income"] -weights = baseline.output_dataset.data.household["household_weight"] - -# 6. Generate charts -fig = go.Figure(...) -os.makedirs("charts", exist_ok=True) -pio.write_image(fig, "charts/distributional.png", width=1200, height=600, scale=2) - -# 7. Build results.json with source tracking -results = { - "metadata": {"title": "...", "repo": REPO, ...}, - "values": { - "budget_impact": { - "value": budget_impact, - "display": format_currency(budget_impact), - "source_line": inspect.currentframe().f_lineno, - "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", - } - }, - "tables": {...}, - "charts": {...}, -} -with open("results.json", "w") as f: - json.dump(results, f, indent=2) ``` - -### Run the script: -```bash -pip install -r requirements.txt -python analysis.py +{topic-slug}/ + analysis.py # Full simulation + results.json generation + results.json # Generated by analysis.py + charts/ # Generated by analysis.py + requirements.txt # policyengine, plotly, kaleido + README.md # How to reproduce ``` -### Verify outputs: -- `results.json` exists with all expected keys -- `charts/*.png` files exist and look correct -- All `source_line` values point to real code lines +Write requirements.txt: +``` +policyengine>=0.1.0 +plotly>=5.15.0 +kaleido>=0.2.1 +``` --- -## Phase 4: Write Blog Post - -1. **Draft the blog post** as a markdown file using `{{}}` template references: +## Phase 3: Spawn Specialist Agents - ```markdown - The [reform name] would [verb] {{budget_impact}} per year. 
+Spawn agents sequentially — each phase depends on the previous. - ## Budgetary impact +### Agent 1: Analysis Writer - {{table:distributional}} - - {{chart:distributional}} +Invoke **analysis-writer** agent: +``` +Write analysis.py for the following reform: + +- Reform: {parsed reform description} +- Country: {us/uk} +- Year: {year} +- Parameter paths: {identified paths from Phase 1} +- Analysis type: {microsimulation / household / both} +- Output directory: {topic-slug}/ +- Repo slug: PolicyEngine/{repo-name} + +Follow the instructions in agents/content/analysis-writer.md. +Use policyengine.results.tracked_value() for every value. +Use policyengine.results.ResultsJson to validate before writing. +Use policyengine.utils.plotting.format_fig() for chart styling. +``` - ## Household examples +**Verify before proceeding:** +- `results.json` exists and is valid JSON +- All values have `source_line` and `source_url` +- `charts/*.png` files exist +- Source URLs point to real line numbers - {{table:household_examples}} +### Agent 2: Blog Writer - ## Poverty impact +Invoke **blog-writer** agent: +``` +Write a blog post for the following analysis: - The reform would change the poverty rate by {{poverty_change}}. +- results.json path: {topic-slug}/results.json +- Reform description: {parsed reform description} +- Country: {us/uk} +- Output path: {topic-slug}/post.md - ## Methodology +Follow the instructions in agents/content/blog-writer.md. +Every number must be a {{}} reference — zero hard-coded values. +Use neutral tone, active voice, sentence case headings. +``` - This analysis uses PolicyEngine's microsimulation model with the - Enhanced CPS 2024 dataset. All calculations are open source and - reproducible. [View the analysis code](https://github.com/REPO). - ``` +**Verify before proceeding:** +- Every `{{name}}` matches a key in results.json +- No raw numbers outside `{{}}` +- Methodology section exists with repo link -2. 
**Follow the writing skill rules:** +### Agent 3: Pipeline Validator - **✅ Correct (neutral, active, quantitative):** - ``` - Repealing the SALT cap costs {{budget_impact}} in {{year}}. - The top income decile receives {{top_decile_share}} of total benefits. - ``` +Invoke **pipeline-validator** agent: +``` +Validate the full pipeline output: - **❌ Wrong (value judgments, passive, vague):** - ``` - Repealing the SALT cap would unfortunately cost a significant amount. - The wealthiest households receive a disproportionate share of benefits. - ``` +- results.json path: {topic-slug}/results.json +- Blog post path: {topic-slug}/post.md +- Charts directory: {topic-slug}/charts/ -3. **Write chart alt text** — descriptive, includes key data points, 1-3 sentences. +Follow the instructions in agents/content/pipeline-validator.md. +Run all 9 checks and produce the validation report. +``` -4. **Validate all references** — every `{{name}}` in the markdown must exist in results.json. +**If validator reports failures:** +- Schema/reference errors are **blockers** — fix before proceeding +- Language/style issues are **warnings** — fix if possible, note if not --- -## Phase 5: Create Posts Entry +## Phase 4: Create Posts Entry Add an entry to `posts.json` in policyengine-app-v2: @@ -252,7 +174,7 @@ Add an entry to `posts.json` in policyengine-app-v2: "authors": ["..."], "filename": "{topic-slug}.md", "image": "{topic-slug}.png", - "analysis_repo": "PolicyEngine/{topic-slug}" + "analysis_repo": "PolicyEngine/{repo-name}" } ``` @@ -260,7 +182,7 @@ The `analysis_repo` field triggers the resolve-posts build step. --- -## Phase 6: Generate Social Content +## Phase 5: Generate Social Content Use the content-generation skill to create: @@ -270,7 +192,7 @@ Use the content-generation skill to create: Social copy must follow the same neutral tone as the blog post: -**✅ Correct:** +**Correct:** ``` Repealing the SALT cap would cost $15.2 billion in 2026. 
The top income decile receives 42% of total benefits. @@ -278,7 +200,7 @@ The top income decile receives 42% of total benefits. Full analysis: [link] ``` -**❌ Wrong:** +**Wrong:** ``` BREAKING: SALT cap repeal is a massive giveaway to the wealthy! This shocking analysis reveals who really benefits. @@ -286,7 +208,7 @@ This shocking analysis reveals who really benefits. --- -## Phase 7: Create PRs +## Phase 6: Create PRs ### Analysis repo ```bash @@ -301,7 +223,7 @@ Use `/create-pr` command for proper PR creation with CI check waiting. --- -## Phase 8: Verify +## Phase 7: Verify Before marking as done, run through this checklist: @@ -319,7 +241,7 @@ Before marking as done, run through this checklist: --- -## Phase 9: Distribution Checklist +## Phase 8: Distribution Checklist After merge and deploy: @@ -342,6 +264,7 @@ After merge and deploy: | Chart generation fails | kaleido not installed | `pip install kaleido` or note in results.json that charts need manual generation | | Unresolvable `{{ref}}` | Key mismatch between markdown and results.json | Fix spelling or add missing key to results.json | | Stale source lines | Code changed after generating results.json | Re-run analysis.py to regenerate results.json | +| Validator fails | Schema or reference errors | Fix blockers before proceeding; warnings can be noted | --- From 6e46e3e23a3209a87206af39f63ef68567fa9df7 Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Mon, 23 Feb 2026 19:59:13 -0500 Subject: [PATCH 4/5] Fix code examples to use tracked_value() and ResultsJson correctly - analysis-writer: fix results.json construction to wrap tracked_value() dicts in ValueEntry before passing to ResultsJson - blog-pipeline-skill: replace manual inspect.currentframe() pattern with tracked_value() as the recommended approach - us-household-analysis-skill: update results.json generation to use tracked_value(), ValueEntry, TableEntry, and ResultsJson - uk-household-analysis-skill: same updates for UK patterns All code 
examples now match the actual policyengine.results API. Co-Authored-By: Claude Opus 4.6 --- agents/content/analysis-writer.md | 28 ++++++--- skills/content/blog-pipeline-skill/SKILL.md | 41 +++++++----- .../uk-household-analysis-skill/SKILL.md | 62 ++++++++++--------- .../us-household-analysis-skill/SKILL.md | 61 +++++++++--------- 4 files changed, 110 insertions(+), 82 deletions(-) diff --git a/agents/content/analysis-writer.md b/agents/content/analysis-writer.md index 2e204e4..df76426 100644 --- a/agents/content/analysis-writer.md +++ b/agents/content/analysis-writer.md @@ -63,18 +63,32 @@ policy = Policy(name="...", parameter_values=[pv]) **Build results.json with source tracking:** ```python -from policyengine.results import ResultsJson, ResultsMetadata, ValueEntry, tracked_value +from policyengine.results import ( + ResultsJson, ResultsMetadata, ValueEntry, TableEntry, ChartEntry, tracked_value, +) + +REPO = "PolicyEngine/salt-cap-analysis" -# Use tracked_value() for automatic source line capture -results["values"]["budget_impact"] = tracked_value( +# tracked_value() returns a dict — wrap in ValueEntry for validation +budget_entry = ValueEntry(**tracked_value( value=budget_impact, display=f"${abs(budget_impact)/1e9:.1f} billion", repo=REPO, +)) + +# Build the validated results object directly +results = ResultsJson( + metadata=ResultsMetadata( + title="SALT Cap Repeal", + repo=REPO, + country_id="us", + year=2026, + ), + values={"budget_impact": budget_entry}, + tables={...}, # TableEntry objects + charts={...}, # ChartEntry objects ) - -# Validate with Pydantic schema before writing -validated = ResultsJson(**results) -validated.write("results.json") +results.write("results.json") ``` ### 2. 
Run the script diff --git a/skills/content/blog-pipeline-skill/SKILL.md b/skills/content/blog-pipeline-skill/SKILL.md index 661449e..2a5943c 100644 --- a/skills/content/blog-pipeline-skill/SKILL.md +++ b/skills/content/blog-pipeline-skill/SKILL.md @@ -624,34 +624,41 @@ The reform has a significant budgetary impact. ### Source Tracking -Every value in results.json must include `source_line` and `source_url` pointing to the exact line in analysis.py that computed it: +Every value in results.json must include `source_line` and `source_url` pointing to the exact line in analysis.py that computed it. Use the `tracked_value()` helper from `policyengine.results` — it captures the caller's line number automatically via `inspect.stack()`: ```python -import inspect +from policyengine.results import tracked_value, ValueEntry -line = inspect.currentframe().f_lineno budget_impact = reform_revenue.result - baseline_revenue.result -results["values"]["budget_impact"] = { +# tracked_value() captures this line number automatically +budget_entry = ValueEntry(**tracked_value( + value=budget_impact, + display=f"${abs(budget_impact)/1e9:.1f} billion", + repo="PolicyEngine/salt-cap-analysis", +)) +``` + +**✅ Correct — using tracked_value():** +```python +entry = ValueEntry(**tracked_value( + value=budget_impact, + display=f"${abs(budget_impact)/1e9:.1f} billion", + repo=REPO, +)) +``` + +**❌ Wrong — manual line tracking (error-prone, goes stale on refactoring):** +```python +line = inspect.currentframe().f_lineno +entry = { "value": budget_impact, - "display": format_currency(budget_impact), + "display": f"${abs(budget_impact)/1e9:.1f} billion", "source_line": line, "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", } ``` -**✅ Correct — every value traceable:** -```json -{ - "budget_impact": { - "value": -15200000000, - "display": "$15.2 billion", - "source_line": 47, - "source_url": "https://github.com/PolicyEngine/salt-cap/blob/main/analysis.py#L47" - } -} -``` - 
**❌ Wrong — value without source:** ```json { diff --git a/skills/content/uk-household-analysis-skill/SKILL.md b/skills/content/uk-household-analysis-skill/SKILL.md index 2df0d8b..ad0e614 100644 --- a/skills/content/uk-household-analysis-skill/SKILL.md +++ b/skills/content/uk-household-analysis-skill/SKILL.md @@ -484,8 +484,12 @@ For topic-specific tables, follow these rules: ## Generating results.json for Household Analysis +Use `tracked_value()` for automatic source line tracking and `ResultsJson` for schema validation. + ```python -import json, inspect +from policyengine.results import ( + ResultsJson, ResultsMetadata, ValueEntry, TableEntry, tracked_value, +) REPO = "PolicyEngine/uc-increase-analysis" @@ -495,43 +499,43 @@ households = { "couple_2_owner": {"income": 75_000, "children": 2, "rent": 0}, } -results = { - "metadata": { - "title": "UC Standard Allowance Increase", - "repo": REPO, - "country_id": "uk", - "year": 2026, - }, - "values": {}, - "tables": {}, - "charts": {}, -} - +values = {} rows = [] for name, params in households.items(): # ... calculate baseline and reform ... 
- line = inspect.currentframe().f_lineno change = reform_net - baseline_net - results["values"][f"{name}_change"] = { - "value": float(change), - "display": f"£{abs(change):,.0f}", - "source_line": line, - "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", - } + # tracked_value() captures this line number automatically + values[f"{name}_change"] = ValueEntry(**tracked_value( + value=float(change), + display=f"£{abs(change):,.0f}", + repo=REPO, + )) rows.append([name, f"£{params['income']:,}", f"£{change:,.0f}"]) -line = inspect.currentframe().f_lineno -results["tables"]["household_impacts"] = { - "title": "Household impact by family type", - "headers": ["Household", "Income", "Net income change"], - "rows": rows, - "source_line": line, - "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", +import inspect +table_line = inspect.currentframe().f_lineno +tables = { + "household_impacts": TableEntry( + title="Household impact by family type", + headers=["Household", "Income", "Net income change"], + rows=rows, + source_line=table_line, + source_url=f"https://github.com/{REPO}/blob/main/analysis.py#L{table_line}", + ), } -with open("results.json", "w") as f: - json.dump(results, f, indent=2) +results = ResultsJson( + metadata=ResultsMetadata( + title="UC Standard Allowance Increase", + repo=REPO, + country_id="uk", + year=2026, + ), + values=values, + tables=tables, +) +results.write("results.json") ``` --- diff --git a/skills/content/us-household-analysis-skill/SKILL.md b/skills/content/us-household-analysis-skill/SKILL.md index 5159193..642caff 100644 --- a/skills/content/us-household-analysis-skill/SKILL.md +++ b/skills/content/us-household-analysis-skill/SKILL.md @@ -387,10 +387,12 @@ For topic-specific tables, follow these rules: ## Generating results.json for Household Analysis -Household analyses produce results.json with the same schema as microsimulation analyses, but values come from specific households rather than 
population aggregates. +Household analyses produce results.json with the same schema as microsimulation analyses, but values come from specific households rather than population aggregates. Use `tracked_value()` for automatic source line tracking and `ResultsJson` for schema validation. ```python -import json, inspect +from policyengine.results import ( + ResultsJson, ResultsMetadata, ValueEntry, TableEntry, tracked_value, +) REPO = "PolicyEngine/ctc-expansion" @@ -400,42 +402,43 @@ households = { "married_2": {"filing": "JOINT", "income": 100_000, "children": 2}, } -results = { - "metadata": { - "title": "CTC Expansion Household Impacts", - "repo": REPO, - "year": 2026, - }, - "values": {}, - "tables": {}, - "charts": {}, -} - +values = {} rows = [] for name, params in households.items(): # ... calculate baseline and reform ... - line = inspect.currentframe().f_lineno change = reform_net - baseline_net - results["values"][f"{name}_change"] = { - "value": float(change), - "display": f"${abs(change):,.0f}", - "source_line": line, - "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", - } + # tracked_value() captures this line number automatically + values[f"{name}_change"] = ValueEntry(**tracked_value( + value=float(change), + display=f"${abs(change):,.0f}", + repo=REPO, + )) rows.append([name, f"${params['income']:,}", f"${change:,.0f}"]) -line = inspect.currentframe().f_lineno -results["tables"]["household_impacts"] = { - "title": "Household impact by family type", - "headers": ["Household", "Income", "Net income change"], - "rows": rows, - "source_line": line, - "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", +import inspect +table_line = inspect.currentframe().f_lineno +tables = { + "household_impacts": TableEntry( + title="Household impact by family type", + headers=["Household", "Income", "Net income change"], + rows=rows, + source_line=table_line, + 
source_url=f"https://github.com/{REPO}/blob/main/analysis.py#L{table_line}", + ), } -with open("results.json", "w") as f: - json.dump(results, f, indent=2) +results = ResultsJson( + metadata=ResultsMetadata( + title="CTC Expansion Household Impacts", + repo=REPO, + country_id="us", + year=2026, + ), + values=values, + tables=tables, +) +results.write("results.json") ``` --- From c575c28b2966912e96604cb6e73cc5cd35122a3e Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Mon, 23 Feb 2026 20:26:23 -0500 Subject: [PATCH 5/5] Add validation gates to publish-analysis command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redesign the pipeline with 3 human checkpoints, external estimate comparison (with % thresholds), chart sanity checks, and status-based publication control — patterns adopted from encode-bill command. Co-Authored-By: Claude Opus 4.6 --- commands/publish-analysis.md | 605 +++++++++++++++++++++++++++-------- 1 file changed, 465 insertions(+), 140 deletions(-) diff --git a/commands/publish-analysis.md b/commands/publish-analysis.md index cc7b18a..4716a6f 100644 --- a/commands/publish-analysis.md +++ b/commands/publish-analysis.md @@ -1,6 +1,6 @@ --- name: publish-analysis -description: End-to-end blog post pipeline - from research question to published, distributed post with traceable numbers +description: End-to-end blog post pipeline - from research question to published, distributed post with traceable numbers and validated results arguments: - name: topic description: Research question, reform description, or bill reference (e.g., "SALT cap repeal" or "HR 1234") @@ -15,7 +15,7 @@ arguments: # Publish Analysis: $ARGUMENTS -Generate a complete, SEO-optimized blog post from a policy reform — all numbers traceable to code, zero hard-coded values. Uses `policyengine.py` for local simulation. 
+Generate a complete, validated, SEO-optimized blog post from a policy reform — every number traceable to code, validated against external estimates, zero hard-coded values. ## Prerequisites @@ -27,221 +27,479 @@ Load these skills before starting: --- -## Pre-Flight Checklist +## Workflow Overview -Before starting: -- [ ] I will use `policyengine.py` for all simulations (not API, not policyengine-us directly) -- [ ] I will generate results.json with `source_line` and `source_url` for every value -- [ ] I will use `{{}}` template references — zero hard-coded numbers in the blog post -- [ ] I will follow policyengine-writing-skill for neutral tone and active voice -- [ ] I will generate descriptive alt text with 2-3 key data points for every chart -- [ ] I will ask the user before creating any GitHub repos or PRs +``` +┌──────────────────────────────────────────────────────────────────────────┐ +│ /publish-analysis {TOPIC} │ +└──────────────────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 0: PRE-FLIGHT │ + │ Check for existing analysis │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 1: PARALLEL RESEARCH │ + │ (Task agents) │ + └───────────────────────────────┘ + │ + ┌─────────────────────┴─────────────────────────┐ + │ │ + ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ +│ reform-definer │ │ estimate-finder │ +│ (define reform, │ │ (CBO, JCT, Tax │ +│ map parameters) │ │ Foundation etc.) 
│ +└─────────┬─────────┘ └─────────┬─────────┘ + └─────────────────┬───────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ CHECKPOINT #1: REVIEW │ + │ Reform definition + │ + │ external estimates │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 2: ANALYSIS │ + │ analysis-writer agent │ + │ (analysis.py + results.json) │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 2b: CHART SANITY │ + │ Household sweep chart — │ + │ does shape match intent? │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ CHECKPOINT #2: REVIEW │ + │ PE results vs external │ + │ estimates + chart shape │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 3: BLOG POST │ + │ blog-writer agent │ + │ (markdown with {{}} refs) │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 4: VALIDATION │ + │ pipeline-validator agent │ + │ (9 automated checks) │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ CHECKPOINT #3: REVIEW │ + │ Full post + validation │ + │ report before PR │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 5: PR + DISTRIBUTE │ + │ Draft PR (in_review) │ + │ Merge = publish │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────┐ + │ DONE! │ + └───────────┘ +``` --- ## Key Rules -1. **Zero hard-coded values**: Every number in the blog post comes from results.json via `{{}}` templates -2. **Every number is traceable**: `source_line` and `source_url` in results.json point to the exact code -3. **Neutral language**: Describe what policies do, not whether they are good or bad (see policyengine-writing-skill) -4. **No iframes**: Charts are static `` tags from GitHub Pages with descriptive alt text -5. **Active voice**: "The reform reduces poverty by 3.2%" not "Poverty is reduced by 3.2%" -6. 
**Quantitative precision**: "$15.2 billion" not "significant cost" -7. **Sentence case headings**: "Budgetary impact" not "Budgetary Impact" -8. **Show calculations**: Spell out how derived values are computed +1. **Zero hard-coded values** — every number in the blog post comes from results.json via `{{}}` templates +2. **Every number is traceable** — `source_line` and `source_url` point to the exact code +3. **All computation via analysis.py** — never compute impacts inline or with ad-hoc code +4. **Validate against external estimates** — compare PE results to CBO/JCT/fiscal notes/think tanks +5. **Human reviews at every gate** — 3 explicit checkpoints, each requires approve/adjust/cancel +6. **Neutral language** — describe what policies do, not whether they are good or bad +7. **No iframes** — charts are static `<img>` tags from GitHub Pages with descriptive alt text +8. **Draft PR = in_review** — content is NOT published until PR is merged --- -## Phase 1: Define the Reform +## Phase 0: Pre-Flight Check + +**BEFORE doing any research**, check if this analysis already exists: -1. **Parse the topic** — identify what policy change to analyze -2. **Ask clarifying questions** if needed: - What specific parameters change? - What is the baseline (current law, TCJA extension, etc.)? - What year to analyze? - US or UK? -3. **Identify the PE parameter paths** for the reform: - ```python - from policyengine.tax_benefit_models.{country} import {country}_latest - # Search parameter names matching the topic - ``` +1. Check if analysis directory already exists in analysis-notebooks repo +2. Check if a blog post with this topic exists in policyengine-app-v2 posts.json + +**If found with published results**: Show existing analysis, ask if re-computation needed. +**If not found**: Proceed with Phase 1. 
--- -## Phase 2: Create Analysis Directory +## Phase 1: Parallel Research + +Spawn two Task agents in parallel: -Create a directory in the analysis-notebooks repo (or a new repo if user prefers): +### 1a. Reform Definition ``` -{topic-slug}/ - analysis.py # Full simulation + results.json generation - results.json # Generated by analysis.py - charts/ # Generated by analysis.py - requirements.txt # policyengine, plotly, kaleido - README.md # How to reproduce +Task: Define the reform for "{TOPIC}" + +1. Identify what policy changes to analyze +2. Find the PE parameter paths for the reform +3. Confirm parameter paths exist in policyengine-us or policyengine-uk +4. Build the reform definition (parameter paths, values, effective dates) +5. Determine analysis type: microsimulation, household, or both + +Return: +- Reform parameter paths and values +- Analysis type +- Effective dates +- Any parameters that don't exist yet (blockers) +``` + +### 1b. External Estimate Finder + +``` +Task: Find external estimates for "{TOPIC}" + +Search for existing analyses of this reform: +- CBO/JCT scores (for federal bills) +- State fiscal notes (for state bills) +- Tax Foundation, ITEP, CBPP analyses +- Academic papers with revenue/distributional estimates +- Back-of-envelope calculation (ALWAYS required) + +For each estimate found, capture: +- Source name and URL +- Revenue/cost estimate +- Time period and methodology +- How comparable to PE's approach + +Return structured estimates for validation. ``` -Write requirements.txt: +Wait for both to complete, then combine results. 
+ +--- + +## Checkpoint #1: Reform Definition Review + +Present the reform definition AND external estimates for human approval: + ``` -policyengine>=0.1.0 -plotly>=5.15.0 -kaleido>=0.2.1 +═══════════════════════════════════════════════════════════════════════════ +REFORM DEFINITION & EXTERNAL ESTIMATES REVIEW +═══════════════════════════════════════════════════════════════════════════ + +TOPIC: {topic} +COUNTRY: {country} +YEAR: {year} +ANALYSIS TYPE: {microsimulation / household / both} + +REFORM PARAMETERS: +┌─────────────────────────────────────────────────────────────────────────┐ +│ Parameter │ Current │ Proposed │ +│──────────────────────────────│────────────│────────────────────────────│ +│ {parameter_path} │ {baseline} │ {reform} │ +└─────────────────────────────────────────────────────────────────────────┘ + +EXTERNAL ESTIMATES: +┌─────────────────────────────────────────────────────────────────────────┐ +│ Source │ Estimate │ Period │ Link │ +│──────────────────────│───────────────│───────────│─────────────────────│ +│ CBO/JCT │ -$15.2B │ Annual │ [link] │ +│ Tax Foundation │ -$14.8B │ Annual │ [link] │ +│ Back-of-envelope │ -$16.0B │ Annual │ (see calculation) │ +└─────────────────────────────────────────────────────────────────────────┘ + +BACK-OF-ENVELOPE CHECK: +> {Simple calculation showing expected order of magnitude} +> Example: 15M itemizers × avg $12k SALT deduction × 24% avg rate = ~$43B +> (Rough estimate — actual varies due to AMT interaction and cap level) + +═══════════════════════════════════════════════════════════════════════════ ``` +Use `AskUserQuestion` to confirm: +- Does this reform definition look correct? +- Are the external estimates reasonable comparisons? +- Options: **Yes, proceed** / **No, adjust** / **Cancel** + +**Do NOT proceed until the user explicitly approves.** + --- -## Phase 3: Spawn Specialist Agents +## Phase 2: Run Analysis + +### 2a. 
Create Analysis Directory -Spawn agents sequentially — each phase depends on the previous. +Create a directory in the analysis-notebooks repo: -### Agent 1: Analysis Writer +``` +{topic-slug}/ + analysis.py # Full simulation + results.json generation + results.json # Generated by analysis.py + charts/ # Generated PNGs + requirements.txt # policyengine, plotly, kaleido + README.md # How to reproduce +``` + +### 2b. Spawn analysis-writer Agent -Invoke **analysis-writer** agent: ``` -Write analysis.py for the following reform: +Task: analysis-writer + +Write and run analysis.py for the following reform: -- Reform: {parsed reform description} -- Country: {us/uk} +- Reform: {approved reform definition from Checkpoint #1} +- Country: {country} - Year: {year} -- Parameter paths: {identified paths from Phase 1} +- Parameter paths: {approved parameter paths} - Analysis type: {microsimulation / household / both} - Output directory: {topic-slug}/ - Repo slug: PolicyEngine/{repo-name} -Follow the instructions in agents/content/analysis-writer.md. -Use policyengine.results.tracked_value() for every value. -Use policyengine.results.ResultsJson to validate before writing. -Use policyengine.utils.plotting.format_fig() for chart styling. +CRITICAL: Use tracked_value() for every value. Use ResultsJson to validate. +Use format_fig() for chart styling. ALL computation in analysis.py — no inline. ``` -**Verify before proceeding:** -- `results.json` exists and is valid JSON -- All values have `source_line` and `source_url` -- `charts/*.png` files exist -- Source URLs point to real line numbers +### 2c. Chart Sanity Check + +After analysis.py completes, generate a household-level earnings sweep chart to verify the reform's shape: + +**Quick sanity check**: Does the benefit curve match the reform's intent? 
+- Tax rate cut → linearly increasing benefit with income +- CTC expansion → flat benefit up to income limit, then phase-out +- EITC expansion → trapezoid shape (phase-in, plateau, phase-out) +- SALT cap change → benefit concentrated at high incomes +- UBI → flat benefit, then clawed back via taxes -### Agent 2: Blog Writer +**If the chart looks wrong**: Investigate before proceeding — likely a parameter mapping error. -Invoke **blog-writer** agent: +--- + +## Checkpoint #2: Results Validation + +Compare PE results against external estimates. This is the most important validation step. + +``` +═══════════════════════════════════════════════════════════════════════════ +RESULTS VALIDATION +═══════════════════════════════════════════════════════════════════════════ + +PE RESULTS: + Budget impact: {budget_impact} + Poverty change: {poverty_change} + Winners: {winners_pct} + Losers: {losers_pct} + Top decile avg: {top_decile_avg} + Bottom decile avg: {bottom_decile_avg} + +CHART SANITY CHECK: + Household sweep shape: {matches intent? 
describe} + +VALIDATION — PE vs EXTERNAL: +┌─────────────────────────────────────────────────────────────────────────┐ +│ Source │ Estimate │ vs PE │ Difference │ Verdict │ +│──────────────────────│───────────│────────────│────────────│───────────│ +│ PE (PolicyEngine) │ -$14.1B │ — │ — │ — │ +│ CBO/JCT │ -$15.2B │ -7.2% │ < 10% │ Excellent │ +│ Tax Foundation │ -$14.8B │ -4.7% │ < 10% │ Excellent │ +│ Back-of-envelope │ -$16.0B │ -11.9% │ 10-25% │ Acceptable│ +└─────────────────────────────────────────────────────────────────────────┘ + +THRESHOLDS: + < 10% → Excellent match + 10-25% → Acceptable (note methodology differences) + 25-50% → Review needed (re-check parameters) + > 50% → Likely error (stop and investigate) + +DISCREPANCY EXPLANATION: + {1-2 sentences explaining likely sources of difference — e.g., PE uses + Enhanced CPS microdata vs CBO's proprietary tax model, static vs dynamic + scoring, different base year assumptions} + +═══════════════════════════════════════════════════════════════════════════ ``` + +Use `AskUserQuestion`: +- Results look correct? External comparison acceptable? +- Options: **Yes, proceed to blog post** / **Re-run with adjusted parameters** / **Cancel** + +**If difference > 50%**: Do NOT offer "proceed" option. Force investigation. + +**Do NOT proceed until the user explicitly approves.** + +--- + +## Phase 3: Write Blog Post + +Spawn blog-writer agent: + +``` +Task: blog-writer + Write a blog post for the following analysis: - results.json path: {topic-slug}/results.json -- Reform description: {parsed reform description} -- Country: {us/uk} +- Reform description: {approved reform description} +- Country: {country} - Output path: {topic-slug}/post.md - -Follow the instructions in agents/content/blog-writer.md. -Every number must be a {{}} reference — zero hard-coded values. -Use neutral tone, active voice, sentence case headings. 
+- External estimates: {sources from Checkpoint #1 — for methodology section} + +RULES: +- Every number must be a {{}} reference — zero hard-coded values +- Neutral tone, active voice, sentence case headings +- Methodology section must cite PE model version, dataset, and year +- Methodology section must note comparison to external estimates +- Include link to analysis repo code ``` -**Verify before proceeding:** -- Every `{{name}}` matches a key in results.json -- No raw numbers outside `{{}}` -- Methodology section exists with repo link +--- -### Agent 3: Pipeline Validator +## Phase 4: Automated Validation + +Spawn pipeline-validator agent: -Invoke **pipeline-validator** agent: ``` +Task: pipeline-validator + Validate the full pipeline output: - results.json path: {topic-slug}/results.json - Blog post path: {topic-slug}/post.md - Charts directory: {topic-slug}/charts/ -Follow the instructions in agents/content/pipeline-validator.md. Run all 9 checks and produce the validation report. ``` -**If validator reports failures:** -- Schema/reference errors are **blockers** — fix before proceeding -- Language/style issues are **warnings** — fix if possible, note if not +### Validation Checks (9 automated) ---- - -## Phase 4: Create Posts Entry - -Add an entry to `posts.json` in policyengine-app-v2: - -```json -{ - "title": "...", - "description": "...", - "date": "YYYY-MM-DD", - "tags": ["{country}", "policy"], - "authors": ["..."], - "filename": "{topic-slug}.md", - "image": "{topic-slug}.png", - "analysis_repo": "PolicyEngine/{repo-name}" -} -``` +| # | Check | Blocker? 
| +|---|-------|----------| +| 1 | results.json schema (source_line, source_url, alt text, row widths) | Yes | +| 2 | Template references (every `{{}}` resolves, no orphans) | Yes | +| 3 | No hard-coded numbers (no raw `$` or `%` outside `{{}}`) | Yes | +| 4 | Neutral language (no value judgments) | Warning | +| 5 | Active voice (no passive constructions) | Warning | +| 6 | Sentence case headings | Warning | +| 7 | Chart accessibility (alt text with chart type + 2-3 data points) | Yes | +| 8 | Source traceability (source_url contains repo, ends with #L{line}) | Yes | +| 9 | Post structure (H1 title, key findings, methodology, repo link) | Yes | -The `analysis_repo` field triggers the resolve-posts build step. +**Blockers must pass before proceeding. Warnings should be fixed but don't block.** --- -## Phase 5: Generate Social Content +## Checkpoint #3: Final Review Before PR -Use the content-generation skill to create: +Present the complete post + validation report for human approval: -1. **Social sharing image** (1200x630) using the social-image template -2. **Twitter/X post** — key finding + image + link -3. **LinkedIn post** — more context, professional tone - -Social copy must follow the same neutral tone as the blog post: - -**Correct:** ``` -Repealing the SALT cap would cost $15.2 billion in 2026. -The top income decile receives 42% of total benefits. - -Full analysis: [link] +═══════════════════════════════════════════════════════════════════════════ +FINAL REVIEW BEFORE PR +═══════════════════════════════════════════════════════════════════════════ + +VALIDATION REPORT: + results.json schema: ✅ + Template references: ✅ (14 resolved, 0 missing, 0 orphaned) + Hard-coded numbers: ✅ (0 found) + Neutral language: ✅ (0 issues) + Active voice: ✅ (0 passive) + Sentence case: ✅ + Chart accessibility: ✅ (3 charts checked) + Source traceability: ✅ (14 values checked) + Post structure: ✅ + + Result: 9/9 checks passed. Ready for PR. 
+ +EXTERNAL VALIDATION: + PE vs CBO/JCT: -7.2% (Excellent) + PE vs Tax Foundation: -4.7% (Excellent) + PE vs back-of-envelope: -11.9% (Acceptable) + +POST SUMMARY: + Title: {title} + Key findings: {3 bullet points} + Charts: {N} charts with alt text + Tables: {N} tables + Values: {N} traceable values + Word count: {N} + +═══════════════════════════════════════════════════════════════════════════ ``` -**Wrong:** -``` -BREAKING: SALT cap repeal is a massive giveaway to the wealthy! -This shocking analysis reveals who really benefits. -``` +Use `AskUserQuestion`: +- Ready to create PR? +- Options: **Yes, create draft PR** / **No, needs edits** / **Cancel** + +**Do NOT proceed until the user explicitly approves.** --- -## Phase 6: Create PRs +## Phase 5: Create PR + Distribute + +### 5a. Create Analysis PR -### Analysis repo ```bash -cd {topic-slug} +cd {analysis-directory} git add . git commit -m "Add {topic} analysis with results.json and charts" git push origin main ``` -### Blog post PR (policyengine-app-v2) -Use `/create-pr` command for proper PR creation with CI check waiting. +### 5b. Create Blog Post PR ---- +Create a draft PR in policyengine-app-v2 that adds: +1. Blog post markdown in `articles/` +2. 
posts.json entry with `analysis_repo` field + +PR body must include: + +```markdown +## Blog Post: {title} + +**Analysis repo**: [PolicyEngine/{repo}](https://github.com/PolicyEngine/{repo}) -## Phase 7: Verify +### Reform +| Parameter | Current | Proposed | +|-----------|---------|----------| +| {param} | {base} | {reform} | -Before marking as done, run through this checklist: +### External validation +| Source | Estimate | vs PE | Verdict | +|--------|----------|-------|---------| +| PE (PolicyEngine) | {pe_estimate} | — | — | +| {source} | {estimate} | {diff}% | {verdict} | +| Back-of-envelope | {estimate} | {diff}% | {verdict} | -| Check | How to verify | -|-------|---------------| -| All `{{}}` refs resolve | Search markdown for `{{` — each must match a key in results.json | -| Charts load | curl each GitHub Pages chart URL — expect 200 | -| Alt text is descriptive | Each alt starts with chart type and includes 2-3 data points | -| No hard-coded numbers | Search markdown for raw digits — each should be inside `{{}}` | -| Neutral language | No "unfortunately", "significant", "massive", "dramatic" | -| Active voice | No "is reduced by", "are projected by" | -| Sentence case headings | No title case in H2/H3 headers | -| Source links work | `source_url` values return 200, point to correct lines | -| Methodology section | Specifies model version, dataset, year, and assumptions | +### Key results +| Metric | Value | +|--------|-------| +| Budget impact | {budget_impact} | +| Poverty change | {poverty_change} | +| Winners | {winners_pct} | + +### Validation +Pipeline validator: {N}/9 checks passed. --- +*Generated by `/publish-analysis` — PolicyEngine Claude Plugin* +``` + +**The blog post is NOT published until the PR is merged.** The resolve-posts build step runs on deploy, fetches results.json, and resolves all `{{}}` templates. -## Phase 8: Distribution Checklist +### 5c. 
Distribution Checklist After merge and deploy: @@ -249,23 +507,90 @@ After merge and deploy: - [ ] Post to LinkedIn with key finding + image - [ ] Send to newsletter list (if applicable) - [ ] Direct outreach to bill sponsors (if bill analysis) -- [ ] Pitch to relevant reporters +- [ ] Pitch to relevant journalists - [ ] Log in CRM - [ ] Confirm GA4 events firing --- +## Final Output + +``` +═══════════════════════════════════════════════════════════════════════════ +COMPLETE: {TOPIC} +═══════════════════════════════════════════════════════════════════════════ + +ANALYSIS: + ✓ analysis.py written and executed + ✓ results.json validated (Pydantic schema) + ✓ {N} charts generated with alt text + ✓ {N} values with source line tracking + +VALIDATION: + ✓ Pipeline validator: 9/9 checks passed + ✓ PE vs external: {best_match}% ({verdict}) + ✓ Chart sanity check: shape matches intent + ✓ Human approved at 3 checkpoints + +PRs: + Analysis: {analysis_pr_url} + Blog post: {blog_pr_url} + +NEXT STEPS: + 1. Review both PRs + 2. Merge blog post PR to publish + 3. Run distribution checklist + +═══════════════════════════════════════════════════════════════════════════ +``` + +--- + ## Error Handling | Problem | Cause | Fix | |---------|-------|-----| | Dataset not found | HDF5 file not available locally | Download from HuggingFace: `hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5` | -| Memory issues | Microsimulation loads ~60k households | Ensure 8GB+ RAM available. 
Use `simulation.ensure()` for caching | -| Chart generation fails | kaleido not installed | `pip install kaleido` or note in results.json that charts need manual generation | +| Memory issues | Microsimulation loads ~60k households | Ensure 8GB+ RAM available | +| PE vs external > 50% | Parameter mapping error or methodological mismatch | **Stop.** Re-check parameter paths, compare baseline assumptions, verify reform encoding | +| PE vs external 25-50% | Moderate discrepancy | Re-check parameters first. If they are correct, note the discrepancy in the methodology section and check for known differences (static vs dynamic, different base year) | +| Chart shape wrong | Parameter mapping error | Return to Checkpoint #1, fix parameters, re-run | | Unresolvable `{{ref}}` | Key mismatch between markdown and results.json | Fix spelling or add missing key to results.json | -| Stale source lines | Code changed after generating results.json | Re-run analysis.py to regenerate results.json | -| Validator fails | Schema or reference errors | Fix blockers before proceeding; warnings can be noted | +| Stale source lines | Code changed after generating results.json | Re-run analysis.py to regenerate results.json | +| Validator blockers | Schema or reference errors | Fix before proceeding — do NOT skip | + +--- + +## Key Principle: All Computation via analysis.py + +**NEVER compute impacts inline or with ad-hoc code.** All computation goes through analysis.py because: + +1. **Reproducibility** — anyone can re-run the same script +2. **Auditability** — every value traceable to a specific line +3. **Schema consistency** — ResultsJson validates output +4. **Source tracking** — tracked_value() captures line numbers automatically + +The agents research and generate the reform definition. analysis.py does computation. The blog post is a presentation layer only. 
+ +--- + +## Agents Used + +| Agent | Purpose | Phase | +|-------|---------|-------| +| analysis-writer | Write and run analysis.py, produce results.json | 2 | +| blog-writer | Write blog post with {{}} template refs | 3 | +| pipeline-validator | 9 automated checks on schema, refs, language | 4 | + +## Scripts & Tools + +| Tool | Purpose | +|------|---------| +| `policyengine.py` | Local microsimulation (not API) | +| `policyengine.results.tracked_value()` | Auto-capture source line numbers | +| `policyengine.results.ResultsJson` | Pydantic schema validation | +| `policyengine.utils.plotting.format_fig()` | PE brand chart styling | --- -Start by parsing the topic, then proceed through all phases. +Start by checking for existing analysis (Phase 0), then proceed through all phases. Never skip a checkpoint.