diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 51ce065..e3525d5 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -161,13 +161,21 @@ "description": "Policy analysis and research - impact studies, dashboards, notebooks, and visualizations", "source": "./", "category": "analysis", - "version": "3.9.0", - "keywords": ["analysis", "research", "policy", "impact", "streamlit", "plotly", "notebooks"], + "version": "3.11.0", + "keywords": ["analysis", "research", "policy", "impact", "streamlit", "plotly", "notebooks", "blog", "pipeline"], "author": { "name": "PolicyEngine", "url": "https://github.com/PolicyEngine" }, "license": "MIT", + "agents": [ + "./agents/content/analysis-writer.md", + "./agents/content/blog-writer.md", + "./agents/content/pipeline-validator.md" + ], + "commands": [ + "./commands/publish-analysis.md" + ], "skills": [ "./skills/documentation/policyengine-user-guide-skill", "./skills/tools-and-apis/policyengine-python-client-skill", @@ -180,7 +188,10 @@ "./skills/data-science/microdf-skill", "./skills/documentation/policyengine-design-skill", "./skills/documentation/policyengine-writing-skill", - "./skills/documentation/policyengine-research-lookup-skill" + "./skills/documentation/policyengine-research-lookup-skill", + "./skills/content/blog-pipeline-skill", + "./skills/content/us-household-analysis-skill", + "./skills/content/uk-household-analysis-skill" ] }, { @@ -212,24 +223,31 @@ }, { "name": "content", - "description": "Content generation - social images and posts from blog articles", + "description": "Content pipeline - blog post analysis, social images, and distribution from policy reforms", "source": "./", "category": "marketing", - "version": "3.4.0", - "keywords": ["content", "social", "marketing", "images"], + "version": "3.11.0", + "keywords": ["content", "social", "marketing", "images", "blog", "pipeline", "analysis"], "author": { "name": "PolicyEngine", "url": 
"https://github.com/PolicyEngine" }, "license": "MIT", "agents": [ - "./agents/content/content-orchestrator.md" + "./agents/content/content-orchestrator.md", + "./agents/content/analysis-writer.md", + "./agents/content/blog-writer.md", + "./agents/content/pipeline-validator.md" ], "commands": [ - "./commands/generate-content.md" + "./commands/generate-content.md", + "./commands/publish-analysis.md" ], "skills": [ - "./skills/content/content-generation-skill" + "./skills/content/content-generation-skill", + "./skills/content/blog-pipeline-skill", + "./skills/content/us-household-analysis-skill", + "./skills/content/uk-household-analysis-skill" ] }, { @@ -254,6 +272,9 @@ "./agents/app/seo-content-checker.md", "./agents/branch-comparator.md", "./agents/content/content-orchestrator.md", + "./agents/content/analysis-writer.md", + "./agents/content/blog-writer.md", + "./agents/content/pipeline-validator.md", "./agents/country-models/ci-fixer.md", "./agents/country-models/cross-program-validator.md", "./agents/country-models/document-collector.md", @@ -283,6 +304,7 @@ "./commands/create-pr.md", "./commands/encode-policy.md", "./commands/generate-content.md", + "./commands/publish-analysis.md", "./commands/review-pr.md", "./commands/fix-pr.md", "./commands/new-tool.md" @@ -323,6 +345,9 @@ "./skills/documentation/policyengine-research-lookup-skill", "./skills/documentation/policyengine-plugin-maintenance-skill", "./skills/content/content-generation-skill", + "./skills/content/blog-pipeline-skill", + "./skills/content/us-household-analysis-skill", + "./skills/content/uk-household-analysis-skill", "./skills/technical-patterns/seo-checklist-skill" ] } diff --git a/agents/content/analysis-writer.md b/agents/content/analysis-writer.md new file mode 100644 index 0000000..df76426 --- /dev/null +++ b/agents/content/analysis-writer.md @@ -0,0 +1,145 @@ +--- +name: analysis-writer +description: Writes analysis.py scripts that run policyengine.py simulations, generate charts, and 
produce validated results.json +tools: + - Read + - Write + - Edit + - Bash + - Glob + - Grep + - Skill +model: sonnet +--- + +# Analysis Writer Agent + +You write analysis.py scripts that use policyengine.py to simulate policy reforms and produce results.json with traceable values, tables, and charts. + +## Required skills + +Load these before starting: +- `blog-pipeline` — results.json schema, chart catalog, policyengine.py patterns +- `us-household-analysis` or `uk-household-analysis` — depending on country +- `policyengine-writing-skill` — neutral language for alt text + +## Inputs + +You receive: +- **Reform definition**: parameter paths, values, year, country +- **Analysis type**: microsimulation (population-level) or household (case studies) or both +- **Repo slug**: directory name within the analyses repo +- **Output directory**: where to write analysis.py, results.json, and charts/ + +## Your workflow + +### 1. Write analysis.py + +The script must: + +**Define the reform:** +```python +from policyengine.core import Policy, Parameter, ParameterValue +param = Parameter(name="...", tax_benefit_model_version=..., data_type=float) +pv = ParameterValue(parameter=param, start_date=..., end_date=..., value=...) 
+policy = Policy(name="...", parameter_values=[pv]) +``` + +**Run simulations:** +- For microsimulation: load dataset, run baseline + reform via `Simulation.run()` +- For household: use `calculate_household_impact()` or situation dicts with axes + +**Compute outputs using built-in classes:** +- `calculate_decile_impacts()` for decile bar charts +- `calculate_us_poverty_rates()` or `calculate_uk_poverty_rates()` +- `Aggregate` for budget impact +- `ChangeAggregate` for winners/losers counts + +**Generate charts using Plotly:** +- Pick from the standard chart catalog (see blog-pipeline skill) +- Use `format_fig()` from `policyengine.utils.plotting` for PE brand styling +- Save as PNG at 1200x600, scale=2 in charts/ directory +- Write descriptive alt text (chart type + 2-3 key data points) + +**Build results.json with source tracking:** +```python +from policyengine.results import ( + ResultsJson, ResultsMetadata, ValueEntry, TableEntry, ChartEntry, tracked_value, +) + +REPO = "PolicyEngine/salt-cap-analysis" + +# tracked_value() returns a dict — wrap in ValueEntry for validation +budget_entry = ValueEntry(**tracked_value( + value=budget_impact, + display=f"${abs(budget_impact)/1e9:.1f} billion", + repo=REPO, +)) + +# Build the validated results object directly +results = ResultsJson( + metadata=ResultsMetadata( + title="SALT Cap Repeal", + repo=REPO, + country_id="us", + year=2026, + ), + values={"budget_impact": budget_entry}, + tables={...}, # TableEntry objects + charts={...}, # ChartEntry objects +) +results.write("results.json") +``` + +### 2. Run the script + +```bash +pip install -r requirements.txt +python analysis.py +``` + +### 3. 
Verify outputs + +- `results.json` exists and is valid JSON +- All values have `source_line` and `source_url` +- `charts/*.png` files exist +- Source URLs point to real line numbers in the script + +## Chart selection + +Pick charts based on analysis type: + +**Microsimulation posts — required:** +- Decile impact bar chart +- Winners/losers chart + +**Microsimulation posts — optional:** +- Budget impact over time +- Poverty comparison +- Waterfall (component decomposition) + +**Household posts — required:** +- Net income curve (baseline vs reform across earnings) +- Household impact table + +**Household posts — optional:** +- Marginal tax rate curve +- Benefit cliff chart +- Component breakdown bar + +## Rules + +1. **Use `policyengine.results.tracked_value()`** for every value — never write source_line manually +2. **Use `policyengine.results.ResultsJson`** to validate before writing — catches schema errors early +3. **Use `policyengine.utils.plotting.format_fig()`** for chart styling — never set colors/fonts manually +4. **Alt text must include chart type and 2-3 data points** — "Bar chart showing X. Top decile Y. Bottom decile Z." +5. **No hard-coded display values** — derive display strings from computed values using f-strings +6. 
**Pre-format table cell values** as strings — results.json rows contain display-ready text + +## Output + +Return: +- Path to analysis.py +- Path to results.json +- List of chart paths with their alt text +- Any errors encountered during execution diff --git a/agents/content/blog-writer.md b/agents/content/blog-writer.md new file mode 100644 index 0000000..9999260 --- /dev/null +++ b/agents/content/blog-writer.md @@ -0,0 +1,146 @@ +--- +name: blog-writer +description: Writes blog post markdown with {{}} template references from results.json — zero hard-coded numbers, neutral tone +tools: + - Read + - Write + - Edit + - Glob + - Grep + - Skill +model: sonnet +--- + +# Blog Writer Agent + +You write blog post markdown files that reference results.json via `{{}}` templates. Every number in the post comes from results.json — zero hard-coded values. + +## Required skills + +Load these before starting: +- `policyengine-writing-skill` — neutral tone, active voice, sentence case, quantitative precision +- `blog-pipeline` — template syntax, results.json schema, post structure + +## Inputs + +You receive: +- **results.json path**: the validated results file from the analysis-writer agent +- **Reform description**: what the policy does, in plain language +- **Country**: us or uk +- **Output path**: where to write the markdown file + +## Your workflow + +### 1. Read results.json + +Parse the file and inventory all available keys: +- `values.*` — individual numbers available as `{{key}}` +- `tables.*` — tables available as `{{table:key}}` +- `charts.*` — charts available as `{{chart:key}}` + +### 2. 
Write the blog post + +Follow this structure: + +```markdown +# [Title — sentence case] + +[Opening paragraph: who, what, when, with link to PolicyEngine] + +Key results in [year]: +- [Bullet 1 using {{value_ref}}] +- [Bullet 2 using {{value_ref}}] +- [Bullet 3 using {{value_ref}}] + +## The proposal + +[Description of what changes, with parameter comparison table if available] + +{{table:parameters}} + +## Household impacts + +[Case studies for 3-5 representative households] + +{{table:household_impacts}} + +{{chart:net_income_curve}} + +## [Nationwide/Statewide] impacts + +### Budgetary impact + +{{value_ref}} [in context] + +{{chart:budget_impact}} + +### Distributional impact + +{{chart:decile_impact}} + +{{table:decile_distribution}} + +### Poverty and inequality + +{{chart:poverty_impact}} + +{{table:poverty_summary}} + +## Methodology + +This analysis uses PolicyEngine's microsimulation model with the +[dataset] dataset ([year]). All calculations are open source and +reproducible. [View the analysis code](https://github.com/[repo]). +``` + +### 3. Writing rules + +**Neutral tone — describe what policies do, not whether they are good:** + +✅ "The reform reduces poverty by {{poverty_change}}" +❌ "The reform successfully tackles poverty" + +**Active voice with specific numbers:** + +✅ "Repealing the SALT cap costs {{budget_impact}} in {{year}}" +❌ "The deficit is increased by the SALT cap repeal" + +**Sentence case for all headings:** + +✅ `## Budgetary impact` +❌ `## Budgetary Impact` + +**Show calculations explicitly:** + +✅ "The reform costs {{budget_impact}}: {{income_tax_change}} in reduced revenue, offset by {{payroll_change}} in higher collections" +❌ "The reform has a significant budgetary impact" + +**Every number is a `{{}}` reference:** + +✅ `The top decile receives {{top_decile_share}} of total benefits` +❌ `The top decile receives 42% of total benefits` + +### 4. 
Validate references + +After writing, verify: +- Every `{{name}}` in the markdown exists as a key in results.json values +- Every `{{table:name}}` exists in results.json tables +- Every `{{chart:name}}` exists in results.json charts +- No raw numbers appear in the markdown (search for digit patterns outside `{{}}`) + +## Rules + +1. **Zero hard-coded numbers** — if it's a number, it must be a `{{}}` reference +2. **Every heading is sentence case** — only capitalize first word and proper nouns +3. **Active voice throughout** — no passive constructions +4. **Neutral tone** — no "unfortunately", "significant", "dramatic", "suffer", or "benefit" used as a verb (the noun "benefits" is fine) +5. **Include methodology section** — model version, dataset, year, assumptions, code link +6. **Include key findings bullets** — quantitative, at the top of the post +7. **Use tables before charts** — show the data, then visualize it + +## Output + +Return: +- Path to the markdown file +- List of all `{{}}` references used +- Any references that don't match results.json keys (errors) diff --git a/agents/content/pipeline-validator.md b/agents/content/pipeline-validator.md new file mode 100644 index 0000000..f74b4ee --- /dev/null +++ b/agents/content/pipeline-validator.md @@ -0,0 +1,134 @@ +--- +name: pipeline-validator +description: Validates the full blog pipeline — results.json schema, template references, chart accessibility, neutral language, source traceability +tools: + - Read + - Bash + - Glob + - Grep + - Skill +model: sonnet +--- + +# Pipeline Validator Agent + +You validate the output of the blog post pipeline. You check that results.json is valid, all template references resolve, charts are accessible, language is neutral, and every number is traceable. 
+ +## Required skills + +Load these before starting: +- `blog-pipeline` — results.json schema, template syntax +- `policyengine-writing-skill` — neutral tone rules + +## Inputs + +You receive: +- **results.json path**: the analysis output +- **blog post markdown path**: the written post +- **charts directory**: where chart PNGs live + +## Checks + +Run all checks and report pass/fail for each. + +### Check 1: results.json schema + +Read results.json and verify: +- [ ] `metadata.repo` is present +- [ ] `metadata.title` is present +- [ ] Every entry in `values` has `value`, `display`, `source_line`, `source_url` +- [ ] Every entry in `tables` has `title`, `headers`, `rows`, `source_line`, `source_url` +- [ ] Every table has consistent row widths (same number of columns as headers) +- [ ] Every entry in `charts` has `url`, `alt`, `source_line`, `source_url` +- [ ] Every chart alt text is >= 20 characters and starts with a chart type word + +### Check 2: Template references + +Read the blog post markdown and verify: +- [ ] Every `{{value_name}}` matches a key in results.json `values` +- [ ] Every `{{table:name}}` matches a key in results.json `tables` +- [ ] Every `{{chart:name}}` matches a key in results.json `charts` +- [ ] No unresolved `{{` patterns remain +- [ ] No orphan keys in results.json (values/tables/charts not referenced by the post) + +### Check 3: No hard-coded numbers + +Search the markdown for raw numbers outside `{{}}` references: +- [ ] No dollar amounts (e.g., "$15.2 billion") outside template refs +- [ ] No percentages (e.g., "3.2%") outside template refs +- [ ] Exception: year numbers (2026), section numbering, and methodology references are OK + +### Check 4: Neutral language + +Search the markdown for value-judgment words: +- [ ] No "unfortunately", "fortunately", "hopefully" +- [ ] No "significant", "dramatic", "massive", "enormous" +- [ ] No "benefit" as a verb meaning "help" (the noun is OK) +- [ ] No "suffer", "hurt", "harm" (use "reduces 
net income" instead) +- [ ] No "disproportionate", "unfair", "regressive", "progressive" as value judgments +- [ ] No superlatives without specific comparisons ("largest", "most") + +### Check 5: Active voice + +Search for passive constructions: +- [ ] No "is reduced by", "are projected by", "was proposed by" +- [ ] No "it is estimated that", "it was found that" + +### Check 6: Heading style + +- [ ] All H2 and H3 headings use sentence case (not Title Case) +- [ ] Only first word and proper nouns capitalized + +### Check 7: Chart accessibility + +For each chart in results.json: +- [ ] Alt text starts with chart type ("Bar chart", "Line chart", etc.) +- [ ] Alt text includes at least 2 specific numbers +- [ ] Alt text is 1-3 sentences +- [ ] Chart PNG file exists in the charts directory + +### Check 8: Source traceability + +For each value in results.json: +- [ ] `source_url` contains the repo name from metadata +- [ ] `source_url` ends with `#L{source_line}` +- [ ] `source_line` is a positive integer + +### Check 9: Post structure + +- [ ] Post starts with an H1 title +- [ ] Key findings bullets appear within the first 20 lines +- [ ] Methodology section exists (search for "methodology" or "method" heading) +- [ ] Post links to the analysis repo + +## Report format + +``` +## Pipeline Validation Report + +**Results.json**: ✅ / ❌ ({N} values, {N} tables, {N} charts) +**Template refs**: ✅ / ❌ ({N} resolved, {N} missing, {N} orphaned) +**Hard-coded numbers**: ✅ / ❌ ({N} found) +**Neutral language**: ✅ / ❌ ({N} issues) +**Active voice**: ✅ / ❌ ({N} passive constructions) +**Heading style**: ✅ / ❌ ({N} title-case headings) +**Chart accessibility**: ✅ / ❌ ({N} charts checked) +**Source traceability**: ✅ / ❌ ({N} values checked) +**Post structure**: ✅ / ❌ + +### Issues + +1. **{Category}**: {Description} — Line {N} +2. ... + +### Summary + +{N}/9 checks passed. {Ready to publish / Needs fixes}. +``` + +## Rules + +1. 
**Read-only** — never modify files, only report findings +2. **Be specific** — include line numbers and exact text for every issue +3. **Prioritize** — schema and reference errors are blockers; language issues are warnings +4. **No false positives** — year numbers, methodology text, and proper nouns are not issues diff --git a/commands/publish-analysis.md b/commands/publish-analysis.md new file mode 100644 index 0000000..4716a6f --- /dev/null +++ b/commands/publish-analysis.md @@ -0,0 +1,596 @@ +--- +name: publish-analysis +description: End-to-end blog post pipeline - from research question to published, distributed post with traceable numbers and validated results +arguments: + - name: topic + description: Research question, reform description, or bill reference (e.g., "SALT cap repeal" or "HR 1234") + required: true + - name: country + description: Country code (us or uk) + default: "us" + - name: year + description: Analysis year + default: "2026" +--- + +# Publish Analysis: $ARGUMENTS + +Generate a complete, validated, SEO-optimized blog post from a policy reform — every number traceable to code, validated against external estimates, zero hard-coded values. 
+ +## Prerequisites + +Load these skills before starting: +- `blog-pipeline` — results.json schema, template syntax, chart/table catalogs +- `policyengine-writing-skill` — neutral tone, active voice, PE style +- `us-household-analysis` or `uk-household-analysis` — depending on country +- `content-generation-skill` — social images and copy + +--- + +## Workflow Overview + +``` +┌──────────────────────────────────────────────────────────────────────────┐ +│ /publish-analysis {TOPIC} │ +└──────────────────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 0: PRE-FLIGHT │ + │ Check for existing analysis │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 1: PARALLEL RESEARCH │ + │ (Task agents) │ + └───────────────────────────────┘ + │ + ┌─────────────────────┴─────────────────────────┐ + │ │ + ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ +│ reform-definer │ │ estimate-finder │ +│ (define reform, │ │ (CBO, JCT, Tax │ +│ map parameters) │ │ Foundation etc.) │ +└─────────┬─────────┘ └─────────┬─────────┘ + └─────────────────┬───────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ CHECKPOINT #1: REVIEW │ + │ Reform definition + │ + │ external estimates │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 2: ANALYSIS │ + │ analysis-writer agent │ + │ (analysis.py + results.json) │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 2b: CHART SANITY │ + │ Household sweep chart — │ + │ does shape match intent? 
│ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ CHECKPOINT #2: REVIEW │ + │ PE results vs external │ + │ estimates + chart shape │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 3: BLOG POST │ + │ blog-writer agent │ + │ (markdown with {{}} refs) │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 4: VALIDATION │ + │ pipeline-validator agent │ + │ (9 automated checks) │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ CHECKPOINT #3: REVIEW │ + │ Full post + validation │ + │ report before PR │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────────────────────────┐ + │ PHASE 5: PR + DISTRIBUTE │ + │ Draft PR (in_review) │ + │ Merge = publish │ + └───────────────────────────────┘ + │ + ▼ + ┌───────────┐ + │ DONE! │ + └───────────┘ +``` + +--- + +## Key Rules + +1. **Zero hard-coded values** — every number in the blog post comes from results.json via `{{}}` templates +2. **Every number is traceable** — `source_line` and `source_url` point to the exact code +3. **All computation via analysis.py** — never compute impacts inline or with ad-hoc code +4. **Validate against external estimates** — compare PE results to CBO/JCT/fiscal notes/think tanks +5. **Human reviews at every gate** — 3 explicit checkpoints, each requires approve/adjust/cancel +6. **Neutral language** — describe what policies do, not whether they are good or bad +7. **No iframes** — charts are static `<img>` elements served from GitHub Pages with descriptive alt text +8. **Draft PR = in_review** — content is NOT published until PR is merged + +--- + +## Phase 0: Pre-Flight Check + +**BEFORE doing any research**, check if this analysis already exists: + +1. Check if analysis directory already exists in analysis-notebooks repo +2. 
Check if a blog post with this topic exists in policyengine-app-v2 posts.json + +**If found with published results**: Show existing analysis, ask if re-computation needed. +**If not found**: Proceed with Phase 1. + +--- + +## Phase 1: Parallel Research + +Spawn two Task agents in parallel: + +### 1a. Reform Definition + +``` +Task: Define the reform for "{TOPIC}" + +1. Identify what policy changes to analyze +2. Find the PE parameter paths for the reform +3. Confirm parameter paths exist in policyengine-us or policyengine-uk +4. Build the reform definition (parameter paths, values, effective dates) +5. Determine analysis type: microsimulation, household, or both + +Return: +- Reform parameter paths and values +- Analysis type +- Effective dates +- Any parameters that don't exist yet (blockers) +``` + +### 1b. External Estimate Finder + +``` +Task: Find external estimates for "{TOPIC}" + +Search for existing analyses of this reform: +- CBO/JCT scores (for federal bills) +- State fiscal notes (for state bills) +- Tax Foundation, ITEP, CBPP analyses +- Academic papers with revenue/distributional estimates +- Back-of-envelope calculation (ALWAYS required) + +For each estimate found, capture: +- Source name and URL +- Revenue/cost estimate +- Time period and methodology +- How comparable to PE's approach + +Return structured estimates for validation. +``` + +Wait for both to complete, then combine results. 
+ +--- + +## Checkpoint #1: Reform Definition Review + +Present the reform definition AND external estimates for human approval: + +``` +═══════════════════════════════════════════════════════════════════════════ +REFORM DEFINITION & EXTERNAL ESTIMATES REVIEW +═══════════════════════════════════════════════════════════════════════════ + +TOPIC: {topic} +COUNTRY: {country} +YEAR: {year} +ANALYSIS TYPE: {microsimulation / household / both} + +REFORM PARAMETERS: +┌─────────────────────────────────────────────────────────────────────────┐ +│ Parameter │ Current │ Proposed │ +│──────────────────────────────│────────────│────────────────────────────│ +│ {parameter_path} │ {baseline} │ {reform} │ +└─────────────────────────────────────────────────────────────────────────┘ + +EXTERNAL ESTIMATES: +┌─────────────────────────────────────────────────────────────────────────┐ +│ Source │ Estimate │ Period │ Link │ +│──────────────────────│───────────────│───────────│─────────────────────│ +│ CBO/JCT │ -$15.2B │ Annual │ [link] │ +│ Tax Foundation │ -$14.8B │ Annual │ [link] │ +│ Back-of-envelope │ -$16.0B │ Annual │ (see calculation) │ +└─────────────────────────────────────────────────────────────────────────┘ + +BACK-OF-ENVELOPE CHECK: +> {Simple calculation showing expected order of magnitude} +> Example: 15M itemizers × avg $12k SALT deduction × 24% avg rate = ~$43B +> (Rough estimate — actual varies due to AMT interaction and cap level) + +═══════════════════════════════════════════════════════════════════════════ +``` + +Use `AskUserQuestion` to confirm: +- Does this reform definition look correct? +- Are the external estimates reasonable comparisons? +- Options: **Yes, proceed** / **No, adjust** / **Cancel** + +**Do NOT proceed until the user explicitly approves.** + +--- + +## Phase 2: Run Analysis + +### 2a. 
Create Analysis Directory + +Create a directory in the analysis-notebooks repo: + +``` +{topic-slug}/ + analysis.py # Full simulation + results.json generation + results.json # Generated by analysis.py + charts/ # Generated PNGs + requirements.txt # policyengine, plotly, kaleido + README.md # How to reproduce +``` + +### 2b. Spawn analysis-writer Agent + +``` +Task: analysis-writer + +Write and run analysis.py for the following reform: + +- Reform: {approved reform definition from Checkpoint #1} +- Country: {country} +- Year: {year} +- Parameter paths: {approved parameter paths} +- Analysis type: {microsimulation / household / both} +- Output directory: {topic-slug}/ +- Repo slug: PolicyEngine/{repo-name} + +CRITICAL: Use tracked_value() for every value. Use ResultsJson to validate. +Use format_fig() for chart styling. ALL computation in analysis.py — no inline. +``` + +### 2c. Chart Sanity Check + +After analysis.py completes, generate a household-level earnings sweep chart to verify the reform's shape: + +**Quick sanity check**: Does the benefit curve match the reform's intent? +- Tax rate cut → linearly increasing benefit with income +- CTC expansion → flat benefit up to income limit, then phase-out +- EITC expansion → triangle shape (phase-in, plateau, phase-out) +- SALT cap change → benefit concentrated at high incomes +- UBI → flat benefit, then clawed back via taxes + +**If the chart looks wrong**: Investigate before proceeding — likely a parameter mapping error. + +--- + +## Checkpoint #2: Results Validation + +Compare PE results against external estimates. This is the most important validation step. 
+ +``` +═══════════════════════════════════════════════════════════════════════════ +RESULTS VALIDATION +═══════════════════════════════════════════════════════════════════════════ + +PE RESULTS: + Budget impact: {budget_impact} + Poverty change: {poverty_change} + Winners: {winners_pct} + Losers: {losers_pct} + Top decile avg: {top_decile_avg} + Bottom decile avg: {bottom_decile_avg} + +CHART SANITY CHECK: + Household sweep shape: {matches intent? describe} + +VALIDATION — PE vs EXTERNAL: +┌─────────────────────────────────────────────────────────────────────────┐ +│ Source │ Estimate │ vs PE │ Difference │ Verdict │ +│──────────────────────│───────────│────────────│────────────│───────────│ +│ PE (PolicyEngine) │ -$14.1B │ — │ — │ — │ +│ CBO/JCT │ -$15.2B │ -7.2% │ < 10% │ Excellent │ +│ Tax Foundation │ -$14.8B │ -4.7% │ < 10% │ Excellent │ +│ Back-of-envelope │ -$16.0B │ -11.9% │ 10-25% │ Acceptable│ +└─────────────────────────────────────────────────────────────────────────┘ + +THRESHOLDS: + < 10% → Excellent match + 10-25% → Acceptable (note methodology differences) + 25-50% → Review needed (re-check parameters) + > 50% → Likely error (stop and investigate) + +DISCREPANCY EXPLANATION: + {1-2 sentences explaining likely sources of difference — e.g., PE uses + Enhanced CPS microdata vs CBO's proprietary tax model, static vs dynamic + scoring, different base year assumptions} + +═══════════════════════════════════════════════════════════════════════════ +``` + +Use `AskUserQuestion`: +- Results look correct? External comparison acceptable? +- Options: **Yes, proceed to blog post** / **Re-run with adjusted parameters** / **Cancel** + +**If difference > 50%**: Do NOT offer "proceed" option. Force investigation. 
+ +**Do NOT proceed until the user explicitly approves.** + +--- + +## Phase 3: Write Blog Post + +Spawn blog-writer agent: + +``` +Task: blog-writer + +Write a blog post for the following analysis: + +- results.json path: {topic-slug}/results.json +- Reform description: {approved reform description} +- Country: {country} +- Output path: {topic-slug}/post.md +- External estimates: {sources from Checkpoint #1 — for methodology section} + +RULES: +- Every number must be a {{}} reference — zero hard-coded values +- Neutral tone, active voice, sentence case headings +- Methodology section must cite PE model version, dataset, and year +- Methodology section must note comparison to external estimates +- Include link to analysis repo code +``` + +--- + +## Phase 4: Automated Validation + +Spawn pipeline-validator agent: + +``` +Task: pipeline-validator + +Validate the full pipeline output: + +- results.json path: {topic-slug}/results.json +- Blog post path: {topic-slug}/post.md +- Charts directory: {topic-slug}/charts/ + +Run all 9 checks and produce the validation report. +``` + +### Validation Checks (9 automated) + +| # | Check | Blocker? | +|---|-------|----------| +| 1 | results.json schema (source_line, source_url, alt text, row widths) | Yes | +| 2 | Template references (every `{{}}` resolves, no orphans) | Yes | +| 3 | No hard-coded numbers (no raw `$` or `%` outside `{{}}`) | Yes | +| 4 | Neutral language (no value judgments) | Warning | +| 5 | Active voice (no passive constructions) | Warning | +| 6 | Sentence case headings | Warning | +| 7 | Chart accessibility (alt text with chart type + 2-3 data points) | Yes | +| 8 | Source traceability (source_url contains repo, ends with #L{line}) | Yes | +| 9 | Post structure (H1 title, key findings, methodology, repo link) | Yes | + +**Blockers must pass before proceeding. 
Warnings should be fixed but don't block.** + +--- + +## Checkpoint #3: Final Review Before PR + +Present the complete post + validation report for human approval: + +``` +═══════════════════════════════════════════════════════════════════════════ +FINAL REVIEW BEFORE PR +═══════════════════════════════════════════════════════════════════════════ + +VALIDATION REPORT: + results.json schema: ✅ + Template references: ✅ (14 resolved, 0 missing, 0 orphaned) + Hard-coded numbers: ✅ (0 found) + Neutral language: ✅ (0 issues) + Active voice: ✅ (0 passive) + Sentence case: ✅ + Chart accessibility: ✅ (3 charts checked) + Source traceability: ✅ (14 values checked) + Post structure: ✅ + + Result: 9/9 checks passed. Ready for PR. + +EXTERNAL VALIDATION: + PE vs CBO/JCT: -7.2% (Excellent) + PE vs Tax Foundation: -4.7% (Excellent) + PE vs back-of-envelope: -11.9% (Acceptable) + +POST SUMMARY: + Title: {title} + Key findings: {3 bullet points} + Charts: {N} charts with alt text + Tables: {N} tables + Values: {N} traceable values + Word count: {N} + +═══════════════════════════════════════════════════════════════════════════ +``` + +Use `AskUserQuestion`: +- Ready to create PR? +- Options: **Yes, create draft PR** / **No, needs edits** / **Cancel** + +**Do NOT proceed until the user explicitly approves.** + +--- + +## Phase 5: Create PR + Distribute + +### 5a. Create Analysis PR + +```bash +cd {analysis-directory} +git add . +git commit -m "Add {topic} analysis with results.json and charts" +git push origin main +``` + +### 5b. Create Blog Post PR + +Create a draft PR in policyengine-app-v2 that adds: +1. Blog post markdown in `articles/` +2. 
posts.json entry with `analysis_repo` field + +PR body must include: + +```markdown +## Blog Post: {title} + +**Analysis repo**: [PolicyEngine/{repo}](https://github.com/PolicyEngine/{repo}) + +### Reform +| Parameter | Current | Proposed | +|-----------|---------|----------| +| {param} | {base} | {reform} | + +### External validation +| Source | Estimate | vs PE | Verdict | +|--------|----------|-------|---------| +| PE (PolicyEngine) | {pe_estimate} | — | — | +| {source} | {estimate} | {diff}% | {verdict} | +| Back-of-envelope | {estimate} | {diff}% | {verdict} | + +### Key results +| Metric | Value | +|--------|-------| +| Budget impact | {budget_impact} | +| Poverty change | {poverty_change} | +| Winners | {winners_pct} | + +### Validation +Pipeline validator: {N}/9 checks passed. + +--- +*Generated by `/publish-analysis` — PolicyEngine Claude Plugin* +``` + +**The blog post is NOT published until the PR is merged.** The resolve-posts build step runs on deploy, fetches results.json, and resolves all `{{}}` templates. + +### 5c. 
Distribution Checklist + +After merge and deploy: + +- [ ] Post to Twitter/X with key finding + image +- [ ] Post to LinkedIn with key finding + image +- [ ] Send to newsletter list (if applicable) +- [ ] Direct outreach to bill sponsors (if bill analysis) +- [ ] Pitch to relevant journalists +- [ ] Log in CRM +- [ ] Confirm GA4 events firing + +--- + +## Final Output + +``` +═══════════════════════════════════════════════════════════════════════════ +COMPLETE: {TOPIC} +═══════════════════════════════════════════════════════════════════════════ + +ANALYSIS: + ✓ analysis.py written and executed + ✓ results.json validated (Pydantic schema) + ✓ {N} charts generated with alt text + ✓ {N} values with source line tracking + +VALIDATION: + ✓ Pipeline validator: 9/9 checks passed + ✓ PE vs external: {best_match}% ({verdict}) + ✓ Chart sanity check: shape matches intent + ✓ Human approved at 3 checkpoints + +PRs: + Analysis: {analysis_pr_url} + Blog post: {blog_pr_url} + +NEXT STEPS: + 1. Review both PRs + 2. Merge blog post PR to publish + 3. Run distribution checklist + +═══════════════════════════════════════════════════════════════════════════ +``` + +--- + +## Error Handling + +| Problem | Cause | Fix | +|---------|-------|-----| +| Dataset not found | HDF5 file not available locally | Download from HuggingFace: `hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5` | +| Memory issues | Microsimulation loads ~60k households | Ensure 8GB+ RAM available | +| PE vs external > 50% | Parameter mapping error or methodological mismatch | **Stop.** Re-check parameter paths, compare baseline assumptions, verify reform encoding | +| PE vs external 25-50% | Moderate discrepancy | Note in methodology section. 
Check for known differences (static vs dynamic, different base year) | +| Chart shape wrong | Parameter mapping error | Return to Checkpoint #1, fix parameters, re-run | +| Unresolvable `{{ref}}` | Key mismatch between markdown and results.json | Fix spelling or add missing key to results.json | +| Stale source lines | Code changed after generating results.json | Re-run analysis.py to regenerate | +| Validator blockers | Schema or reference errors | Fix before proceeding — do NOT skip | + +--- + +## Key Principle: All Computation via analysis.py + +**NEVER compute impacts inline or with ad-hoc code.** All computation goes through analysis.py because: + +1. **Reproducibility** — anyone can re-run the same script +2. **Auditability** — every value traceable to a specific line +3. **Schema consistency** — ResultsJson validates output +4. **Source tracking** — tracked_value() captures line numbers automatically + +The agents research and generate the reform definition. analysis.py does computation. The blog post is a presentation layer only. + +--- + +## Agents Used + +| Agent | Purpose | Phase | +|-------|---------|-------| +| analysis-writer | Write and run analysis.py, produce results.json | 2 | +| blog-writer | Write blog post with {{}} template refs | 3 | +| pipeline-validator | 9 automated checks on schema, refs, language | 4 | + +## Scripts & Tools + +| Tool | Purpose | +|------|---------| +| `policyengine.py` | Local microsimulation (not API) | +| `policyengine.results.tracked_value()` | Auto-capture source line numbers | +| `policyengine.results.ResultsJson` | Pydantic schema validation | +| `policyengine.utils.plotting.format_fig()` | PE brand chart styling | + +--- + +Start by checking for existing analysis (Phase 0), then proceed through all phases. Never skip a checkpoint. 
diff --git a/skills/content/blog-pipeline-skill/SKILL.md b/skills/content/blog-pipeline-skill/SKILL.md new file mode 100644 index 0000000..2a5943c --- /dev/null +++ b/skills/content/blog-pipeline-skill/SKILL.md @@ -0,0 +1,774 @@ +--- +name: blog-pipeline +description: End-to-end blog post pipeline - results.json schema, template syntax, policyengine.py local simulation, chart generation, and SEO-optimized publishing +--- + +# Blog Post Pipeline + +How to produce a fully traceable, SEO-optimized blog post from a policy reform using `policyengine.py` for local simulation. + +## For Users + +### What This Pipeline Does + +Every PolicyEngine blog post follows a strict pipeline: + +1. **Agent runs simulations locally** using `policyengine.py` — full microsimulation, all variables accessible +2. **Agent generates results.json** with every value, table, and chart traceable to its source +3. **Agent writes a blog post** using `{{value}}` template references — zero hard-coded numbers +4. **Build step resolves templates** at deploy time by fetching results.json from GitHub +5. **Output is fully indexable** — text with source links, `<img>` charts with alt text, HTML tables + +### Core Principles + +1. **Zero hard-coded values** — every number comes from results.json +2. **Every number is traceable** — click any number to see the code line that produced it +3. **No iframes** — charts are static `<img>` from GitHub Pages with descriptive alt text +4. **No computation in posts** — blog posts are a presentation layer only +5. **Auto-updating on deploy** — resolve-posts fetches latest results.json automatically +6. **Neutral language** — active voice, quantitative precision, no value judgments (see policyengine-writing-skill) + +--- + +## For Analysts + +### results.json Schema + +Every analysis produces a `results.json` file. This is the contract between the analysis and the blog post. 
+ +```json +{ + "metadata": { + "title": "SALT Cap Repeal Impact Analysis", + "repo": "PolicyEngine/salt-cap-analysis", + "commit": "a1b2c3d", + "generated_at": "2026-02-23T14:30:00Z", + "policyengine_version": "0.1.0", + "dataset": "enhanced_cps_2024", + "country_id": "us", + "year": 2026 + }, + "values": { + "budget_impact": { + "value": -15200000000, + "display": "$15.2 billion", + "source_line": 47, + "source_url": "https://github.com/PolicyEngine/salt-cap-analysis/blob/main/analysis.py#L47" + } + }, + "tables": { + "distributional": { + "title": "Distributional Impact by Income Decile", + "headers": ["Decile", "Avg Change", "% Affected"], + "rows": [["Bottom 10%", "+$340", "12%"], ["Top 10%", "+$8,200", "89%"]], + "source_line": 82, + "source_url": "https://github.com/PolicyEngine/salt-cap-analysis/blob/main/analysis.py#L82" + } + }, + "charts": { + "distributional": { + "url": "https://PolicyEngine.github.io/salt-cap-analysis/charts/distributional.png", + "alt": "Bar chart showing SALT cap repeal benefits by income decile. 
Top 10% gains $8,200 on average.", + "width": 1200, + "height": 600, + "source_line": 105, + "source_url": "https://github.com/PolicyEngine/salt-cap-analysis/blob/main/analysis.py#L105" + } + } +} +``` + +### Required Fields + +| Section | Field | Type | Purpose | +|---------|-------|------|---------| +| metadata | `repo` | string | GitHub org/repo for source links | +| metadata | `commit` | string | Exact commit for reproducibility | +| metadata | `policyengine_version` | string | Package version used | +| metadata | `dataset` | string | Dataset name (e.g., `enhanced_cps_2024`) | +| metadata | `year` | number | Analysis year | +| values.* | `value` | number | Raw numeric value | +| values.* | `display` | string | Formatted display text (e.g., "$15.2 billion") | +| values.* | `source_line` | number | Line number in analysis.py | +| values.* | `source_url` | string | Full GitHub URL to source line | +| tables.* | `headers` | string[] | Column headers | +| tables.* | `rows` | string[][] | Row data (pre-formatted) | +| charts.* | `url` | string | GitHub Pages URL to PNG | +| charts.* | `alt` | string | Descriptive alt text with data points | + +--- + +### Template Syntax + +Blog posts use `{{}}` references that are resolved at build time: + +| Pattern | Resolves To | Example | +|---------|-------------|---------| +| `{{value_name}}` | Linked display text | `{{budget_impact}}` → `[$15.2 billion](source_url)` | +| `{{table:name}}` | Markdown table with caption | `{{table:distributional}}` → full table | +| `{{chart:name}}` | `![alt](url)` image | `{{chart:distributional}}` → image with alt text | + +**✅ Correct template usage:** +```markdown +The reform would cost {{budget_impact}} per year. + +{{table:distributional}} + +{{chart:distributional}} + +The poverty rate changes by {{poverty_change}}. +``` + +**❌ Wrong — hard-coded numbers:** +```markdown +The reform would cost $15.2 billion per year. 
+ +| Decile | Avg Change | +| Bottom 10% | +$340 | + +![chart](https://example.com/chart.png) + +The poverty rate falls by 3.2%. +``` + +Every number in the blog post must come from results.json. If you type a raw number into the markdown, it will not have a source link and cannot be traced or auto-updated. + +--- + +### Using policyengine.py + +The agent writes and runs Python using `policyengine.py` for all simulations. This runs locally — no API calls, full variable access. + +**Install:** +```bash +pip install policyengine +``` + +**Define a reform:** +```python +from policyengine.core import Simulation, Policy, Parameter, ParameterValue +from policyengine.tax_benefit_models.us import PolicyEngineUSDataset, us_latest +import datetime + +# Create reform: e.g., remove SALT cap +param = Parameter( + name="gov.irs.deductions.itemized.salt_and_real_estate.cap", + tax_benefit_model_version=us_latest, + data_type=float, +) +pv = ParameterValue( + parameter=param, + start_date=datetime.date(2026, 1, 1), + end_date=datetime.date(2026, 12, 31), + value=0, # Remove cap +) +policy = Policy(name="SALT Cap Repeal", parameter_values=[pv]) +``` + +**Run baseline + reform microsimulations:** +```python +# Load dataset +dataset = PolicyEngineUSDataset( + name="enhanced_cps_2024", + filepath="path/to/enhanced_cps_2024.h5", + year=2026, +) + +# Baseline +baseline_sim = Simulation(dataset=dataset, tax_benefit_model_version=us_latest) +baseline_sim.run() + +# Reform +reform_sim = Simulation(dataset=dataset, tax_benefit_model_version=us_latest, policy=policy) +reform_sim.run() +``` + +**Access any variable directly:** +```python +# Household-level results +baseline_net = baseline_sim.output_dataset.data.household["household_net_income"] +reform_net = reform_sim.output_dataset.data.household["household_net_income"] +change = reform_net - baseline_net + +# Tax-unit-level +baseline_tax = baseline_sim.output_dataset.data.tax_unit["income_tax"] +reform_tax = 
reform_sim.output_dataset.data.tax_unit["income_tax"] + +# Access weights for proper aggregation +weights = baseline_sim.output_dataset.data.household["household_weight"] +``` + +**Built-in distributional analysis:** +```python +from policyengine.outputs.decile_impact import DecileImpact +from policyengine.outputs.poverty import Poverty +from policyengine.outputs.inequality import Inequality +from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType + +# Decile impact +decile = DecileImpact( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, + variable="household_net_income", +) +decile.run() + +# Poverty +poverty = Poverty( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, +) +poverty.run() + +# Winners/losers count +winners = ChangeAggregate( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, + variable="household_net_income", + aggregate_type=ChangeAggregateType.COUNT, + change_geq=1, +) +winners.run() +``` + +**Budget impact:** +```python +from policyengine.outputs.aggregate import Aggregate, AggregateType + +baseline_revenue = Aggregate( + simulation=baseline_sim, + variable="household_tax", + aggregate_type=AggregateType.SUM, +) +baseline_revenue.run() + +reform_revenue = Aggregate( + simulation=reform_sim, + variable="household_tax", + aggregate_type=AggregateType.SUM, +) +reform_revenue.run() + +budget_impact = reform_revenue.result - baseline_revenue.result +``` + +--- + +### Standard Chart Types + +Every analysis should produce charts from this catalog. Pick the ones relevant to the reform. You may also create **custom charts** for topic-specific visualizations — just follow the same Plotly styling and alt text rules. + +#### Chart 1: Decile impact bar chart (required for microsimulation posts) + +Shows average net income change by income decile. The most common chart across both US and UK posts. 
+ +```python +import plotly.graph_objects as go +import plotly.io as pio +import os + +TEAL = "#39C6C0" +RED = "#DC2626" + +fig = go.Figure() +fig.add_trace(go.Bar( + x=[f"Decile {i}" for i in range(1, 11)], + y=decile_values, + marker_color=[TEAL if v >= 0 else RED for v in decile_values], + text=[f"${v:,.0f}" for v in decile_values], + textposition="outside", +)) +fig.update_layout( + template="plotly_white", + font=dict(family="Inter, sans-serif"), + xaxis_title="Income decile", + yaxis_title="Average annual change ($)", +) + +os.makedirs("charts", exist_ok=True) +pio.write_image(fig, "charts/decile_impact.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Bar chart showing [reform] impact by income decile. Bottom decile [gains/loses] [amount]. Top decile [gains/loses] [amount]." + +#### Chart 2: Household net income curve (required for household posts) + +Shows how net income changes across an earnings range for a specific household type. Use axes for efficiency. + +```python +fig = go.Figure() +fig.add_trace(go.Scatter( + x=incomes, y=baseline_net, mode="lines", + name="Current law", line=dict(color=BLUE, width=2), +)) +fig.add_trace(go.Scatter( + x=incomes, y=reform_net, mode="lines", + name="Reform", line=dict(color=TEAL, width=2), +)) +fig.update_layout( + template="plotly_white", + font=dict(family="Inter, sans-serif"), + xaxis_title="Employment income ($)", + yaxis_title="Household net income ($)", + xaxis_tickformat="$,.0f", + yaxis_tickformat="$,.0f", + legend=dict(x=0.02, y=0.98), +) +pio.write_image(fig, "charts/net_income_curve.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Line chart comparing net income under current law and [reform] for [household type]. Reform increases net income by [amount] at [income level], with the largest gain at [peak]." + +#### Chart 3: Winners and losers bar chart + +Shows what percentage of households gain, lose, or are unaffected, often by decile. 
+ +```python +fig = go.Figure() +fig.add_trace(go.Bar( + x=decile_labels, y=pct_gaining, name="Gain", marker_color=TEAL, +)) +fig.add_trace(go.Bar( + x=decile_labels, y=pct_losing, name="Lose", marker_color=RED, +)) +fig.update_layout( + template="plotly_white", + barmode="group", + xaxis_title="Income decile", + yaxis_title="Share of households (%)", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/winners_losers.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Bar chart showing winners and losers by income decile. [X]% of bottom-decile households gain vs [Y]% of top-decile households." + +#### Chart 4: Budgetary impact over time (bar) + +Shows annual cost or revenue impact across a budget window (typically 10 years). + +```python +fig = go.Figure() +fig.add_trace(go.Bar( + x=years, y=annual_costs, + marker_color=BLUE, + text=[f"${v/1e9:.1f}B" for v in annual_costs], + textposition="outside", +)) +fig.update_layout( + template="plotly_white", + xaxis_title="Year", + yaxis_title="Budget impact ($)", + yaxis_tickformat="$,.0f", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/budget_impact.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Bar chart showing annual budget impact from [year] to [year]. Total [10]-year cost: [amount]." + +#### Chart 5: Poverty impact comparison (grouped bar) + +Shows poverty rate changes across demographics (overall, children, seniors, etc.). 
+ +```python +categories = ["Overall", "Children", "Working-age", "Seniors"] +fig = go.Figure() +fig.add_trace(go.Bar( + x=categories, y=baseline_poverty, name="Current law", marker_color="#94A3B8", +)) +fig.add_trace(go.Bar( + x=categories, y=reform_poverty, name="Reform", marker_color=TEAL, +)) +fig.update_layout( + template="plotly_white", + barmode="group", + yaxis_title="Poverty rate (%)", + yaxis_tickformat=".1%", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/poverty_impact.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Grouped bar chart comparing poverty rates under current law and [reform]. Overall poverty [falls/rises] from [X]% to [Y]%. Child poverty [falls/rises] from [X]% to [Y]%." + +#### Chart 6: Waterfall (tax component decomposition) + +Shows how individual reform components add up to the total impact. Used in analysis repos (HR1), underused in blog posts. + +```python +fig = go.Figure(go.Waterfall( + x=["Baseline revenue", "Income tax change", "Payroll tax change", + "Credit expansion", "Reform revenue"], + y=[baseline_rev, income_tax_delta, payroll_delta, credit_delta, 0], + measure=["absolute", "relative", "relative", "relative", "total"], + connector={"line": {"color": "#94A3B8"}}, + increasing={"marker": {"color": TEAL}}, + decreasing={"marker": {"color": RED}}, + totals={"marker": {"color": BLUE}}, + text=[f"${v/1e9:.1f}B" for v in [baseline_rev, income_tax_delta, + payroll_delta, credit_delta, reform_rev]], + textposition="outside", +)) +fig.update_layout( + template="plotly_white", + yaxis_title="Federal revenue ($B)", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/waterfall.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Waterfall chart decomposing budget impact. [Component 1] contributes [amount], [component 2] contributes [amount]. Total reform impact: [amount]." 
+ +#### Chart 7: Marginal tax rate curve (line) + +Shows effective marginal tax rate across an income range, revealing benefit cliffs and taper interactions. + +```python +fig = go.Figure() +fig.add_trace(go.Scatter( + x=incomes[:-1], y=baseline_mtr, mode="lines", + name="Current law", line=dict(color=BLUE, width=2), +)) +fig.add_trace(go.Scatter( + x=incomes[:-1], y=reform_mtr, mode="lines", + name="Reform", line=dict(color=TEAL, width=2), +)) +fig.update_layout( + template="plotly_white", + xaxis_title="Employment income ($)", + yaxis_title="Marginal tax rate (%)", + yaxis_tickformat=".0%", + font=dict(family="Inter, sans-serif"), +) +pio.write_image(fig, "charts/marginal_rates.png", width=1200, height=600, scale=2) +``` + +**Alt text pattern:** "Line chart showing marginal tax rates under current law and [reform] for [household type]. Reform [reduces/increases] peak marginal rate from [X]% to [Y]% at [income level]." + +#### Custom Charts + +For topic-specific visualizations not covered above (e.g., state comparison maps, benefit phase-in schedules, wealth decile breakdowns, animated time series), follow these rules: + +1. Use the same Plotly styling: `template="plotly_white"`, `font=dict(family="Inter, sans-serif")` +2. Use PE brand colors: `TEAL = "#39C6C0"`, `BLUE = "#2C6496"`, `RED = "#DC2626"`, `GRAY = "#94A3B8"` +3. Save as PNG at 1200x600, scale=2 +4. Write descriptive alt text with chart type and 2-3 key data points +5. Include the chart in results.json with `url`, `alt`, `width`, `height`, `source_line`, `source_url` + +--- + +### Standard Table Types + +Every analysis should produce tables from this catalog where relevant. You may also create **custom tables** — just follow the same formatting rules. + +#### Table 1: Household impact table (required for household posts) + +Shows net income change for representative household types. 
+ +| Household | Income | Filing status | Net income change | +|-----------|--------|---------------|-------------------| +| Single, no children | $40,000 | Single | +$0 | +| Single parent, 2 children | $50,000 | Head of household | +$1,000 | +| Married, 2 children | $100,000 | Joint | +$2,000 | +| Senior, retired | $24,000 | Single | +$0 | + +**UK equivalent:** Replace "Filing status" with "Tenure type" or "Region". Include `would_claim_*` context. + +#### Table 2: Income decile distribution table (required for microsimulation posts) + +Shows average impact, share affected, and share of total benefit by decile. + +| Decile | Avg. change | % affected | Share of total benefit | +|--------|-------------|------------|----------------------| +| 1 (bottom 10%) | +$340 | 12% | 2% | +| ... | ... | ... | ... | +| 10 (top 10%) | +$8,200 | 89% | 42% | + +#### Table 3: Parameter comparison table + +Shows what the reform changes — current law values vs. reform values. + +| Parameter | Current law | Reform | +|-----------|------------|--------| +| CTC base amount | $2,000 | $5,000 | +| Phase-out threshold (single) | $200,000 | $200,000 | +| Phase-out threshold (joint) | $400,000 | $400,000 | +| Refundability | $1,700 | Fully refundable | + +#### Table 4: Budgetary impact by year table + +Shows annual fiscal cost over a budget window. + +| Year | Static cost ($B) | Dynamic cost ($B) | +|------|-----------------|-------------------| +| 2026 | 15.2 | 18.4 | +| 2027 | 15.8 | 19.1 | +| ... | ... | ... | +| 2026-2035 total | 162.0 | 195.0 | + +#### Table 5: Poverty and inequality summary table + +Shows key distributional metrics before and after reform. 
+ +| Metric | Baseline | Reform | Change | +|--------|----------|--------|--------| +| Overall poverty rate (SPM) | 12.4% | 12.0% | -0.4pp | +| Child poverty rate | 13.2% | 11.8% | -1.4pp | +| Gini index | 0.414 | 0.412 | -0.002 | +| Top 10% income share | 31.2% | 30.8% | -0.4pp | + +#### Custom Tables + +For topic-specific tables not covered above (e.g., state-by-state comparisons, methodology comparisons, tax rate schedules, benefit phase-in tables), follow these rules: + +1. Include in results.json under `"tables"` with `headers`, `rows`, `source_line`, `source_url` +2. Pre-format all values as display strings (e.g., "$15.2 billion", "12.4%") +3. Use `{{table:name}}` in the blog post markdown +4. Keep tables under 15 rows — split into multiple tables if larger + +--- + +### Alt Text for Charts + +Every chart needs descriptive alt text that includes key data points. Critical for SEO and accessibility. + +**✅ Correct (descriptive, includes data):** +``` +Bar chart showing SALT cap repeal benefits by income decile. +Top decile gains $8,200 average. Bottom decile gains $340 average. +Middle deciles gain $500-$1,200. 89% of top-decile households affected. +``` + +**❌ Wrong (vague, no data):** +``` +Chart showing distributional impact of the reform. +``` +``` +distributional.png +``` +``` +Impact by income group. +``` + +Alt text should: +- Start with the chart type ("Bar chart showing...", "Line chart of...") +- Include 2-3 key data points with actual numbers +- Mention the most significant finding +- Be 1-3 sentences + +--- + +### Blog Post Writing Rules + +Blog posts generated through this pipeline must follow the policyengine-writing-skill. 
Key rules: + +#### Neutral tone + +**✅ Correct (neutral — describes what policies do):** +``` +The reform reduces poverty by 3.2% and raises inequality by 0.16% +The top income decile receives 42% of total benefits +``` + +**❌ Wrong (value judgments):** +``` +The reform successfully reduces poverty but unfortunately raises inequality +The wealthiest households receive a disproportionate share of benefits +``` + +#### Active voice with specific numbers + +**✅ Correct:** +``` +The bill lowers the top rate from 5.9% to 5.4% +Repealing the SALT cap costs $15.2 billion in 2026 +``` + +**❌ Wrong:** +``` +The top rate is lowered by the bill +Repealing the SALT cap significantly increases the deficit +``` + +#### Sentence case headings + +**✅ Correct:** +``` +## Budgetary impact +## Distributional analysis +## Poverty and inequality +``` + +**❌ Wrong:** +``` +## Budgetary Impact +## Distributional Analysis +## Poverty and Inequality +``` + +#### Show calculations explicitly + +**✅ Correct:** +``` +The reform costs $15.2 billion per year: $18.4 billion in reduced income tax +revenue, partially offset by $3.2 billion in higher payroll tax collections. +``` + +**❌ Wrong:** +``` +The reform has a significant budgetary impact. +``` + +--- + +### Source Tracking + +Every value in results.json must include `source_line` and `source_url` pointing to the exact line in analysis.py that computed it. 
Use the `tracked_value()` helper from `policyengine.results` — it captures the caller's line number automatically via `inspect.stack()`: + +```python +from policyengine.results import tracked_value, ValueEntry + +budget_impact = reform_revenue.result - baseline_revenue.result + +# tracked_value() captures this line number automatically +budget_entry = ValueEntry(**tracked_value( + value=budget_impact, + display=f"${abs(budget_impact)/1e9:.1f} billion", + repo="PolicyEngine/salt-cap-analysis", +)) +``` + +**✅ Correct — using tracked_value():** +```python +entry = ValueEntry(**tracked_value( + value=budget_impact, + display=f"${abs(budget_impact)/1e9:.1f} billion", + repo=REPO, +)) +``` + +**❌ Wrong — manual line tracking (error-prone, goes stale on refactoring):** +```python +line = inspect.currentframe().f_lineno +entry = { + "value": budget_impact, + "display": f"${abs(budget_impact)/1e9:.1f} billion", + "source_line": line, + "source_url": f"https://github.com/{REPO}/blob/main/analysis.py#L{line}", +} +``` + +**❌ Wrong — value without source:** +```json +{ + "budget_impact": { + "value": -15200000000, + "display": "$15.2 billion" + } +} +``` + +--- + +## For Contributors + +### Analysis Repo Structure + +``` +analysis-repo/ +├── analysis.py # Main script — policyengine.py simulations, charts, results.json +├── results.json # Generated output — the contract +├── charts/ # Generated PNGs — deployed to GitHub Pages +│ ├── distributional.png +│ └── household_impact.png +├── requirements.txt # policyengine, plotly, kaleido +├── README.md # How to reproduce +└── .github/workflows/ + └── pages.yml # Auto-deploy charts to GitHub Pages on push +``` + +### Blog Post in policyengine-app-v2 + +``` +app/src/data/posts/ +├── posts.json # Add entry with analysis_repo field +└── articles/ + └── salt-cap-analysis.md # Blog post with {{}} template refs +``` + +**posts.json entry:** +```json +{ + "title": "SALT Cap Repeal Would Cost $15 Billion", + "description": "Analysis of 
repealing the SALT deduction cap...", + "date": "2026-02-23", + "tags": ["us", "policy", "featured"], + "authors": ["max-ghenis"], + "filename": "salt-cap-analysis.md", + "image": "salt-cap-analysis.png", + "analysis_repo": "PolicyEngine/salt-cap-analysis" +} +``` + +The `analysis_repo` field triggers the resolve-posts build step to fetch results.json and resolve all `{{}}` templates before Vite builds the site. + +### resolve-posts Build Step + +Runs automatically before Vite build: + +1. Reads posts.json → finds posts with `analysis_repo` field +2. Fetches `results.json` from `raw.githubusercontent.com/{repo}/main/results.json` +3. Reads the markdown template file +4. Resolves `{{value}}` → `[display](source_url)` (linked text) +5. Resolves `{{table:name}}` → markdown table with caption and source link +6. Resolves `{{chart:name}}` → `![alt](github_pages_url)` image +7. Writes resolved markdown back to the articles directory +8. Vite builds the site with all values populated + +### SEO Output + +The resolved blog post produces: +- **Text with source links** — every number is a clickable link to the code +- **Charts as `<img>`** — from GitHub Pages with descriptive alt text (fully indexable) +- **Data tables in HTML** — eligible for Google featured snippets +- **JSON-LD Article schema** — served by middleware to crawlers +- **OG tags** — for social media sharing previews + +--- + +## Pipeline Checklist + +Before publishing, verify: + +- [ ] Every `{{}}` ref in the markdown exists in results.json +- [ ] Every value in results.json has `source_line` and `source_url` +- [ ] Charts load from GitHub Pages URLs +- [ ] Alt text is descriptive with 2-3 key data points +- [ ] No hard-coded numbers in the markdown (search for raw digits) +- [ ] Neutral language — no value judgments (see policyengine-writing-skill) +- [ ] Active voice throughout +- [ ] Sentence case headings +- [ ] Methodology section specifies model version, dataset, and assumptions +- [ ] Source links point to 
real code lines (not stale line numbers) + +--- + +## Common Issues + +| Problem | Cause | Fix | +|---------|-------|-----| +| `{{name}}` appears literally in published post | Key missing from results.json | Add the key to results.json or fix the spelling | +| Source link points to wrong line | Code changed after results.json was generated | Re-run analysis.py to regenerate results.json | +| Chart 404 on GitHub Pages | Pages workflow hasn't run | Push to main to trigger the pages.yml workflow | +| Numbers don't match between text and tables | Template refs point to different values | Each number should reference one canonical value in results.json | +| Alt text says "chart" with no data | Generic placeholder | Rewrite to include chart type and 2-3 key data points | + +--- + +## Resources + +- policyengine.py repo: See policyengine-python-client-skill +- Writing skill: See policyengine-writing-skill for tone and style +- Content generation skill: See content-generation-skill for social images +- Analysis skill: See policyengine-analysis-skill for simulation patterns diff --git a/skills/content/uk-household-analysis-skill/SKILL.md b/skills/content/uk-household-analysis-skill/SKILL.md new file mode 100644 index 0000000..ad0e614 --- /dev/null +++ b/skills/content/uk-household-analysis-skill/SKILL.md @@ -0,0 +1,650 @@ +--- +name: uk-household-analysis +description: Household-level impact analysis patterns for UK policy reforms - define households, calculate tax/benefit changes, generate results.json +--- + +# UK Household Analysis + +Patterns for analyzing how UK policy reforms affect specific household types. Use this skill when a blog post needs household-level case studies (e.g., "a single parent earning £30,000 sees a £520 increase in Universal Credit"). 
+ +## When to Use This Skill + +- Blog posts showing how a reform affects representative households +- Calculators that let users enter their own household details +- Case studies comparing reform impacts across family types, income levels, or regions +- Budget constraint / marginal tax rate analysis across an income range + +For population-level microsimulation (deciles, poverty rates, aggregate budget impact), see blog-pipeline-skill instead. + +--- + +## Household Structure + +A UK household in PolicyEngine has 3 entity groups. Simpler than US — no tax units or SPM units. + +| Entity | Purpose | Key variables | +|--------|---------|---------------| +| `household` | Physical dwelling | `region`, `rent`, `council_tax`, `tenure_type`, `hbai_household_net_income` | +| `benunit` | Benefit unit (means-testing unit) | `universal_credit`, `child_benefit`, `would_claim_uc` | +| `person` | Individual | `age`, `employment_income`, `income_tax`, `national_insurance` | + +### Benefit Unit vs Household + +A household can contain multiple benefit units. A benefit unit is typically: +- A single adult, or +- A couple (married or cohabiting), plus +- Any dependent children + +This matters for means-tested benefits like Universal Credit, which are assessed per benefit unit, not per household. + +### The `would_claim_*` Flags + +UK benefits are not automatic. 
You must set `would_claim_*` flags to `True` for benefits to be calculated: + +| Flag | Benefit | Default | +|------|---------|---------| +| `would_claim_uc` | Universal Credit | False | +| `would_claim_child_benefit` | Child Benefit | False | +| `would_claim_WTC` | Working Tax Credit (legacy) | False | +| `would_claim_CTC` | Child Tax Credit (legacy) | False | +| `would_claim_HB` | Housing Benefit (legacy) | False | +| `would_claim_IS` | Income Support (legacy) | False | +| `would_claim_JSA` | Jobseeker's Allowance | False | +| `would_claim_PC` | Pension Credit | False | + +**✅ Correct — benefits will be calculated:** +```python +benunit={"would_claim_uc": True, "would_claim_child_benefit": True} +``` + +**❌ Wrong — benefits will be zero even if eligible:** +```python +benunit={} # No would_claim flags set +``` + +--- + +## Approach 1: Single Household (calculate_household_impact) + +```python +from policyengine.tax_benefit_models.uk import ( + UKHouseholdInput, + calculate_household_impact, +) + +# Single adult earning £35,000 +household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 35_000}], + household={ + "region": "NORTH_WEST", + "tenure_type": "RENT_PRIVATELY", + "rent": 9_600, # £800/month + "council_tax": 1_400, + }, + benunit={ + "would_claim_uc": True, + }, + year=2026, +) +result = calculate_household_impact(household) + +net_income = result.household["hbai_household_net_income"] +income_tax = result.person[0]["income_tax"] +ni = result.person[0]["national_insurance"] +uc = result.benunit[0]["universal_credit"] +``` + +### Common Household Types + +**Single adult, no children, renting:** +```python +UKHouseholdInput( + people=[{"age": 30, "employment_income": 35_000}], + household={ + "region": "LONDON", + "tenure_type": "RENT_PRIVATELY", + "rent": 15_600, # £1,300/month + "council_tax": 1_800, + }, + benunit={"would_claim_uc": True}, + year=2026, +) +``` + +**Single parent, 2 children, renting:** +```python +UKHouseholdInput( + 
people=[ + {"age": 35, "employment_income": 25_000}, + {"age": 8}, + {"age": 5}, + ], + household={ + "region": "NORTH_WEST", + "tenure_type": "RENT_PRIVATELY", + "rent": 7_200, # £600/month + "council_tax": 1_200, + }, + benunit={ + "would_claim_uc": True, + "would_claim_child_benefit": True, + }, + year=2026, +) +``` + +**Couple, 2 children, homeowner:** +```python +UKHouseholdInput( + people=[ + {"age": 40, "employment_income": 50_000}, + {"age": 38, "employment_income": 25_000}, + {"age": 10}, + {"age": 7}, + ], + household={ + "region": "SOUTH_EAST", + "tenure_type": "OWNER_OCCUPIED", + "rent": 0, + "council_tax": 2_400, + }, + benunit={ + "would_claim_child_benefit": True, + }, + year=2026, +) +``` + +**Pensioner, renting:** +```python +UKHouseholdInput( + people=[{"age": 70, "state_pension": 10_600}], + household={ + "region": "WEST_MIDLANDS", + "tenure_type": "RENT_PRIVATELY", + "rent": 6_000, + "council_tax": 1_200, + }, + benunit={ + "would_claim_PC": True, + }, + year=2026, +) +``` + +**Disabled adult claiming UC with LCWRA:** +```python +UKHouseholdInput( + people=[ + { + "age": 45, + "employment_income": 10_000, + "is_disabled_for_benefits": True, + "uc_limited_capability_for_WRA": True, + } + ], + household={ + "region": "NORTH_EAST", + "tenure_type": "RENT_PRIVATELY", + "rent": 6_000, + "council_tax": 1_000, + }, + benunit={"would_claim_uc": True}, + year=2026, +) +``` + +--- + +## Approach 2: Situation Dict (Simulation) + +The pattern used by existing UK analysis repos. 
+ +```python +from policyengine_uk import Simulation + +situation = { + "people": { + "adult": { + "age": {"2026": 35}, + "employment_income": {"2026": 30_000}, + }, + "child1": {"age": {"2026": 8}}, + "child2": {"age": {"2026": 5}}, + }, + "benunits": { + "benunit": { + "members": ["adult", "child1", "child2"], + "would_claim_uc": {"2026": True}, + "would_claim_child_benefit": {"2026": True}, + } + }, + "households": { + "household": { + "members": ["adult", "child1", "child2"], + "region": {"2026": "NORTH_WEST"}, + "tenure_type": {"2026": "RENT_PRIVATELY"}, + "rent": {"2026": 7_200}, + "council_tax": {"2026": 1_200}, + } + }, +} + +sim = Simulation(situation=situation) +net_income = sim.calculate("hbai_household_net_income", "2026") +income_tax = sim.calculate("income_tax", "2026") +uc = sim.calculate("universal_credit", "2026") +child_benefit = sim.calculate("child_benefit", "2026") +``` + +### Income Sweep with Axes + +```python +situation = { + "people": { + "adult": {"age": {"2026": 35}}, + "child1": {"age": {"2026": 8}}, + }, + "benunits": { + "benunit": { + "members": ["adult", "child1"], + "would_claim_uc": {"2026": True}, + "would_claim_child_benefit": {"2026": True}, + } + }, + "households": { + "household": { + "members": ["adult", "child1"], + "region": {"2026": "LONDON"}, + "tenure_type": {"2026": "RENT_PRIVATELY"}, + "rent": {"2026": 12_000}, + "council_tax": {"2026": 1_600}, + } + }, + "axes": [[{ + "name": "employment_income", + "count": 501, + "min": 0, + "max": 100_000, + "period": "2026", + }]], +} + +sim = Simulation(situation=situation) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("hbai_household_net_income", "2026") +``` + +--- + +## Approach 3: Reform Comparison + +### With calculate_household_impact + +```python +from policyengine.core import Policy, Parameter, ParameterValue +from policyengine.tax_benefit_models.uk import ( + UKHouseholdInput, + calculate_household_impact, + uk_latest, +) +import 
datetime + +# Reform: increase UC standard allowance +param = Parameter( + name="gov.dwp.universal_credit.standard_allowance.amount.single.over_25", + tax_benefit_model_version=uk_latest, + data_type=float, +) +pv = ParameterValue( + parameter=param, + start_date=datetime.date(2026, 1, 1), + end_date=datetime.date(2026, 12, 31), + value=500, # Monthly amount +) +policy = Policy(name="UC Increase", parameter_values=[pv]) + +household = UKHouseholdInput( + people=[{"age": 30, "employment_income": 15_000}], + household={ + "region": "NORTH_WEST", + "tenure_type": "RENT_PRIVATELY", + "rent": 7_200, + "council_tax": 1_000, + }, + benunit={"would_claim_uc": True}, + year=2026, +) + +baseline = calculate_household_impact(household) +reform = calculate_household_impact(household, policy=policy) + +change = ( + reform.household["hbai_household_net_income"] + - baseline.household["hbai_household_net_income"] +) +``` + +### With Simulation + Situation Dict + +```python +reform_dict = { + "gov.dwp.universal_credit.standard_allowance.amount.single.over_25": { + "2026-01-01.2026-12-31": 500 + } +} + +sim_baseline = Simulation(situation=situation) +sim_reform = Simulation(situation=situation, reform=reform_dict) + +baseline_net = sim_baseline.calculate("hbai_household_net_income", "2026") +reform_net = sim_reform.calculate("hbai_household_net_income", "2026") +change = reform_net - baseline_net +``` + +--- + +## Impact Types + +### Net income change + +```python +baseline_net = baseline.household["hbai_household_net_income"] +reform_net = reform.household["hbai_household_net_income"] +change = reform_net - baseline_net +``` + +### Tax and benefit component breakdown + +```python +components = { + "Income tax": reform.person[0]["income_tax"] - baseline.person[0]["income_tax"], + "National Insurance": reform.person[0]["national_insurance"] - baseline.person[0]["national_insurance"], + "Universal Credit": reform.benunit[0]["universal_credit"] - 
baseline.benunit[0]["universal_credit"], + "Child Benefit": reform.benunit[0]["child_benefit"] - baseline.benunit[0]["child_benefit"], + "Council Tax Benefit": reform.benunit[0].get("council_tax_benefit", 0) - baseline.benunit[0].get("council_tax_benefit", 0), +} +``` + +### Effective marginal tax rate + +```python +sim = Simulation(situation=situation_with_axes) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("hbai_household_net_income", "2026") + +marginal_rates = 1 - np.diff(net_incomes) / np.diff(incomes) +``` + +### UC taper and benefit withdrawal + +Universal Credit tapers at 55p per £1 of net earnings above the work allowance. This creates effective marginal rates above the statutory tax rate. + +```python +sim = Simulation(situation=situation_with_axes) +incomes = sim.calculate("employment_income", "2026") +uc = sim.calculate("universal_credit", "2026") + +# Show where UC phases out +import plotly.graph_objects as go +fig = go.Figure() +fig.add_trace(go.Scatter(x=incomes, y=uc, name="Universal Credit")) +fig.update_layout( + xaxis_title="Employment income (£)", + yaxis_title="Universal Credit (£/year)", +) +``` + +--- + +## UK Regions + +The `region` field affects housing costs, council tax, and some benefit rates. + +| Region code | Region | +|-------------|--------| +| `NORTH_EAST` | North East | +| `NORTH_WEST` | North West | +| `YORKSHIRE` | Yorkshire and the Humber | +| `EAST_MIDLANDS` | East Midlands | +| `WEST_MIDLANDS` | West Midlands | +| `EAST_OF_ENGLAND` | East of England | +| `LONDON` | London | +| `SOUTH_EAST` | South East | +| `SOUTH_WEST` | South West | +| `WALES` | Wales | +| `SCOTLAND` | Scotland | +| `NORTHERN_IRELAND` | Northern Ireland | + +Scotland has different income tax rates. Northern Ireland has some separate benefit provisions. + +--- + +## Charts for UK Household Posts + +Produce these charts for household-level analysis. 
See blog-pipeline-skill for full Plotly styling details and additional chart types. + +### Required charts + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Net income curve** | Baseline vs reform net income across earnings range | Every household post | +| **UC taper chart** | Universal Credit amount vs earnings, showing work allowance and 55% taper | Posts about UC changes | +| **Component breakdown bar** | Which taxes/benefits drive the net income change (income tax, NI, UC, child benefit) | Posts where multiple programs interact | + +### Optional charts (use when relevant) + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Marginal tax rate curve** | Effective MTR showing income tax + NI + UC taper interaction | Posts about taper rates or benefit withdrawal | +| **Regional comparison bar** | Same household across regions (London vs North West vs Scotland) | Posts about regional variation | +| **Scotland vs England comparison** | Same income, different tax systems | Posts about Scottish tax rate changes | +| **Benefit cliff chart** | Individual benefit amounts (UC, child benefit, housing element) vs income | Posts about benefit interactions | +| **Revenue impact time series** | Annual cost across fiscal years (2026-27 to 2030-31) | Posts with multi-year budget windows | + +### Custom charts + +For topic-specific visuals not listed above, follow these rules: +- Use PE brand colors: `TEAL = "#39C6C0"`, `BLUE = "#2C6496"`, `RED = "#DC2626"` +- Plotly with `template="plotly_white"`, `font=dict(family="Inter, sans-serif")` +- Save as PNG at 1200x600, scale=2 +- Format currency as £ throughout (not $ or GBP) +- Write alt text with chart type and 2-3 key data points +- Include in results.json under `"charts"` with `url`, `alt`, `source_line`, `source_url` + +## Tables for UK Household Posts + +### Required tables + +| Table | Columns | When to use | +|-------|---------|-------------| +| 
**Household impact table** | Household type, Income, Tenure, Region, Net income change | Every household post | +| **Component breakdown table** | Component (income tax, NI, UC, child benefit...), Baseline, Reform, Change | Posts where multiple programs interact | + +### Optional tables + +| Table | Columns | When to use | +|-------|---------|-------------| +| **Parameter comparison** | Parameter, Current law, Reform | Posts introducing the reform details | +| **Regional comparison** | Region, Net income change, Key driver | Posts about regional variation | +| **Scotland vs England** | Metric, Scotland, England, Difference | Posts about devolved tax rates | +| **UC calculation walkthrough** | Step (standard allowance, child elements, housing, earnings deduction, taper), Amount | Posts explaining UC mechanics | +| **Revenue impact by year** | Fiscal year, Static cost (£B), Dynamic cost (£B) | Posts with multi-year analysis | +| **Poverty and inequality** | Metric (AHC poverty, BHC poverty, child poverty, Gini), Baseline, Reform, Change | Posts with distributional analysis | + +### Custom tables + +For topic-specific tables, follow these rules: +- Include in results.json with `headers`, `rows`, `source_line`, `source_url` +- Pre-format values as display strings ("£1,200", "12.4%") +- Use `{{table:name}}` in blog post markdown +- Keep under 15 rows + +--- + +## Generating results.json for Household Analysis + +Use `tracked_value()` for automatic source line tracking and `ResultsJson` for schema validation. + +```python +from policyengine.results import ( + ResultsJson, ResultsMetadata, ValueEntry, TableEntry, tracked_value, +) + +REPO = "PolicyEngine/uc-increase-analysis" + +households = { + "single_renter": {"income": 15_000, "children": 0, "rent": 7_200}, + "single_parent_2": {"income": 25_000, "children": 2, "rent": 7_200}, + "couple_2_owner": {"income": 75_000, "children": 2, "rent": 0}, +} + +values = {} +rows = [] +for name, params in households.items(): + # ... 
calculate baseline and reform ... + change = reform_net - baseline_net + + # tracked_value() captures this line number automatically + values[f"{name}_change"] = ValueEntry(**tracked_value( + value=float(change), + display=f"£{abs(change):,.0f}", + repo=REPO, + )) + rows.append([name, f"£{params['income']:,}", f"£{change:,.0f}"]) + +import inspect +table_line = inspect.currentframe().f_lineno +tables = { + "household_impacts": TableEntry( + title="Household impact by family type", + headers=["Household", "Income", "Net income change"], + rows=rows, + source_line=table_line, + source_url=f"https://github.com/{REPO}/blob/main/analysis.py#L{table_line}", + ), +} + +results = ResultsJson( + metadata=ResultsMetadata( + title="UC Standard Allowance Increase", + repo=REPO, + country_id="uk", + year=2026, + ), + values=values, + tables=tables, +) +results.write("results.json") +``` + +--- + +## Writing UK Household Case Studies + +Follow policyengine-writing-skill for all blog post text. + +**✅ Correct (specific, neutral, active):** +``` +A single parent of two children earning £25,000 and renting privately +sees a £520 annual increase in Universal Credit, raising household +net income from £28,400 to £28,920. +``` + +**❌ Wrong (vague, value judgment):** +``` +Working families on Universal Credit see welcome increases to their +income under the reform. +``` + +**✅ Correct (shows calculation):** +``` +The UC standard allowance rises from £393 to £500 per month, a £107 +monthly increase (£1,284 per year). After the 55% taper on her £15,000 +net earnings above the work allowance, she retains £520 of the increase. +``` + +**❌ Wrong (hides calculation):** +``` +Claimants receive a boost to their monthly Universal Credit payments. +``` + +**✅ Correct (acknowledges UK specifics):** +``` +A Scottish taxpayer earning £50,000 pays income tax at Scotland's +intermediate rate of 21%, compared to 20% in England. 
The reform +increases their net income by £340, compared to £380 for an equivalent +English taxpayer. +``` + +**❌ Wrong (ignores devolution):** +``` +All UK taxpayers earning £50,000 see the same impact from the reform. +``` + +--- + +## Common UK Variables + +| Variable | Entity | Description | +|----------|--------|-------------| +| `hbai_household_net_income` | household | Household net income (HBAI definition) | +| `household_net_income` | household | Household net income | +| `income_tax` | person | Income tax liability | +| `national_insurance` | person | National Insurance contributions | +| `universal_credit` | benunit | Universal Credit entitlement | +| `child_benefit` | benunit | Child Benefit | +| `working_tax_credit` | benunit | Working Tax Credit (legacy) | +| `child_tax_credit` | benunit | Child Tax Credit (legacy) | +| `housing_benefit` | benunit | Housing Benefit (legacy) | +| `pension_credit` | benunit | Pension Credit | +| `council_tax_benefit` | benunit | Council Tax Reduction | +| `employment_income` | person | Employment income | +| `self_employment_income` | person | Self-employment income | +| `state_pension` | person | State Pension | + +--- + +## UK vs US Differences + +| Aspect | UK | US | +|--------|----|----| +| Entity groups | 3 (person, benunit, household) | 6 (person, marital_unit, family, tax_unit, spm_unit, household) | +| Benefits unit | Benefit unit (benunit) | SPM unit for benefits, tax unit for credits | +| Benefit claiming | Must set `would_claim_*` = True | Generally automatic | +| Net income variable | `hbai_household_net_income` | `household_net_income` | +| Currency | £ (GBP) | $ (USD) | +| Housing costs | `rent`, `council_tax`, `tenure_type` required | Not required for most analyses | +| Regional variation | `region` — Scotland has different tax rates | `state_code_str` — 50+ state tax systems | +| Key benefit | Universal Credit | EITC, CTC, SNAP | +| Key taper | UC taper (55% of net earnings) | EITC phase-out, benefit 
cliffs | + +--- + +## Checklist + +Before publishing UK household analysis: + +- [ ] All 3 entity groups defined (person, benunit, household) +- [ ] `would_claim_*` flags set for all relevant benefits +- [ ] `region` specified (Scotland has different tax rates) +- [ ] `tenure_type`, `rent`, and `council_tax` set for housing-related benefits +- [ ] Representative households cover renters, homeowners, pensioners, and families with children +- [ ] Baseline and reform both calculated for the same household +- [ ] Component breakdown shows which taxes/benefits drive the net change +- [ ] All values in results.json with `source_line` and `source_url` +- [ ] Currency formatted as £ (not $ or GBP) +- [ ] Blog text uses `{{}}` template references, not hard-coded numbers +- [ ] Neutral language — no value judgments +- [ ] Scottish/devolved differences noted where relevant +- [ ] Calculations shown explicitly (e.g., "£107/month × 12 = £1,284/year, minus 55% taper...") + +--- + +## Resources + +- blog-pipeline-skill — results.json schema, template syntax, chart generation +- policyengine-writing-skill — neutral tone, active voice, quantitative precision +- policyengine-analysis-skill — population-level patterns, Plotly charts +- policyengine-uk-skill — UK tax/benefit system domain knowledge +- us-household-analysis-skill — US equivalent patterns for comparison diff --git a/skills/content/us-household-analysis-skill/SKILL.md b/skills/content/us-household-analysis-skill/SKILL.md new file mode 100644 index 0000000..642caff --- /dev/null +++ b/skills/content/us-household-analysis-skill/SKILL.md @@ -0,0 +1,519 @@ +--- +name: us-household-analysis +description: Household-level impact analysis patterns for US policy reforms - define households, calculate tax/benefit changes, generate results.json +--- + +# US Household Analysis + +Patterns for analyzing how US policy reforms affect specific household types. 
Use this skill when a blog post needs household-level case studies (e.g., "a single parent earning $50,000 sees a $252 increase"). + +## When to Use This Skill + +- Blog posts showing how a reform affects representative households +- Calculators that let users enter their own household details +- Case studies comparing reform impacts across filing statuses, income levels, or family sizes +- Budget constraint / marginal tax rate analysis across an income range + +For population-level microsimulation (deciles, poverty rates, aggregate budget impact), see blog-pipeline-skill instead. + +--- + +## Household Structure + +A US household in PolicyEngine has 6 entity groups: the person plus five group entities. Every person must belong to exactly one of each group entity: + +| Entity | Purpose | Key variables | +|--------|---------|---------------| +| `household` | Physical dwelling | `state_code_str`, `household_net_income` | +| `tax_unit` | IRS filing unit | `filing_status`, `income_tax`, `eitc`, `ctc` | +| `family` | Census family | `family_id` | +| `spm_unit` | Supplemental Poverty Measure unit | `snap`, `ssi`, `tanf` | +| `marital_unit` | Married/unmarried pair | `marital_unit_id` | +| `person` | Individual | `age`, `employment_income`, `social_security` | + +--- + +## Approach 1: Single Household (calculate_household_impact) + +The simplest way to calculate one household's taxes and benefits. 
+ +```python +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + calculate_household_impact, +) + +# Single filer, no children, $50k income +household = USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True} + ], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code_str": "CA"}, + year=2026, +) +result = calculate_household_impact(household) + +# Access results by entity +net_income = result.household["household_net_income"] +income_tax = result.tax_unit[0]["income_tax"] +eitc = result.tax_unit[0]["eitc"] +snap = result.spm_unit[0]["snap"] +``` + +### Common Household Types + +**Single filer, no children:** +```python +USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True} + ], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code_str": "CA"}, + year=2026, +) +``` + +**Single parent, 2 children:** +```python +USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True}, + {"age": 8, "is_tax_unit_dependent": True}, + {"age": 5, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + household={"state_code_str": "TX"}, + year=2026, +) +``` + +**Married couple, 2 children:** +```python +USHouseholdInput( + people=[ + {"age": 40, "employment_income": 80_000, "is_tax_unit_head": True}, + {"age": 38, "employment_income": 40_000, "is_tax_unit_spouse": True}, + {"age": 10, "is_tax_unit_dependent": True}, + {"age": 7, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "JOINT"}, + household={"state_code_str": "NY"}, + year=2026, +) +``` + +**Senior, retired:** +```python +USHouseholdInput( + people=[ + {"age": 70, "social_security": 24_000, "is_tax_unit_head": True} + ], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code_str": "FL"}, + year=2026, +) +``` + +--- + +## Approach 2: Situation Dict (Simulation) + +The pattern used by existing analysis 
repos. More verbose but supports axes for income sweeps. + +```python +from policyengine_us import Simulation + +situation = { + "people": { + "adult": { + "age": {"2026": 35}, + "employment_income": {"2026": 50_000}, + }, + "child1": {"age": {"2026": 8}}, + "child2": {"age": {"2026": 5}}, + }, + "families": { + "family": {"members": ["adult", "child1", "child2"]} + }, + "tax_units": { + "tax_unit": {"members": ["adult", "child1", "child2"]} + }, + "spm_units": { + "spm_unit": {"members": ["adult", "child1", "child2"]} + }, + "marital_units": { + "marital_unit": {"members": ["adult"]} + }, + "households": { + "household": { + "members": ["adult", "child1", "child2"], + "state_name": {"2026": "TX"}, + } + }, +} + +sim = Simulation(situation=situation) +net_income = sim.calculate("household_net_income", "2026") +income_tax = sim.calculate("income_tax", "2026") +ctc = sim.calculate("ctc", "2026") +``` + +### Income Sweep with Axes + +Calculate impacts across an income range in a single simulation (much faster than looping): + +```python +situation = { + "people": { + "adult": {"age": {"2026": 35}}, + "child1": {"age": {"2026": 8}}, + }, + "families": {"family": {"members": ["adult", "child1"]}}, + "tax_units": {"tax_unit": {"members": ["adult", "child1"]}}, + "spm_units": {"spm_unit": {"members": ["adult", "child1"]}}, + "marital_units": {"marital_unit": {"members": ["adult"]}}, + "households": { + "household": { + "members": ["adult", "child1"], + "state_name": {"2026": "CA"}, + } + }, + "axes": [[{ + "name": "employment_income", + "count": 501, + "min": 0, + "max": 250_000, + "period": "2026", + }]], +} + +sim = Simulation(situation=situation) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("household_net_income", "2026") +``` + +--- + +## Approach 3: Reform Comparison + +Compare baseline vs reform for the same household. 
+ +### With calculate_household_impact + +```python +from policyengine.core import Policy, Parameter, ParameterValue +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + calculate_household_impact, + us_latest, +) +import datetime + +# Define reform +param = Parameter( + name="gov.irs.credits.ctc.amount.base_amount", + tax_benefit_model_version=us_latest, + data_type=float, +) +pv = ParameterValue( + parameter=param, + start_date=datetime.date(2026, 1, 1), + end_date=datetime.date(2026, 12, 31), + value=5000, +) +policy = Policy(name="CTC Expansion", parameter_values=[pv]) + +household = USHouseholdInput( + people=[ + {"age": 35, "employment_income": 50_000, "is_tax_unit_head": True}, + {"age": 8, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + household={"state_code_str": "CA"}, + year=2026, +) + +baseline = calculate_household_impact(household) +reform = calculate_household_impact(household, policy=policy) + +change = reform.household["household_net_income"] - baseline.household["household_net_income"] +``` + +### With Simulation + Situation Dict + +```python +reform_dict = { + "gov.irs.credits.ctc.amount.base_amount": { + "2026-01-01.2026-12-31": 5000 + } +} + +sim_baseline = Simulation(situation=situation) +sim_reform = Simulation(situation=situation, reform=reform_dict) + +baseline_net = sim_baseline.calculate("household_net_income", "2026") +reform_net = sim_reform.calculate("household_net_income", "2026") +change = reform_net - baseline_net +``` + +--- + +## Impact Types + +### Net income change + +The most common household impact. Shows the bottom-line dollar difference. + +```python +baseline_net = baseline.household["household_net_income"] +reform_net = reform.household["household_net_income"] +change = reform_net - baseline_net +``` + +### Tax component breakdown + +Show which specific taxes/credits change and by how much. 
+ +```python +components = { + "Income tax": reform.tax_unit[0]["income_tax"] - baseline.tax_unit[0]["income_tax"], + "Payroll tax": reform.tax_unit[0]["employee_payroll_tax"] - baseline.tax_unit[0]["employee_payroll_tax"], + "EITC": reform.tax_unit[0]["eitc"] - baseline.tax_unit[0]["eitc"], + "CTC": reform.tax_unit[0]["ctc"] - baseline.tax_unit[0]["ctc"], + "SNAP": reform.spm_unit[0]["snap"] - baseline.spm_unit[0]["snap"], +} +``` + +### Effective marginal tax rate + +How much of an additional dollar of earnings the household keeps. + +```python +# Using axes for smooth marginal rate curve +situation_axes = { + # ... household setup ... + "axes": [[{ + "name": "employment_income", + "count": 1001, + "min": 0, + "max": 200_000, + "period": "2026", + }]], +} + +sim = Simulation(situation=situation_axes) +incomes = sim.calculate("employment_income", "2026") +net_incomes = sim.calculate("household_net_income", "2026") + +# Marginal rate = 1 - (change in net income / change in gross income) +marginal_rates = 1 - np.diff(net_incomes) / np.diff(incomes) +``` + +### Benefit eligibility cliff + +Show where benefits phase out sharply. + +```python +sim = Simulation(situation=situation_axes) +incomes = sim.calculate("employment_income", "2026") +snap = sim.calculate("snap", "2026") +medicaid = sim.calculate("medicaid", "2026") +eitc = sim.calculate("eitc", "2026") + +# Plot each benefit against income to show cliffs +``` + +--- + +## Charts for Household Posts + +Produce these charts for household-level analysis. See blog-pipeline-skill for full Plotly styling details and additional chart types. 
+ +### Required charts + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Net income curve** | Baseline vs reform net income across earnings range | Every household post | +| **Marginal tax rate curve** | Effective MTR across earnings range, baseline vs reform | Posts about tax rate changes or benefit cliffs | +| **Component breakdown bar** | Which taxes/credits drive the net income change | Posts where multiple programs interact | + +### Optional charts (use when relevant) + +| Chart | What it shows | When to use | +|-------|---------------|-------------| +| **Benefit cliff chart** | Individual benefit amounts (EITC, CTC, SNAP) vs income | Posts about benefit interactions or phase-outs | +| **State comparison bar** | Same household, different states | Posts about state-level variation | +| **Filing status comparison** | Same income, different filing statuses | Posts about marriage penalties or filing status effects | +| **Waterfall** | Tax component decomposition for one household | Posts breaking down a complex reform | + +### Custom charts + +For topic-specific visuals not listed above, follow these rules: +- Use PE brand colors: `TEAL = "#39C6C0"`, `BLUE = "#2C6496"`, `RED = "#DC2626"` +- Plotly with `template="plotly_white"`, `font=dict(family="Inter, sans-serif")` +- Save as PNG at 1200x600, scale=2 +- Write alt text with chart type and 2-3 key data points +- Include in results.json under `"charts"` with `url`, `alt`, `source_line`, `source_url` + +## Tables for Household Posts + +### Required tables + +| Table | Columns | When to use | +|-------|---------|-------------| +| **Household impact table** | Household type, Income, Filing status, Net income change | Every household post | +| **Component breakdown table** | Component (income tax, EITC, CTC, SNAP...), Baseline, Reform, Change | Posts where multiple programs interact | + +### Optional tables + +| Table | Columns | When to use | +|-------|---------|-------------| 
+| **Parameter comparison** | Parameter, Current law, Reform | Posts introducing the reform details | +| **State comparison** | State, Net income change, Key driver | Posts about state variation | +| **Income sweep summary** | Income level, Baseline net, Reform net, Change, MTR | Posts with detailed income range analysis | +| **Benefit eligibility** | Income level, EITC, CTC, SNAP, Medicaid, Total benefits | Posts about benefit cliffs | + +### Custom tables + +For topic-specific tables, follow these rules: +- Include in results.json with `headers`, `rows`, `source_line`, `source_url` +- Pre-format values as display strings ("$1,200", "12.4%") +- Use `{{table:name}}` in blog post markdown +- Keep under 15 rows + +--- + +## Generating results.json for Household Analysis + +Household analyses produce results.json with the same schema as microsimulation analyses, but values come from specific households rather than population aggregates. Use `tracked_value()` for automatic source line tracking and `ResultsJson` for schema validation. + +```python +from policyengine.results import ( + ResultsJson, ResultsMetadata, ValueEntry, TableEntry, tracked_value, +) + +REPO = "PolicyEngine/ctc-expansion" + +households = { + "single_no_kids": {"filing": "SINGLE", "income": 50_000, "children": 0}, + "single_parent_2": {"filing": "HOH", "income": 50_000, "children": 2}, + "married_2": {"filing": "JOINT", "income": 100_000, "children": 2}, +} + +values = {} +rows = [] +for name, params in households.items(): + # ... calculate baseline and reform ... 
+ change = reform_net - baseline_net + + # tracked_value() captures this line number automatically + values[f"{name}_change"] = ValueEntry(**tracked_value( + value=float(change), + display=f"${abs(change):,.0f}", + repo=REPO, + )) + rows.append([name, f"${params['income']:,}", f"${change:,.0f}"]) + +import inspect +table_line = inspect.currentframe().f_lineno +tables = { + "household_impacts": TableEntry( + title="Household impact by family type", + headers=["Household", "Income", "Net income change"], + rows=rows, + source_line=table_line, + source_url=f"https://github.com/{REPO}/blob/main/analysis.py#L{table_line}", + ), +} + +results = ResultsJson( + metadata=ResultsMetadata( + title="CTC Expansion Household Impacts", + repo=REPO, + country_id="us", + year=2026, + ), + values=values, + tables=tables, +) +results.write("results.json") +``` + +--- + +## Writing Household Case Studies + +Follow policyengine-writing-skill for all blog post text. + +**✅ Correct (specific, neutral, active):** +``` +A single parent of two children earning $50,000 sees a $1,000 increase +in net income: $800 from the expanded CTC and $200 from lower income +tax withholding. +``` + +**❌ Wrong (vague, value judgment):** +``` +Working families see significant benefits from the reform, with +substantial increases to their take-home pay. +``` + +**✅ Correct (shows calculation):** +``` +A married couple with two children earning $100,000 receives $10,000 +in CTC under the reform, compared to $4,000 under current law — a +$6,000 increase ($3,000 per child × 2 children). +``` + +**❌ Wrong (hides calculation):** +``` +A married couple sees their CTC more than double under the reform. 
+``` + +--- + +## Common US Variables + +| Variable | Entity | Description | +|----------|--------|-------------| +| `household_net_income` | household | Total income after taxes and benefits | +| `income_tax` | tax_unit | Federal income tax liability | +| `state_income_tax` | tax_unit | State income tax liability | +| `employee_payroll_tax` | tax_unit | Employee-side payroll taxes | +| `eitc` | tax_unit | Earned Income Tax Credit | +| `ctc` | tax_unit | Child Tax Credit | +| `snap` | spm_unit | SNAP (food stamps) benefits | +| `ssi` | person | Supplemental Security Income | +| `tanf` | spm_unit | Temporary Assistance for Needy Families | +| `medicaid` | person | Medicaid eligibility value | +| `employment_income` | person | Wages and salary | +| `self_employment_income` | person | Self-employment income | +| `social_security` | person | Social Security benefits | + +--- + +## Checklist + +Before publishing household analysis: + +- [ ] All 6 entity groups defined for each household (household, tax_unit, family, spm_unit, marital_unit, person) +- [ ] Filing status matches household composition (SINGLE, JOINT, HEAD_OF_HOUSEHOLD) +- [ ] State specified via `state_code_str` or `state_name` +- [ ] Representative households cover relevant filing statuses and income ranges +- [ ] Baseline and reform both calculated for the same household +- [ ] Component breakdown shows which taxes/benefits drive the net change +- [ ] All values in results.json with `source_line` and `source_url` +- [ ] Blog text uses `{{}}` template references, not hard-coded numbers +- [ ] Neutral language — no value judgments on who "benefits" or "loses" +- [ ] Calculations shown explicitly (e.g., "$3,000 per child × 2 = $6,000") + +--- + +## Resources + +- blog-pipeline-skill — results.json schema, template syntax, chart generation +- policyengine-writing-skill — neutral tone, active voice, quantitative precision +- policyengine-analysis-skill — population-level patterns, Plotly charts +- 
policyengine-us-skill — US tax/benefit system domain knowledge