diff --git a/us/states/sc/data_exploration.ipynb b/us/states/sc/data_exploration.ipynb new file mode 100644 index 0000000..09787fd --- /dev/null +++ b/us/states/sc/data_exploration.ipynb @@ -0,0 +1,290 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SC Dataset Exploration\n", + "\n", + "This notebook explores the South Carolina (SC) dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/policyengine-us-data/states/SC.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Load SC dataset\n", + "sim = Microsimulation(dataset=SC_DATASET)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 35,324\n", + "Household count (weighted): 1,887,388\n", + "Person count (weighted): 5,451,832\n" + ] + } + ], + "source": [ + "# Check dataset size\n", + "household_weight = sim.calculate(\"household_weight\", period=2025)\n", + "household_count = sim.calculate(\"household_count\", period=2025, map_to=\"household\")\n", + "person_count = sim.calculate(\"person_count\", period=2025, map_to=\"household\")\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {household_count.sum():,.0f}\")\n", + "print(f\"Person count (weighted): {person_count.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# Check income distribution (weighted vs unweighted, household and person level)\nagi_household = 
sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\")\nagi_hh_array = np.array(agi_household)\nhh_weights = np.array(sim.calculate(\"household_weight\", period=2025))\n\nagi_person = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"person\")\nagi_person_array = np.array(agi_person)\nperson_weights = np.array(sim.calculate(\"person_weight\", period=2025))\n\n# Weighted percentile calculation\ndef weighted_percentile(values, weights, percentile):\n sorted_indices = np.argsort(values)\n sorted_values = values[sorted_indices]\n sorted_weights = weights[sorted_indices]\n cumulative_weight = np.cumsum(sorted_weights)\n idx = np.searchsorted(cumulative_weight, cumulative_weight[-1] * percentile / 100)\n return sorted_values[min(idx, len(sorted_values)-1)]\n\n# Unweighted medians\nunweighted_median_hh = np.median(agi_hh_array)\nunweighted_median_person = np.median(agi_person_array)\n\n# Weighted medians\nweighted_median_hh = weighted_percentile(agi_hh_array, hh_weights, 50)\nweighted_median_person = weighted_percentile(agi_person_array, person_weights, 50)\n\n# Weighted averages\nweighted_avg_hh = np.average(agi_hh_array, weights=hh_weights)\nweighted_avg_person = np.average(agi_person_array, weights=person_weights)\n\n# Average household size\ntotal_persons = person_weights.sum()\ntotal_households = hh_weights.sum()\navg_hh_size = total_persons / total_households\n\nprint(\"=\" * 60)\nprint(\"INCOME DISTRIBUTION SUMMARY\")\nprint(\"=\" * 60)\nprint(f\"\\nHousehold AGI:\")\nprint(f\" Unweighted median: ${unweighted_median_hh:,.0f}\")\nprint(f\" Weighted median: ${weighted_median_hh:,.0f}\")\nprint(f\" Weighted average: ${weighted_avg_hh:,.0f}\")\n\nprint(f\"\\nPerson AGI:\")\nprint(f\" Unweighted median: ${unweighted_median_person:,.0f}\")\nprint(f\" Weighted median: ${weighted_median_person:,.0f}\")\nprint(f\" Weighted average: ${weighted_avg_person:,.0f}\")\n\nprint(f\"\\nAverage household size: 
{avg_hh_size:.1f}\")\n\nprint(f\"\\nWeighted household AGI percentiles:\")\nprint(f\" 25th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\")\nprint(f\" 50th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 50):,.0f}\")\nprint(f\" 75th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\")\nprint(f\" 90th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\")\nprint(f\" 95th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\")\nprint(f\" Max AGI: ${agi_hh_array.max():,.0f}\")" + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 598,564\n", + " Households with 1 child: 247,956\n", + " Households with 2 children: 190,545\n", + " Households with 3+ children: 160,063\n" + ] + } + ], + "source": [ + "# Check households with children\n", + "is_child = sim.calculate(\"is_child\", period=2025, map_to=\"person\")\n", + "household_id = sim.calculate(\"household_id\", period=2025, map_to=\"person\")\n", + "household_weight = sim.calculate(\"household_weight\", period=2025, map_to=\"person\")\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'is_child': is_child,\n", + " 'household_weight': household_weight\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + 
"households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 1,198,147\n", + " Children under 6: 349,101\n", + " Children under 3: 169,412\n" + ] + } + ], + "source": [ + "# Check children by age groups\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", map_to=\"person\"),\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", map_to=\"person\"),\n", + " \"person_id\": sim.calculate(\"person_id\", map_to=\"person\"),\n", + " \"age\": sim.calculate(\"age\", map_to=\"person\"),\n", + " \"person_weight\": sim.calculate(\"person_weight\", map_to=\"person\")\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "children_under_3_df = df[df['age'] < 3]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "children_under_3 = children_under_3_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" 
Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# Create comprehensive summary table\nsummary_data = {\n 'Metric': [\n 'Household count (weighted)',\n 'Person count (weighted)',\n 'Average household size',\n 'Weighted median household AGI',\n 'Weighted average household AGI',\n 'Weighted median person AGI',\n 'Weighted average person AGI',\n 'Unweighted median household AGI',\n 'Unweighted median person AGI',\n '25th percentile household AGI',\n '75th percentile household AGI',\n '90th percentile household AGI',\n '95th percentile household AGI',\n 'Max household AGI',\n 'Total households with children',\n 'Households with 1 child',\n 'Households with 2 children',\n 'Households with 3+ children',\n 'Total children under 18',\n 'Children under 6',\n 'Children under 3'\n ],\n 'Value': [\n f\"{household_count.sum():,.0f}\",\n f\"{person_count.sum():,.0f}\",\n f\"{avg_hh_size:.1f}\",\n f\"${weighted_median_hh:,.0f}\",\n f\"${weighted_avg_hh:,.0f}\",\n f\"${weighted_median_person:,.0f}\",\n f\"${weighted_avg_person:,.0f}\",\n f\"${unweighted_median_hh:,.0f}\",\n f\"${unweighted_median_person:,.0f}\",\n f\"${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\",\n f\"${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\",\n f\"${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\",\n f\"${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\",\n f\"${agi_hh_array.max():,.0f}\",\n f\"{total_households_with_children:,.0f}\",\n f\"{households_with_1_child:,.0f}\",\n f\"{households_with_2_children:,.0f}\",\n f\"{households_with_3plus_children:,.0f}\",\n f\"{total_children:,.0f}\",\n f\"{children_under_6:,.0f}\",\n f\"{children_under_3:,.0f}\"\n ]\n}\n\nsummary_df = pd.DataFrame(summary_data)\n\nprint(\"\\n\" + \"=\"*65)\nprint(\"SC DATASET SUMMARY - WEIGHTED (Population 
Estimates)\")\nprint(\"=\"*65)\nprint(summary_df.to_string(index=False))\nprint(\"=\"*65)\n\n# Save table\nsummary_df.to_csv('sc_dataset_summary_weighted.csv', index=False)\nprint(\"\\nSummary saved to: sc_dataset_summary_weighted.csv\")" + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 179,119\n", + "Percentage of all households: 9.49%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income\n", + "agi_hh = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "weights = np.array(sim.calculate(\"household_weight\", period=2025))\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + "total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 434,505 23.02%\n", + " $10k-$20k 155,370 8.23%\n", + " $20k-$30k 149,595 7.93%\n", + " $30k-$40k 115,365 6.11%\n", + " $40k-$50k 127,566 6.76%\n", + " 
$50k-$60k 110,405 5.85%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 1,092,805\n", + "Percentage of all households in $0-$60k range: 57.90%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No 
newline at end of file diff --git a/us/states/sc/data_exploration_staging.ipynb b/us/states/sc/data_exploration_staging.ipynb new file mode 100644 index 0000000..b797ac0 --- /dev/null +++ b/us/states/sc/data_exploration_staging.ipynb @@ -0,0 +1,508 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC Dataset Exploration (Staging)\n", + "\n", + "This notebook explores the South Carolina (SC) **staging** dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/policyengine-us-data/staging/states/SC.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. 
For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2eb0b3ac0b824f52a3a6066931afc5ac", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "SC.h5: 0%| | 0.00/38.1M [00:00 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 1,157,115\n", + " Children under 6: 350,517\n", + " Children under 3: 177,575\n" + ] + } + ], + "source": [ + "# Check children by age groups\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", map_to=\"person\"),\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", map_to=\"person\"),\n", + " \"person_id\": sim.calculate(\"person_id\", map_to=\"person\"),\n", + " \"age\": sim.calculate(\"age\", map_to=\"person\"),\n", + " \"person_weight\": sim.calculate(\"person_weight\", map_to=\"person\")\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + 
"children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "children_under_3_df = df[df['age'] < 3]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "children_under_3 = children_under_3_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=================================================================\n", + "SC STAGING DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "=================================================================\n", + " Metric Value\n", + " Household count (weighted) 1,587,912\n", + " Person count (weighted) 4,788,894\n", + " Average household size 3.0\n", + " Weighted median household AGI $60,378\n", + " Weighted average household AGI $105,617\n", + " Weighted median person AGI $56,366\n", + " Weighted average person AGI $100,583\n", + "Unweighted median household AGI $67,907\n", + " Unweighted median person AGI $67,460\n", + " 25th percentile household AGI $27,117\n", + " 75th percentile household AGI $105,515\n", + " 90th percentile household AGI $153,279\n", + " 95th percentile household AGI $237,856\n", + " Max household AGI $353,653,602\n", + " Total households with children 637,558\n", + " Households with 1 child 308,153\n", + " Households with 2 children 190,106\n", + " Households with 3+ children 139,299\n", + " Total children under 18 1,157,115\n", + " Children under 6 350,517\n", + " Children under 3 177,575\n", + 
"=================================================================\n", + "\n", + "Summary saved to: sc_staging_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create comprehensive summary table\n", + "summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Average household size',\n", + " 'Weighted median household AGI',\n", + " 'Weighted average household AGI',\n", + " 'Weighted median person AGI',\n", + " 'Weighted average person AGI',\n", + " 'Unweighted median household AGI',\n", + " 'Unweighted median person AGI',\n", + " '25th percentile household AGI',\n", + " '75th percentile household AGI',\n", + " '90th percentile household AGI',\n", + " '95th percentile household AGI',\n", + " 'Max household AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 6',\n", + " 'Children under 3'\n", + " ],\n", + " 'Value': [\n", + " f\"{household_count.sum():,.0f}\",\n", + " f\"{person_count.sum():,.0f}\",\n", + " f\"{avg_hh_size:.1f}\",\n", + " f\"${weighted_median_hh:,.0f}\",\n", + " f\"${weighted_avg_hh:,.0f}\",\n", + " f\"${weighted_median_person:,.0f}\",\n", + " f\"${weighted_avg_person:,.0f}\",\n", + " f\"${unweighted_median_hh:,.0f}\",\n", + " f\"${unweighted_median_person:,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\",\n", + " f\"${agi_hh_array.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " 
f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\",\n", + " f\"{children_under_3:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "summary_df = pd.DataFrame(summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*65)\n", + "print(\"SC STAGING DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*65)\n", + "print(summary_df.to_string(index=False))\n", + "print(\"=\"*65)\n", + "\n", + "# Save table\n", + "summary_df.to_csv('sc_staging_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: sc_staging_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 9,513\n", + "Percentage of all households: 0.60%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income\n", + "agi_hh = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "weights = np.array(sim.calculate(\"household_weight\", period=2025))\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + "total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + 
"======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 136,673 8.61%\n", + " $10k-$20k 154,618 9.74%\n", + " $20k-$30k 148,652 9.36%\n", + " $30k-$40k 125,157 7.88%\n", + " $40k-$50k 121,742 7.67%\n", + " $50k-$60k 101,082 6.37%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 787,925\n", + "Percentage of all households in $0-$60k range: 49.62%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis_comparison.md b/us/states/sc/h4216_analysis_comparison.md new file mode 100644 index 0000000..d4a235a --- /dev/null +++ b/us/states/sc/h4216_analysis_comparison.md @@ -0,0 +1,199 @@ +# SC H.4216 Analysis Comparison: PolicyEngine vs RFA + +## Executive Summary + +**UPDATE (Feb 2025):** PR #7514 fixed a bug where `sc_additions` (QBI and SALT addbacks) were incorrectly applied under H.4216. Since H.4216 starts from AGI (before federal deductions), addbacks are inappropriate. With this fix, PolicyEngine estimates approximately **-$110.9M** vs RFA's **-$119.1M** (~93% accuracy). + +--- + +### Original Analysis (Pre-Fix) + +The original $159M difference between PolicyEngine (+$39.8M) and RFA (-$119.1M) was driven by: +1. **Bug**: `sc_additions` were being applied when starting from AGI (fixed in PR #7514) +2. 
**Different income distributions** in the underlying data + +## Summary + +| Metric | RFA | PolicyEngine | Difference | +|--------|-----|--------------|------------| +| **General Fund Impact** | **-$119.1M** | **+$39.8M** | **+$158.9M** | +| Total Returns | 2,757,573 | 2,935,621 | +178,048 | +| Tax Decrease % | 38.7% | 20.0% | -18.7pp | +| Tax Increase % | 26.7% | 24.0% | -2.7pp | +| No Change % | 34.6% | 56.0% | +21.4pp | + +## Top 5 Discrepancies by Income Bracket + +| AGI Range | RFA Impact | PE Impact | Difference | +|-----------|------------|-----------|------------| +| Over $1,000,000 | -$13.8M | -$115.3M | -$101.5M | +| $50,001-$75,000 | -$82.1M | -$23.3M | +$58.9M | +| $100,001-$150,000 | +$3.1M | +$53.4M | +$50.3M | +| $300,001-$500,000 | -$4.6M | +$40.6M | +$45.3M | +| $500,001-$1,000,000 | -$16.2M | +$18.7M | +$34.9M | + +## Key Differences Explaining the $159M Gap + +### 1. Upper-Middle Income ($100k-$500k): PE Shows Much Larger Tax Increases + +| Bracket | RFA Avg Change | PE Avg Change | Direction | +|---------|----------------|---------------|-----------| +| $100k-$150k | +$11 | +$284 | Both increase, PE 25x larger | +| $150k-$200k | +$355 | +$727 | Both increase, PE 2x larger | +| $300k-$500k | **-$82** | **+$1,099** | RFA: decrease, PE: increase | +| $500k-$1M | **-$631** | **+$1,129** | RFA: decrease, PE: increase | + +**This is the primary driver of the difference.** PolicyEngine shows significant tax INCREASES in the $100k-$500k range where RFA shows small increases or even decreases. + +### 2. Middle Income ($30k-$100k): PE Shows Smaller Tax Cuts + +| Bracket | RFA Avg Change | PE Avg Change | +|---------|----------------|---------------| +| $30k-$40k | -$72 | -$23 | +| $40k-$50k | -$179 | -$135 | +| $50k-$75k | -$202 | -$77 | +| $75k-$100k | -$146 | -$71 | + +RFA shows roughly 1.3-3x larger tax cuts in these brackets (2-3x in every bracket except $40k-$50k). + +### 3. 
Over $1M: PE Shows Much Larger Tax Cuts + +| Metric | RFA | PE | +|--------|-----|-----| +| Avg Change | -$1,154 | -$5,082 | +| Total Impact | -$13.8M | -$115.3M | + +PE shows 4-8x larger tax cuts for millionaires, but with more returns (22,686 vs 11,936). + +### 4. Low Income ($0-$30k): Different Tax Bases + +RFA shows existing tax liability for low-income filers ($50, $3, $16, $107 avg), while PE shows $0 for most low-income brackets. This suggests: +- Different baseline calculations +- Different treatment of non-filers +- CPS data may underrepresent low-income tax filers + +## Return Count Comparison (Key Finding) + +| AGI Range | RFA Returns | PE Returns | PE/RFA Ratio | +|-----------|-------------|------------|--------------| +| $0* | 78,854 | 619,009 | **7.85x** | +| $1-$10k | 286,253 | 502,276 | 1.75x | +| $10k-$20k | 310,122 | 279,412 | 0.90x | +| $20k-$30k | 275,560 | 252,862 | 0.92x | +| $30k-$40k | 269,566 | 215,980 | 0.80x | +| $40k-$50k | 234,386 | 197,525 | 0.84x | +| $50k-$75k | 407,593 | 300,857 | **0.74x** | +| $75k-$100k | 250,437 | 177,284 | **0.71x** | +| $100k-$150k | 298,343 | 187,945 | **0.63x** | +| $150k-$200k | 143,398 | 73,396 | **0.51x** | +| $200k-$300k | 109,340 | 52,882 | **0.48x** | +| $300k-$500k | 56,123 | 36,977 | 0.66x | +| $500k-$1M | 25,664 | 16,525 | 0.64x | +| Over $1M | 11,936 | 22,686 | **1.90x** | +| **Total** | **2,757,573** | **2,935,621** | 1.06x | + +**Key observations:** +- PE has **7.85x more** $0 income returns - **PE counts all tax units (including non-filers), RFA only counts actual filers** +- PE has **~50% fewer** returns in $100k-$300k brackets +- PE has **1.9x more** millionaire returns + +**Important note:** RFA uses actual SC tax return data (filers only). PolicyEngine uses CPS-based data representing all tax units regardless of filing status. This explains the large discrepancy in low-income brackets where many households don't file. 
+ +## Baseline Tax Liability Comparison + +| AGI Range | RFA Avg Tax | PE Avg Tax | Difference | +|-----------|-------------|------------|------------| +| $0-$10k | $3-$50 | $0 | PE shows no tax | +| $50k-$75k | $1,192 | $822 | PE 31% lower | +| $100k-$150k | $3,258 | $3,292 | Similar | +| Over $1M | $78,228 | **$139,623** | PE **78% higher** | + +## Total Baseline Revenue Comparison + +| Bracket | RFA Revenue | PE Revenue | Difference | +|---------|-------------|------------|------------| +| $0-$100k | $1.24B | $0.74B | -$0.50B | +| $100k-$1M | $4.22B | $2.61B | -$1.61B | +| Over $1M | $0.93B | **$3.17B** | **+$2.23B** | +| **Total** | **$6.40B** | **$6.52B** | +$0.12B (+1.8%) | + +**Critical insight:** Total baseline revenue is similar, but PE derives **48%** of SC income tax from millionaires vs RFA's **15%**. + +## Likely Causes + +### 1. Implementation Details (from PR #7494) + +**Baseline SC Taxable Income:** +```python +taxable_income = federal_taxable_income + sc_additions - sc_subtractions +``` +Where `federal_taxable_income` = AGI - standard/itemized deduction - QBI deduction + +**H.4216 SC Taxable Income (as originally implemented; PR #7514 later removed `sc_additions` from this formula):** +```python +taxable_income = AGI + sc_additions - sc_subtractions - SCIAD +``` +Where SCIAD phases out from $40k-$190k AGI (varies by filing status) + +**Key Insight**: The reform switches from using federal taxable income (after federal deductions) to using AGI minus SCIAD. For taxpayers who itemize large deductions or have QBI deductions, this could result in HIGHER taxable income under H.4216. + +### 2. 
SCIAD Phase-out Creates Winners and Losers + +| Filing Status | SCIAD Amount | Phase-out Start | Phase-out End | +|---------------|--------------|-----------------|---------------| +| Single | $15,000 | $40,000 | $95,000 | +| MFJ | $30,000 | $80,000 | $190,000 | +| HoH | $22,500 | $60,000 | $142,500 | + +For taxpayers above phase-out thresholds with SCIAD = $0: +- If their federal deduction was > $0, they lose that deduction entirely +- This explains why PE shows large tax INCREASES for $100k-$500k brackets + +### 3. Baseline Tax Differences +PE baseline avg tax ($2,220) is lower than RFA ($2,321), suggesting different starting points for current law calculations. + +### 4. Data Source Differences +- **RFA**: SC Department of Revenue 2024 tax returns (95% sample, inflated to 100%) +- **PE**: CPS-based synthetic data for South Carolina + +Tax return data captures actual filers with precise income/deduction information. CPS-based data may: +- Over/underrepresent certain income groups +- Miss nuances in itemized vs standard deduction usage +- Have different filing status distributions + +### 5. Federal Deduction Treatment +H.4216 eliminates federal standard/itemized deductions. The impact depends heavily on: +- Current deduction amounts by income level +- How many taxpayers itemize vs take standard deduction +- QBI deduction amounts (not replaced by SCIAD) + +RFA has actual deduction data; PE estimates from CPS. + +## Net Effect + +The $159M difference primarily comes from: +1. **+$140M**: PE shows larger tax increases in $100k-$500k brackets +2. **+$59M**: PE shows smaller tax cuts in $30k-$100k brackets +3. **-$102M**: PE shows larger tax cuts for over $1M bracket +4. **+$60M**: Various other bracket differences + +**Bottom line**: PolicyEngine's model shows the SCIAD phase-out creating more tax increases for upper-middle income taxpayers than RFA estimates, which more than offsets the tax cuts elsewhere. 
+ +## Conclusion + +The $159M difference is **not primarily a calculation issue** but stems from: + +1. **Different populations**: PE counts all tax units (filers + non-filers), RFA counts only actual filers. This explains 540k extra returns in the $0 bracket. + +2. **Different income distributions**: PE's CPS-based data has far more millionaires (22.7k vs 12k) paying much higher average taxes ($140k vs $78k) + +3. **Different return counts**: PE undercounts middle-income filers ($50k-$300k) by 40-50% + +4. **Millionaire impact drives divergence**: H.4216 gives large tax cuts to millionaires. With PE having 2x more millionaires paying 2x higher taxes, the reform's impact on this group dominates. + +### Recommendation + +To align with RFA, PolicyEngine would need to: +- Recalibrate SC state weights to match actual tax return distributions +- Validate millionaire counts and income levels against IRS SOI data +- Investigate why baseline tax for millionaires is so much higher than RFA diff --git a/us/states/sc/rfa_h4216_analysis.csv b/us/states/sc/rfa_h4216_analysis.csv new file mode 100644 index 0000000..43991c5 --- /dev/null +++ b/us/states/sc/rfa_h4216_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,78854,2.9%,$50,$43,1080,1.4%,$3683,$3154,-$529,-$571000,575,0.7%,-$606000,-$1054,505,0.6%,$35000,$69,77774,98.6%,77824,98.7% +$1 to $10000,286253,10.4%,$3,$9,43699,15.3%,$20,$58,$38,$1655000,834,0.3%,-$76000,-$91,42865,15.0%,$1731000,$40,242554,84.7%,243249,85.0% +$10001 to 
$20000,310122,11.2%,$16,$26,75652,24.4%,$67,$105,$38,$2872000,5591,1.8%,-$360000,-$64,70060,22.6%,$3232000,$46,234471,75.6%,235107,75.8% +$20001 to $30000,275560,10.0%,$107,$110,140713,51.1%,$210,$216,$5,$769000,51548,18.7%,-$2676000,-$52,89165,32.4%,$3445000,$39,134847,48.9%,134332,48.7% +$30001 to $40000,269566,9.8%,$288,$216,160474,59.5%,$483,$362,-$121,-$19360000,131750,48.9%,-$21067000,-$160,28724,10.7%,$1707000,$59,109091,40.5%,110638,41.0% +$40001 to $50000,234386,8.5%,$569,$390,174112,74.3%,$767,$526,-$241,-$41986000,127503,54.4%,-$46301000,-$363,46609,19.9%,$4315000,$93,60274,25.7%,61884,26.4% +$50001 to $75000,407593,14.8%,$1192,$990,351715,86.3%,$1381,$1148,-$234,-$82146000,286705,70.3%,-$93552000,-$326,65010,15.9%,$11406000,$175,55877,13.7%,61644,15.1% +$75001 to $100000,250437,9.1%,$2020,$1874,225176,89.9%,$2247,$2085,-$162,-$36461000,173939,69.5%,-$51076000,-$294,51237,20.5%,$14615000,$285,25261,10.1%,27341,10.9% +$100001 to $150000,298343,10.8%,$3258,$3269,289966,97.2%,$3352,$3363,$11,$3115000,175398,58.8%,-$35022000,-$200,114568,38.4%,$38137000,$333,8377,2.8%,8450,2.8% +$150001 to $200000,143398,5.2%,$5518,$5873,141749,98.9%,$5582,$5942,$359,$50933000,19752,13.8%,-$6653000,-$337,121997,85.1%,$57586000,$472,1649,1.1%,1210,0.8% +$200001 to $300000,109340,4.0%,$8741,$9077,108086,98.9%,$8842,$9182,$340,$36718000,29527,27.0%,-$10562000,-$358,78560,71.8%,$47280000,$602,1253,1.1%,791,0.7% +$300001 to $500000,56123,2.0%,$14926,$14844,55098,98.2%,$15204,$15120,-$84,-$4627000,36199,64.5%,-$25411000,-$702,18898,33.7%,$20784000,$1100,1025,1.8%,688,1.2% +$500001 to $1000000,25664,0.9%,$25969,$25338,24764,96.5%,$26912,$26258,-$654,-$16195000,18325,71.4%,-$32991000,-$1800,6439,25.1%,$16796000,$2608,900,3.5%,649,2.5% +Over $1000000,11936,0.4%,$78228,$77074,11163,93.5%,$83646,$82413,-$1233,-$13767000,8187,68.6%,-$62365000,-$7617,2975,24.9%,$48598000,$16334,773,6.5%,666,5.6% 
"""
SC H.4216 Budget Impact Analysis

Simple script to calculate the budgetary impact of H.4216 with default 5.21% top rate.
"""

import numpy as np

SC_DATASET = "hf://policyengine/policyengine-us-data/staging/states/SC.h5"
TAX_YEAR = 2026

# RFA fiscal-note General Fund impact (the "Total" row of rfa_h4216_analysis.csv).
RFA_ESTIMATE = -119_100_000

# A tax unit counts as changed only when its liability moves by more than $1.
CHANGE_THRESHOLD = 1


def create_h4216_reform():
    """
    SC H.4216 Reform with default rates:
    - 1.99% up to $30k
    - 5.21% over $30k (default)

    Returns a (structural reform, parameter reform) tuple suitable for
    ``Microsimulation(reform=...)``.
    """
    # Imported lazily so the numeric helpers below can be imported (and tested)
    # without the PolicyEngine stack being installed.
    from policyengine_core.reforms import Reform
    from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216

    param_reform = Reform.from_dict(
        {
            "gov.contrib.states.sc.h4216.in_effect": {
                "2026-01-01.2100-12-31": True
            }
        },
        country_id="us",
    )
    base_reform = create_sc_h4216()
    return (base_reform, param_reform)


def summarize_budget_impact(baseline_tax, reform_tax, weight, threshold=CHANGE_THRESHOLD):
    """Return weighted revenue totals and winner/loser shares.

    Args:
        baseline_tax: per-tax-unit liability under current law.
        reform_tax: per-tax-unit liability under the reform.
        weight: per-tax-unit weights (same length as the tax arrays).
        threshold: dollar change that must be exceeded to count as a change.

    Returns:
        dict with ``baseline_revenue``, ``reform_revenue``, ``budget_impact``
        (weighted sums) and ``pct_decrease``, ``pct_increase``,
        ``pct_unchanged`` (weighted shares in percent; 0.0 when the total
        weight is zero).
    """
    # Aggregate in float64: summing float32 products of this magnitude only
    # keeps about seven significant digits.
    baseline_tax = np.asarray(baseline_tax, dtype=np.float64)
    reform_tax = np.asarray(reform_tax, dtype=np.float64)
    weight = np.asarray(weight, dtype=np.float64)

    tax_change = reform_tax - baseline_tax
    total_weight = weight.sum()

    def share(mask):
        if total_weight <= 0:
            return 0.0
        return weight[mask].sum() / total_weight * 100

    return {
        "baseline_revenue": (baseline_tax * weight).sum(),
        "reform_revenue": (reform_tax * weight).sum(),
        "budget_impact": (tax_change * weight).sum(),
        "pct_decrease": share(tax_change < -threshold),
        "pct_increase": share(tax_change > threshold),
        "pct_unchanged": share(np.abs(tax_change) <= threshold),
    }


def accuracy_vs_rfa(budget_impact, rfa_estimate=RFA_ESTIMATE):
    """Return 100 * (1 - relative error of *budget_impact* against the RFA figure)."""
    return (1 - abs(budget_impact - rfa_estimate) / abs(rfa_estimate)) * 100


def main():
    """Run baseline and reform simulations and print the budget impact report."""
    from policyengine_us import Microsimulation

    print("Loading baseline...")
    baseline = Microsimulation(dataset=SC_DATASET)

    print("Loading reform (H.4216 with 5.21% top rate)...")
    reform_sim = Microsimulation(dataset=SC_DATASET, reform=create_h4216_reform())

    # Use .values to get raw numpy arrays (avoid MicroSeries auto-weighting).
    baseline_tax = baseline.calculate("sc_income_tax", period=TAX_YEAR, map_to="tax_unit").values
    reform_tax = reform_sim.calculate("sc_income_tax", period=TAX_YEAR, map_to="tax_unit").values
    weight = baseline.calculate("tax_unit_weight", period=TAX_YEAR).values

    summary = summarize_budget_impact(baseline_tax, reform_tax, weight)
    budget_impact = summary["budget_impact"]

    print("\n" + "=" * 60)
    print("SC H.4216 BUDGET IMPACT (5.21% Top Rate)")
    print("=" * 60)
    print(f"\nBaseline SC Income Tax Revenue: ${summary['baseline_revenue']:,.0f}")
    print(f"Reform SC Income Tax Revenue: ${summary['reform_revenue']:,.0f}")
    print(f"\n>>> BUDGET IMPACT: ${budget_impact:,.0f} <<<")
    # RFA_ESTIMATE is negative; keep the sign in front of the dollar symbol.
    print(f"\nRFA Estimate: -${abs(RFA_ESTIMATE):,.0f}")
    print(f"Difference from RFA: ${budget_impact - RFA_ESTIMATE:,.0f}")
    print(f"Accuracy: {accuracy_vs_rfa(budget_impact):.1f}%")
    print("\n" + "-" * 60)
    print(f"Tax units with DECREASE: {summary['pct_decrease']:.1f}%")
    print(f"Tax units with INCREASE: {summary['pct_increase']:.1f}%")
    print(f"Tax units UNCHANGED: {summary['pct_unchanged']:.1f}%")
    print("=" * 60)


if __name__ == "__main__":
    main()
# South Carolina H.4216 Tax Reform Analysis (Tax Year 2026) -- setup
#
# Implementation note (from the notebook introduction): this analysis uses the
# corrected H.4216 implementation (PR #7514). The fix removes `sc_additions`
# from the H.4216 taxable income formula since H.4216 starts from AGI (before
# federal deductions), making addbacks for QBI and SALT inappropriate.

from policyengine_us import Microsimulation
from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216
from policyengine_core.reforms import Reform
import pandas as pd
import numpy as np

SC_DATASET = "hf://policyengine/policyengine-us-data/staging/states/SC.h5"
TAX_YEAR = 2026  # Renamed to avoid conflict with YEAR constant from model_api
TOP_RATE = 0.0539  # Rate on taxable income over $30k used throughout this analysis

# NOTE(review): none of the visible cells use a name from this star import; it
# is kept, in its original position, in case later cells rely on it.
from policyengine_us.model_api import *


def create_h4216_reform(top_rate=TOP_RATE):
    """
    SC H.4216 Reform:
    - Enable H.4216 via in_effect parameter
    - Set rates: 1.99% up to $30k, `top_rate` (default 5.39%) over $30k

    Args:
        top_rate: value written to ``gov.contrib.states.sc.h4216.rates[1].rate``.

    Returns:
        (base_reform, param_reform) tuple to pass as ``Microsimulation(reform=...)``.
    """
    # Parameter changes via Reform.from_dict
    param_reform = Reform.from_dict(
        {
            "gov.contrib.states.sc.h4216.in_effect": {
                "2026-01-01.2100-12-31": True
            },
            "gov.contrib.states.sc.h4216.rates[1].rate": {
                "2026-01-01.2100-12-31": top_rate
            },
        },
        country_id="us",
    )

    # Get base H.4216 reform (EITC cap, SCIAD, taxable income, tax calculation)
    base_reform = create_sc_h4216()

    # Order: base reform first, then parameter overrides
    return (base_reform, param_reform)


print("Reform function defined!")

print("Loading baseline (current SC tax law)...")
baseline = Microsimulation(dataset=SC_DATASET)
print("Baseline loaded")

print(f"\nLoading reform (H.4216 with {TOP_RATE:.2%} top rate)...")
reform = create_h4216_reform()
reform_sim = Microsimulation(dataset=SC_DATASET, reform=reform)
print("Reform loaded")

print("\n" + "=" * 60)
print("All simulations ready!")
print("=" * 60)

# ## Calculate Tax Impacts by Income Bracket

# Get tax unit level data. Arrays are widened to float64 so weighted totals
# and percentages computed from them are not limited to float32 precision
# (the recorded outputs show values such as 93.699997).
baseline_tax = np.array(
    baseline.calculate("sc_income_tax", period=TAX_YEAR, map_to="tax_unit"),
    dtype=np.float64,
)
reform_tax = np.array(
    reform_sim.calculate("sc_income_tax", period=TAX_YEAR, map_to="tax_unit"),
    dtype=np.float64,
)
agi = np.array(
    baseline.calculate("adjusted_gross_income", period=TAX_YEAR, map_to="tax_unit"),
    dtype=np.float64,
)
tax_unit_weight = np.array(
    baseline.calculate("tax_unit_weight", period=TAX_YEAR),
    dtype=np.float64,
)

# Calculate tax change
tax_change = reform_tax - baseline_tax

print(f"Total tax units: {len(baseline_tax):,}")
print(f"Weighted tax units (returns): {tax_unit_weight.sum():,.0f}")
# Income brackets matching the RFA analysis, as (lower, upper, label).
# A return belongs to a bracket when lower < AGI <= upper, so "$0*" holds
# every return with AGI <= 0.
INCOME_BRACKETS = [
    (float('-inf'), 0, "$0*"),
    (0, 10000, "$1 to $10,000"),
    (10000, 20000, "$10,001 to $20,000"),
    (20000, 30000, "$20,001 to $30,000"),
    (30000, 40000, "$30,001 to $40,000"),
    (40000, 50000, "$40,001 to $50,000"),
    (50000, 75000, "$50,001 to $75,000"),
    (75000, 100000, "$75,001 to $100,000"),
    (100000, 150000, "$100,001 to $150,000"),
    (150000, 200000, "$150,001 to $200,000"),
    (200000, 300000, "$200,001 to $300,000"),
    (300000, 500000, "$300,001 to $500,000"),
    (500000, 1000000, "$500,001 to $1,000,000"),
    (1000000, float('inf'), "Over $1,000,000"),
]

# Liability changes of at most $1 (either direction) count as "no change".
CHANGE_THRESHOLD = 1


def bracket_stats(mask, baseline_tax, reform_tax, weight, threshold=CHANGE_THRESHOLD):
    """Return unrounded weighted statistics for the tax units selected by *mask*.

    All averages are weighted by *weight* and fall back to 0.0 when the
    selected weight is zero. ``avg_change`` is the change in average liability
    among returns whose tax changes by more than *threshold*.
    """
    # Work in float64 so sums and percentages do not inherit float32 rounding.
    baseline_tax = np.asarray(baseline_tax, dtype=np.float64)
    reform_tax = np.asarray(reform_tax, dtype=np.float64)
    weight = np.asarray(weight, dtype=np.float64)
    mask = np.asarray(mask, dtype=bool)
    tax_change = reform_tax - baseline_tax

    def returns(selection):
        return float(weight[selection].sum())

    def total(values, selection):
        return float((values[selection] * weight[selection]).sum())

    def mean(values, selection):
        count = returns(selection)
        return total(values, selection) / count if count > 0 else 0.0

    changed = mask & (np.abs(tax_change) > threshold)
    decreased = mask & (tax_change < -threshold)
    increased = mask & (tax_change > threshold)
    unchanged = mask & (np.abs(tax_change) <= threshold)
    zero_tax = mask & (reform_tax <= 0)  # zero liability under the reform

    return {
        "returns": returns(mask),
        "old_avg_tax": mean(baseline_tax, mask),
        "new_avg_tax": mean(reform_tax, mask),
        "changed_returns": returns(changed),
        "avg_change": mean(reform_tax, changed) - mean(baseline_tax, changed),
        "total_change": total(tax_change, mask),
        "decrease_returns": returns(decreased),
        "total_decrease": total(tax_change, decreased),
        "avg_decrease": mean(tax_change, decreased),
        "increase_returns": returns(increased),
        "total_increase": total(tax_change, increased),
        "avg_increase": mean(tax_change, increased),
        "no_change_returns": returns(unchanged),
        "zero_tax_returns": returns(zero_tax),
    }


def summarize_returns(label, mask, baseline_tax, reform_tax, weight, threshold=CHANGE_THRESHOLD):
    """Return one formatted results row (dict) for the tax units in *mask*.

    Counts and dollar amounts are rounded to ints; percentages are plain
    Python floats rounded to one decimal. The same function produces both the
    bracket rows and the "Total" row, so every column is defined identically
    in each.
    """
    stats = bracket_stats(mask, baseline_tax, reform_tax, weight, threshold)
    all_returns = float(np.asarray(weight, dtype=np.float64).sum())
    in_bracket = stats["returns"]

    def whole(value):
        return int(round(value))

    def share(part, of):
        return round(part / of * 100, 1) if of > 0 else 0.0

    return {
        "Federal AGI Range": label,
        "Est. # Returns": whole(in_bracket),
        "% of Returns": share(in_bracket, all_returns),
        "Old Avg Tax": whole(stats["old_avg_tax"]),
        "New Avg Tax": whole(stats["new_avg_tax"]),
        "Returns w/ Change": whole(stats["changed_returns"]),
        "% w/ Change": share(stats["changed_returns"], in_bracket),
        "Avg Change": whole(stats["avg_change"]),
        "Total Change ($)": whole(stats["total_change"]),
        "Decrease #": whole(stats["decrease_returns"]),
        "Decrease %": share(stats["decrease_returns"], in_bracket),
        "Total Decrease ($)": whole(stats["total_decrease"]),
        "Avg Decrease": whole(stats["avg_decrease"]),
        "Increase #": whole(stats["increase_returns"]),
        "Increase %": share(stats["increase_returns"], in_bracket),
        "Total Increase ($)": whole(stats["total_increase"]),
        "Avg Increase": whole(stats["avg_increase"]),
        "No Change #": whole(stats["no_change_returns"]),
        "No Change %": share(stats["no_change_returns"], in_bracket),
        "Zero Tax #": whole(stats["zero_tax_returns"]),
        "Zero Tax %": share(stats["zero_tax_returns"], in_bracket),
    }


def build_results_table(agi, baseline_tax, reform_tax, weight, brackets=INCOME_BRACKETS):
    """Return a DataFrame with one row per non-empty AGI bracket plus a "Total" row."""
    agi = np.asarray(agi)
    rows = []
    for lower, upper, label in brackets:
        mask = (agi > lower) & (agi <= upper)
        if not mask.any():
            # No sample records in this bracket: leave the row out entirely.
            continue
        rows.append(summarize_returns(label, mask, baseline_tax, reform_tax, weight))
    everyone = np.ones(len(agi), dtype=bool)
    rows.append(summarize_returns("Total", everyone, baseline_tax, reform_tax, weight))
    return pd.DataFrame(rows)


# Runs when executed as a notebook cell or script; skipped on import so the
# helpers above can be reused and tested without the simulation outputs.
if __name__ == "__main__":
    df_results = build_results_table(agi, baseline_tax, reform_tax, tax_unit_weight)
    print("Results calculated!")

    # Unrounded totals, kept under their original names for later cells.
    _totals = bracket_stats(
        np.ones(len(agi), dtype=bool), baseline_tax, reform_tax, tax_unit_weight
    )
    total_returns = _totals["returns"]
    total_old_tax = _totals["old_avg_tax"]
    total_new_tax = _totals["new_avg_tax"]
    total_returns_changed = _totals["changed_returns"]
    total_change_amount = _totals["total_change"]
    total_decrease_returns = _totals["decrease_returns"]
    total_decrease_amount = _totals["total_decrease"]
    total_increase_returns = _totals["increase_returns"]
    total_increase_amount = _totals["total_increase"]
    total_no_change_returns = _totals["no_change_returns"]
    total_zero_tax_returns = _totals["zero_tax_returns"]

    # ## Results Summary
    print("=" * 100)
    print("H. 4216 - ESTIMATED SOUTH CAROLINA INDIVIDUAL INCOME TAX IMPACT")
    print(f"Tax Year {TAX_YEAR}")
    print("=" * 100)
    print("\nProposal: Apply a tax rate of 1.99% on taxable income up to $30,000 and 5.39% over,")
    print("eliminate the federal standard or itemized deduction, allow a new SC deduction at")
    print("certain income levels, and maintain all other state adjustments, exemptions, and credits.")
    print("=" * 100)

    # Summary stats
    pct_decrease = total_decrease_returns / total_returns * 100
    pct_increase = total_increase_returns / total_returns * 100
    pct_unchanged = total_no_change_returns / total_returns * 100

    print("\nImpact: With this tax structure:")
    print(f" - {pct_decrease:.1f}% of taxpayers have a LOWER tax liability")
    print(f" - {pct_increase:.1f}% of taxpayers have a HIGHER tax liability")
    print(f" - {pct_unchanged:.1f}% are UNCHANGED")
    print(f"\nGeneral Fund Impact: ${total_change_amount:,.0f}")
    print("=" * 100)
# Columns of the main results table.
MAIN_COLUMNS = [
    "Federal AGI Range", "Est. # Returns", "% of Returns",
    "Old Avg Tax", "New Avg Tax", "Total Change ($)",
    "Decrease #", "Decrease %", "Increase #", "Increase %",
    "No Change %", "Zero Tax %",
]

# (title, columns) for each detailed breakdown table, in display order.
SECTION_TABLES = [
    (
        "ESTIMATED TAX RETURN DISTRIBUTION",
        ["Federal AGI Range", "Est. # Returns", "% of Returns", "Old Avg Tax", "New Avg Tax"],
    ),
    (
        "TAX RETURNS WITH A DECREASE IN LIABILITY",
        ["Federal AGI Range", "Decrease #", "Decrease %", "Total Decrease ($)", "Avg Decrease"],
    ),
    (
        "TAX RETURNS WITH AN INCREASE IN LIABILITY",
        ["Federal AGI Range", "Increase #", "Increase %", "Total Increase ($)", "Avg Increase"],
    ),
    (
        "TAX RETURNS WITH NO CHANGE / ZERO TAX LIABILITY",
        ["Federal AGI Range", "No Change #", "No Change %", "Zero Tax #", "Zero Tax %"],
    ),
]

EXPORT_PATH = 'sc_h4216_tax_impact_analysis.csv'


def tidy_for_display(df, decimals=1):
    """Return a copy of *df* with numeric columns normalised for printing.

    Float columns are widened to float64 and rounded to *decimals*. Widening
    alone is not enough: a float32 93.7 becomes 93.69999694... and prints as
    93.699997. Integer columns are widened to int64. *df* is not modified.
    """
    tidy = df.copy()
    for col in tidy.columns:
        if pd.api.types.is_float_dtype(tidy[col]):
            tidy[col] = tidy[col].astype("float64").round(decimals)
        elif pd.api.types.is_integer_dtype(tidy[col]):
            tidy[col] = tidy[col].astype("int64")
    return tidy


def render_table(df, columns):
    """Return the selected *columns* of *df* as an index-free text table."""
    return tidy_for_display(df[columns]).to_string(index=False)


def print_section(title, df, columns, width=80):
    """Print *title* between rules of *width* '=' characters, then the table."""
    print("\n" + "=" * width)
    print(title)
    print("=" * width)
    print(render_table(df, columns))


# Runs when executed as a notebook cell or script; skipped on import so the
# helpers above can be reused and tested without `df_results`.
if __name__ == "__main__":
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)

    # Display main results table
    display_cols = MAIN_COLUMNS
    df_display = tidy_for_display(df_results[display_cols])
    print(df_display.to_string(index=False))

    # Export full results
    df_results.to_csv(EXPORT_PATH, index=False)
    print(f"\nFull results exported to: {EXPORT_PATH}")

    # ## Detailed Breakdown Tables
    for section_title, section_cols in SECTION_TABLES:
        print_section(section_title, df_results, section_cols)
Comparison to RFA Fiscal Note\n",
+    "\n",
+    "The SC Revenue & Fiscal Affairs (RFA) Office estimated H.4216 would have a **-$119.1M** General Fund impact.\n",
+    "\n",
+    "Key differences between PolicyEngine and RFA estimates:\n",
+    "- **Population**: PE counts all tax units (filers + non-filers); RFA counts only actual filers\n",
+    "- **Data source**: PE uses CPS-based synthetic data; RFA uses actual SC tax return data\n",
+    "- **Income distribution**: PE has different return counts by income bracket, particularly more millionaires"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "================================================================================\n",
+      "COMPARISON: PolicyEngine vs RFA Fiscal Note\n",
+      "================================================================================\n",
+      "\n",
+      "General Fund Impact:\n",
+      " RFA Estimate: $ -238,151,000\n",
+      " PolicyEngine Estimate: $ -110,942,720\n",
+      " Difference: $ 127,208,280\n",
+      "\n",
+      " Accuracy vs RFA: 46.6%\n",
+      "\n",
+      "Total Returns:\n",
+      " RFA: 5,515,148\n",
+      " PolicyEngine: 1,905,181\n",
+      " Difference: -3,609,967\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load RFA analysis for comparison; drop any 'Total' summary row so the sums below count each bracket once\n",
+    "rfa_df = pd.read_csv('rfa_h4216_analysis.csv')\n",
+    "rfa_df = rfa_df[~rfa_df['Federal AGI Range'].astype(str).str.strip().str.lower().str.startswith('total')].copy()\n",
+    "print(\"=\"*80)\n",
+    "print(\"COMPARISON: PolicyEngine vs RFA Fiscal Note\")\n",
+    "print(\"=\"*80)\n",
+    "\n",
+    "# RFA total impact - parse the dollar string to number (a Unicode minus sign, which float() rejects, is normalised to '-')\n",
+    "def parse_dollar(val):\n",
+    "    if isinstance(val, str):\n",
+    "        return float(val.replace('$', '').replace(',', '').replace('\\u2212', '-'))\n",
+    "    return val\n",
+    "\n",
+    "rfa_df['Total Dollar Change Numeric'] = rfa_df['Total Dollar Change'].apply(parse_dollar)\n",
+    "rfa_total_impact = rfa_df['Total Dollar Change Numeric'].sum()\n",
+    "pe_total_impact = total_change_amount\n",
+    "\n",
+    "print(f\"\\nGeneral Fund Impact:\")\n",
+    "print(f\" RFA Estimate: ${rfa_total_impact:>15,.0f}\")\n",
+    "print(f\" PolicyEngine Estimate: ${pe_total_impact:>15,.0f}\")\n",
+    "print(f\" Difference: ${pe_total_impact - rfa_total_impact:>15,.0f}\")\n",
+    "\n",
+    "# Calculate accuracy\n",
+    "accuracy = 1 - abs(pe_total_impact - rfa_total_impact) / abs(rfa_total_impact)\n",
+    "print(f\"\\n Accuracy vs RFA: {accuracy*100:.1f}%\")\n",
+    "\n",
+    "# Return count comparison\n",
+    "rfa_total_returns = rfa_df['Est # Returns'].sum()\n",
+    "print(f\"\\nTotal Returns:\")\n",
+    "print(f\" RFA: {rfa_total_returns:>12,.0f}\")\n",
+    "print(f\" PolicyEngine: {int(total_returns):>12,.0f}\")\n",
+    "print(f\" Difference: {int(total_returns - rfa_total_returns):>+12,.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "================================================================================\n",
+      "IMPACT BY INCOME BRACKET: PolicyEngine vs RFA\n",
+      "================================================================================\n",
+      " AGI Range PE Returns RFA Returns PE Impact RFA Impact Diff ($)\n",
+      " $0* 58352 78854 0 -571000.0 571000.0\n",
+      " $1 to $10,000 168000 0 0 0.0 0.0\n",
+      " $10,001 to $20,000 205689 0 697809 0.0 697809.0\n",
+      " $20,001 to $30,000 226431 0 2860578 0.0 2860578.0\n",
+      " $30,001 to $40,000 174753 0 -4382816 0.0 -4382816.0\n",
+      " $40,001 to $50,000 155837 0 -11700509 0.0 -11700509.0\n",
+      " $50,001 to $75,000 262861 0 -27688580 0.0 -27688580.0\n",
+      " $75,001 to $100,000 215040 0 -38227128 0.0 -38227128.0\n",
+      " $100,001 to $150,000 278127 0 56022196 0.0 56022196.0\n",
+      " $150,001 to $200,000 49870 0 34402136 0.0 34402136.0\n",
+      " $200,001 to $300,000 40779 0 22764908 0.0 22764908.0\n",
+      " $300,001 to $500,000 42814 0 -20835856 0.0 -20835856.0\n",
+      "$500,001 to $1,000,000 13719 0 -7850124 0.0 -7850124.0\n",
+      " Over $1,000,000 12909 0 -117005352 0.0 -117005352.0\n"
+     ]
+    }
+   ],
+   "source":
[
+    "# Side-by-side comparison by income bracket\n",
+    "print(\"\\n\" + \"=\"*80)\n",
+    "print(\"IMPACT BY INCOME BRACKET: PolicyEngine vs RFA\")\n",
+    "print(\"=\"*80)\n",
+    "\n",
+    "# Map PE brackets to RFA brackets for comparison (exact label match on 'Federal AGI Range')\n",
+    "bracket_comparison = []\n",
+    "for _, row in df_results.iterrows():\n",
+    "    if row['Federal AGI Range'] == 'Total':\n",
+    "        continue\n",
+    "    \n",
+    "    # Find matching RFA row; brackets with no RFA match are recorded as NaN, not as a misleading 0\n",
+    "    rfa_match = rfa_df[rfa_df['Federal AGI Range'] == row['Federal AGI Range']]\n",
+    "    if len(rfa_match) > 0:\n",
+    "        rfa_impact = rfa_match['Total Dollar Change Numeric'].values[0]\n",
+    "        rfa_returns = rfa_match['Est # Returns'].values[0]\n",
+    "    else:\n",
+    "        rfa_impact = float('nan')\n",
+    "        rfa_returns = float('nan')\n",
+    "    \n",
+    "    bracket_comparison.append({\n",
+    "        'AGI Range': row['Federal AGI Range'],\n",
+    "        'PE Returns': row['Est. # Returns'],\n",
+    "        'RFA Returns': rfa_returns,\n",
+    "        'PE Impact': row['Total Change ($)'],\n",
+    "        'RFA Impact': rfa_impact,\n",
+    "        'Diff ($)': row['Total Change ($)'] - rfa_impact\n",
+    "    })\n",
+    "\n",
+    "comparison_df = pd.DataFrame(bracket_comparison)\n",
+    "print(comparison_df.to_string(index=False))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/us/states/sc/sc_h4216_tax_impact_analysis.csv b/us/states/sc/sc_h4216_tax_impact_analysis.csv
new file mode 100644
index 0000000..79ed7e3
--- /dev/null
+++ b/us/states/sc/sc_h4216_tax_impact_analysis.csv
@@ -0,0 +1,16 @@
+Federal AGI Range,Est.
# Returns,% of Returns,Old Avg Tax,New Avg Tax,Returns w/ Change,% w/ Change,Avg Change,Total Change ($),Decrease #,Decrease %,Total Decrease ($),Avg Decrease,Increase #,Increase %,Total Increase ($),Avg Increase,No Change #,No Change %,Zero Tax #,Zero Tax % +$0*,58352,3.0999999046325684,0,0,0,0.0,0,0,0,0.0,0,0,0,0.0,0,0,58352,100.0,58352,100.0 +"$1 to $10,000",168000,8.800000190734863,0,0,0,0.0,0,0,0,0.0,0,0,0,0.0,0,0,168000,100.0,168000,100.0 +"$10,001 to $20,000",205689,10.800000190734863,0,3,12906,6.3,54,697809,0,0.0,0,0,12906,6.3,697809,54,192783,93.7,192783,93.7 +"$20,001 to $30,000",226431,11.899999618530273,33,45,52834,23.3,54,2860578,1427,0.6,-8402,-6,51406,22.7,2871184,56,173597,76.7,171348,75.7 +"$30,001 to $40,000",174753,9.199999809265137,182,156,80105,45.8,-55,-4382816,57708,33.0,-5940918,-103,22397,12.8,1558448,70,94648,54.2,94228,53.9 +"$40,001 to $50,000",155837,8.199999809265137,319,244,92790,59.5,-126,-11700509,48322,31.0,-15560643,-322,44468,28.5,3860133,87,63047,40.5,63047,40.5 +"$50,001 to $75,000",262861,13.800000190734863,581,475,214098,81.4,-129,-27688580,136898,52.1,-37521040,-274,77200,29.4,9832463,127,48763,18.6,48850,18.6 +"$75,001 to $100,000",215040,11.300000190734863,1338,1161,189218,88.0,-202,-38227128,138525,64.4,-52849696,-382,50694,23.6,14622566,288,25821,12.0,26375,12.3 +"$100,001 to $150,000",278127,14.600000381469727,2928,3130,274640,98.7,204,56022196,118122,42.5,-19568998,-166,156517,56.3,75591424,483,3487,1.3,2748,1.0 +"$150,001 to $200,000",49870,2.5999999046325684,5124,5814,49870,100.0,690,34402136,1551,3.1,-380708,-246,48319,96.9,34782844,720,0,0.0,0,0.0 +"$200,001 to $300,000",40779,2.0999999046325684,9149,9707,40720,99.9,559,22764908,2048,5.0,-201820,-99,38672,94.8,22966736,594,59,0.1,0,0.0 +"$300,001 to $500,000",42814,2.200000047683716,17785,17299,42018,98.1,-496,-20835856,35387,82.7,-24901672,-704,6631,15.5,4065817,613,796,1.9,796,1.9 +"$500,001 to 
$1,000,000",13719,0.699999988079071,27237,26665,13719,100.0,-572,-7850124,13076,95.3,-19588270,-1498,643,4.7,11738147,18254,0,0.0,0,0.0 +"Over $1,000,000",12909,0.699999988079071,113354,104291,12909,100.0,-9064,-117005352,12703,98.4,-128537088,-10118,206,1.6,11531744,56085,0,0.0,0,0.0 +Total,1905181,100.0,2399,2341,1075827,56.5,-58,-110942720,565768,29.7,-305059264,-539,510059,26.8,194119312,381,829354,43.5,826527,43.4 diff --git a/us/states/sc/sc_staging_dataset_summary_weighted.csv b/us/states/sc/sc_staging_dataset_summary_weighted.csv new file mode 100644 index 0000000..e7ac8fa --- /dev/null +++ b/us/states/sc/sc_staging_dataset_summary_weighted.csv @@ -0,0 +1,22 @@ +Metric,Value +Household count (weighted),"1,587,912" +Person count (weighted),"4,788,894" +Average household size,3.0 +Weighted median household AGI,"$60,378" +Weighted average household AGI,"$105,617" +Weighted median person AGI,"$56,366" +Weighted average person AGI,"$100,583" +Unweighted median household AGI,"$67,907" +Unweighted median person AGI,"$67,460" +25th percentile household AGI,"$27,117" +75th percentile household AGI,"$105,515" +90th percentile household AGI,"$153,279" +95th percentile household AGI,"$237,856" +Max household AGI,"$353,653,602" +Total households with children,"637,558" +Households with 1 child,"308,153" +Households with 2 children,"190,106" +Households with 3+ children,"139,299" +Total children under 18,"1,157,115" +Children under 6,"350,517" +Children under 3,"177,575"