From 24ae7433c5e457f5d9281b589ffb3923cbc5a8ba Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Thu, 19 Feb 2026 23:25:13 -0500 Subject: [PATCH 1/4] Add capital income predictors to SIPP asset imputation The QRF model for imputing liquid assets (bank accounts, stocks, bonds) previously used only employment_income, age, demographics. This adds interest_income, dividend_income, and rental_income as predictors, which are strongly correlated with asset holdings and available in both SIPP (TINC_BANK, TINC_STMF, TINC_BOND, TINC_RENT) and CPS. Updated in three places to keep them consistent: - sipp.py (standalone model training) - cps.py (CPS variable extraction) - source_impute.py (calibration-time imputation) Co-Authored-By: Claude Opus 4.6 --- .../calibration/source_impute.py | 36 ++++++++++++++++++- policyengine_us_data/datasets/cps/cps.py | 3 ++ policyengine_us_data/datasets/sipp/sipp.py | 19 ++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/policyengine_us_data/calibration/source_impute.py b/policyengine_us_data/calibration/source_impute.py index 339e038e..086cdd67 100644 --- a/policyengine_us_data/calibration/source_impute.py +++ b/policyengine_us_data/calibration/source_impute.py @@ -72,6 +72,9 @@ SIPP_ASSETS_PREDICTORS = [ "employment_income", + "interest_income", + "dividend_income", + "rental_income", "age", "is_female", "is_married", @@ -458,6 +461,10 @@ def _impute_sipp( "TVAL_BANK", "TVAL_STMF", "TVAL_BOND", + "TINC_BANK", + "TINC_STMF", + "TINC_BOND", + "TINC_RENT", ] asset_df = pd.read_csv( STORAGE_FOLDER / "pu2023.csv", @@ -473,6 +480,11 @@ def _impute_sipp( asset_df["is_female"] = asset_df.ESEX == 2 asset_df["is_married"] = asset_df.EMS == 1 asset_df["employment_income"] = asset_df.TPTOTINC * 12 + asset_df["interest_income"] = ( + asset_df["TINC_BANK"].fillna(0) + asset_df["TINC_BOND"].fillna(0) + ) * 12 + asset_df["dividend_income"] = asset_df["TINC_STMF"].fillna(0) * 12 + asset_df["rental_income"] = asset_df["TINC_RENT"].fillna(0) * 12 asset_df["household_weight"] = asset_df.WPFINWGT asset_df["is_under_18"] = asset_df.TAGE < 18 asset_df["count_under_18"] = ( @@ -484,6 +496,9 @@ def _impute_sipp( asset_train_cols = [ "employment_income", + "interest_income", + "dividend_income", + "rental_income", "bank_account_assets", "stock_assets", "bond_assets", @@ -510,7 +525,14 @@ def _impute_sipp( data, time_period, dataset_path, - ["employment_income", "age", "is_male"], + [ + "employment_income", + "interest_income", + "dividend_income", + "rental_income", + "age", + "is_male", + ], ) if "is_male" in cps_asset_df.columns: cps_asset_df["is_female"] = ( @@ -529,6 +551,18 @@ def _impute_sipp( if "count_under_18" in cps_tip_df.columns else 0.0 ) + for cap_var in [ + "interest_income", + "dividend_income", + "rental_income", + ]: + if cap_var not in cps_asset_df.columns: + if cap_var in data: + cps_asset_df[cap_var] = data[cap_var][time_period].astype( + np.float32 + ) + else: + cps_asset_df[cap_var] = 0.0 asset_vars = [ "bank_account_assets", diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index 76e55d4a..a1dbca0e 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -1779,6 +1779,9 @@ def add_tips(self, cps: h5py.File): "person_id", "household_id", "employment_income", + "interest_income", + "dividend_income", + "rental_income", "age", "household_weight", "is_female", diff --git a/policyengine_us_data/datasets/sipp/sipp.py b/policyengine_us_data/datasets/sipp/sipp.py index bf8b75dd..ecda1834 100644 --- a/policyengine_us_data/datasets/sipp/sipp.py +++ b/policyengine_us_data/datasets/sipp/sipp.py @@ -156,6 +156,11 @@ def get_tip_model() -> QRF: "TVAL_BANK", # Checking, savings, money market "TVAL_STMF", # Stocks and mutual funds "TVAL_BOND", # Bonds and government securities + # Income from assets (monthly, person-level) + "TINC_BANK", # Interest from bank accounts + "TINC_STMF", # Dividends from stocks/mutual funds + "TINC_BOND", # Interest from bonds + "TINC_RENT", # Rental income # SSI receipt (for validation) "RSSI_YRYN", # Received SSI in at least one month ] @@ -200,6 +205,14 @@ def train_asset_model(): df["household_weight"] = df.WPFINWGT df["household_id"] = df.SSUID + # Capital income predictors (annualized from monthly SIPP) + # Maps to CPS: interest_income, dividend_income, rental_income + df["interest_income"] = ( + df["TINC_BANK"].fillna(0) + df["TINC_BOND"].fillna(0) + ) * 12 + df["dividend_income"] = df["TINC_STMF"].fillna(0) * 12 + df["rental_income"] = df["TINC_RENT"].fillna(0) * 12 + # Calculate household-level counts df["is_under_18"] = df.TAGE < 18 df["count_under_18"] = ( @@ -210,6 +223,9 @@ def train_asset_model(): [ "household_id", "employment_income", + "interest_income", + "dividend_income", + "rental_income", "bank_account_assets", "stock_assets", "bond_assets", @@ -239,6 +255,9 @@ def train_asset_model(): X_train=sipp, predictors=[ "employment_income", + "interest_income", + "dividend_income", + "rental_income", "age", "is_female", "is_married", From 107f87bdc23a9659c3aa3ff21dca5cde82ab83b5 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Thu, 19 Feb 2026 23:26:15 -0500 Subject: [PATCH 2/4] Add changelog entry Co-Authored-By: Claude Opus 4.6 --- changelog_entry.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index 6688a794..96c8d687 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -1,4 +1,4 @@ -- bump: patch +- bump: minor changes: - fixed: - - Add TANF takeup (22%) assignment to CPS data pipeline so takes_up_tanf_if_eligible is persisted in the dataset. + enhanced: + - Add capital income predictors (interest, dividend, rental income) to SIPP liquid asset imputation model for improved accuracy. From 82f893d223b4190a814cb0a4b7f475cfb77c238b Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Thu, 19 Feb 2026 23:36:40 -0500 Subject: [PATCH 3/4] Retrigger CI From 84354dbd6af158bbc6216c63ad300ecd9402d4c3 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Thu, 19 Feb 2026 23:47:21 -0500 Subject: [PATCH 4/4] Retrigger CI (2)