Survey-weighted Difference-in-Differences: combining balance and diff-diff on a BRFSS-style smoking-ban policy¶

This tutorial shows the full end-to-end workflow for survey-weighted causal inference in pure Python, using:

  1. balance to reweight a non-probability (or response-rate-decayed probability) sample to a target population frame via inverse-propensity weighting.
  2. diff_diff to estimate a modern Callaway-Sant'Anna doubly-robust Difference-in-Differences with built-in survey-design variance and HonestDiD sensitivity.
  3. The thin adapter balance.interop.diff_diff (added in balance 0.21) that hands a balance.Sample to a diff-diff estimator without any manual weight_pre_adjust clean-up or SurveyDesign wiring.

We work through a stylised version of a real public-health question: Did State X's 2020 indoor-smoking ban reduce adult asthma prevalence relative to bordering states without bans, 2018-2024? The microdata shape mirrors the public-use BRFSS file (CDC BRFSS 2024), but we generate it synthetically via dd.generate_survey_did_data so the notebook is self-contained and deterministic - every cell runs in under 30 seconds on a laptop. The cell that loads the synthetic frame is the one you would replace with a real pyreadstat.read_xport(...) call when running on actual BRFSS XPT files.

Why this is the right demo for the integration. BRFSS is a complex telephone survey with declining response rates (now ~45% combined landline/cell, see Pew 2022 nonprobability panels report). State-level policy rollouts are a quintessential staggered-DiD design. Modern DiD estimators (Callaway-Sant'Anna, Sun-Abraham, BJS) provide doubly-robust ATT(g, t) estimation, but they need a clean weight column to be design-consistent. balance is the Python tool that produces that column; diff-diff is the Python tool that consumes it correctly. Without the adapter, users have to manually strip balance's weight_pre_adjust book-keeping columns, hard-code weight_type="pweight" to satisfy CallawaySantAnna's guard, and rebuild the diagnostics dict on every notebook. This tutorial shows how the adapter collapses that into a single import.

Prerequisites. Familiarity with pandas, basic causal-inference vocabulary (treatment, control, parallel trends), and one prior balance tutorial (we recommend the balance quickstart first). No R or Stata fluency is assumed.

Setup¶

You will need a build of balance that ships balance.interop.diff_diff (the v0.21 release; see CHANGELOG.md) and diff-diff (>= 3.3.0). The simplest install is:

pip install "balance[did]"

which pulls diff-diff>=3.3.0,<4 as an optional extra. If you already have balance and just want to add diff-diff, pip install diff-diff is sufficient.

In [1]:
# Standard scientific stack
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# balance - reweighting against a target frame
import balance
from balance import Sample

# balance.interop.diff_diff - the thin adapter (added in balance 0.21)
from balance.interop import diff_diff as bd
from balance.interop.diff_diff import (
    as_balance_diagnostic,
    fit_did,
    to_survey_design,
)

# diff-diff - survey-aware DiD estimators + sensitivity + DGP
import diff_diff as dd
from diff_diff import (
    CallawaySantAnna,
    SurveyDesign,
    aggregate_survey,
    compute_honest_did,
    generate_survey_did_data,
)

# Reproducibility: every random draw in this notebook flows from this seed.
RNG = np.random.default_rng(20260430)

# Quiet a benign FutureWarning from balance 0.20 about
# Sample.weight_column returning str|None - superseded in 0.21.
warnings.filterwarnings("ignore", category=FutureWarning, module="balance")

print(f"balance:   {balance.__version__}")
print(f"diff-diff: {dd.__version__}")
INFO (2026-05-07 22:06:12,901) [__init__/<module> (line 76)]: Using balance version 0.20.0
INFO (2026-05-07 22:06:12,902) [__init__/<module> (line 81)]: 
balance (Version 0.20.0) loaded:
    📖 Documentation: https://import-balance.org/
    🛠️ Help / Issues: https://github.com/facebookresearch/balance/issues/
    📄 Citation:
        Sarig, T., Galili, T., & Eilat, R. (2023).
        balance - a Python package for balancing biased data samples.
        https://arxiv.org/abs/2307.06024

    Tip: You can view this message anytime with balance.help()

balance:   0.20.0
diff-diff: 3.3.2

The dataset¶

We work with a synthetic frame that mirrors the BRFSS 2018-2024 public-use microdata. BRFSS is a state-stratified telephone survey of adults; from 2011 onward it samples both landlines and cell phones, and combined response rates have been declining since 2010 (see CDC's 2024 codebook). That declining response rate is the reason it is a non-probability-ish frame even though the design weights _LLCPWT are nominally probability weights: the response-rate decline is differential by age, race, and education, so the realised sample is biased even after applying the design weights.

Our synthetic frame covers 7 years x 50 states. The generator emits one row per state-year, so the microdata has 350 rows (a real BRFSS extract would hold hundreds of respondents per state-year cell), with:

  • state (categorical, 50 states); year (2018-2024); quarter (state-year-quarter index for the panel-aggregation step).
  • Demographics: age_band and educa (continuous covariates inherited from dd.generate_survey_did_data's x1 / x2 columns), plus sex and race (categoricals generated below) - these are the covariates we will reweight against ACS-shaped target distributions via IPW (adjust(method="ipw")).
  • Outcome asthnow - a continuous asthma-burden index standing in for a has-asthma-now indicator (the synthetic generator emits a continuous outcome, as the data preview below shows), modeled as a declining trend plus a treatment-effect drop in treated states post-2020.
  • design_weight - synthetic stand-in for BRFSS _LLCPWT.
  • first_treat_year - the year when each state's smoking ban took effect (0 for never-treated controls).

The synthetic generator (dd.generate_survey_did_data) is deterministic given a seed, so this notebook re-runs identically across machines - important for CI and for talk demos.
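The determinism claim is just the standard seeded-Generator contract in NumPy: rebuilding a Generator from the same seed replays the same stream. A quick sketch:

```python
import numpy as np

def draw(seed: int) -> np.ndarray:
    # A fresh Generator built from the same seed replays the same stream,
    # which is what makes a seeded synthetic DGP identical across machines.
    rng = np.random.default_rng(seed)
    return rng.normal(size=5)

same = np.array_equal(draw(20260430), draw(20260430))
print(same)  # True: the notebook re-runs identically under one seed
```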

In [2]:
# diff-diff's built-in survey-DiD generator. Returns microdata as a flat
# DataFrame with columns (unit, period, outcome, first_treat, treated,
# true_effect, stratum, psu, fpc, weight, x1, x2).
brfss = generate_survey_did_data(
    n_units=50,                    # 50 states
    n_periods=7,                   # panel periods 1-7 (mapped to 2018-2024)
    cohort_periods=[3, 5],         # treated states adopt in periods 3 and 5
    never_treated_frac=0.6,        # 60% never-treated, 40% treated
    treatment_effect=-0.06,        # average drop of 0.06 in asthnow for treated
    n_strata=5,
    psu_per_stratum=8,
    fpc_per_stratum=200.0,
    weight_variation="moderate",
    add_covariates=True,           # ships x1, x2 continuous covariates
    informative_sampling=True,     # selection bias on covariates
    seed=20260430,
)

# Re-name to BRFSS-style column names so the rest of the tutorial reads
# like a real epidemiologist's notebook.
df = brfss.rename(
    columns={
        "unit": "state",
        "period": "year",
        "first_treat": "first_treat_year",
        "outcome": "asthnow",
        "weight": "design_weight",
        "x1": "age_band",
        "x2": "educa",
    }
).copy()

# Map period index 1..7 onto calendar years 2018..2024 so plots and
# df.query("year == 2018") read naturally.
df["year"] = df["year"] + 2017
treated_mask = df["first_treat_year"] > 0
df.loc[treated_mask, "first_treat_year"] = (
    df.loc[treated_mask, "first_treat_year"] + 2017
)

df["sex"] = RNG.choice(["male", "female"], size=len(df))
df["race"] = RNG.choice(
    ["white", "black", "hispanic", "asian", "other"],
    p=[0.60, 0.15, 0.18, 0.05, 0.02],
    size=len(df),
)
# Quarter index for the panel - for this synthetic data, one quarter per year.
df["quarter"] = df["year"]
df["id"] = np.arange(len(df))

print("Microdata shape:", df.shape)
df.head()
Microdata shape: (350, 16)
Out[2]:
state year asthnow first_treat_year treated true_effect stratum psu fpc design_weight age_band educa sex race quarter id
0 0 2018 -1.269343 0 0 0.0 0 0 200.0 0.727273 0.968506 1 female white 2018 0
1 1 2018 4.930174 0 0 0.0 0 1 200.0 1.818182 0.984487 1 female white 2018 1
2 2 2018 -2.085724 0 0 0.0 0 2 200.0 0.181818 -0.353586 1 male black 2018 2
3 3 2018 3.763752 0 0 0.0 0 3 200.0 1.636364 0.347321 0 female hispanic 2018 3
4 4 2018 -1.140357 0 0 0.0 0 4 200.0 0.545455 -1.660916 0 female white 2018 4

Step 1 - Inspect the survey data¶

Before any weighting or DiD, we look at the panel structure. Three quantities matter for what follows:

  1. Panel shape: how many unit x period cells we have, and how balanced each cell is.
  2. Treatment column first_treat_year: the staggered-adoption indicator. 0 means never-treated.
  3. Response indicator (implicit here, but in real BRFSS it would be the proportion of contacted respondents who completed the survey). In our synthetic frame, the informative_sampling=True flag passed to generate_survey_did_data above (combined with weight_variation="moderate") generates under-representation of younger / lower-education respondents - exactly the BRFSS pandemic-era pathology.

balance will fix the third issue (selection-on-observables); diff_diff will give us a design-consistent ATT estimate that survives parallel-trends sensitivity checks.
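The covariate-imbalance diagnostic that drives everything downstream is the absolute standardized mean difference (ASMD) that adj.summary() reports later. A minimal sketch with hypothetical toy data, assuming the common target-SD scaling (not necessarily balance's exact formula):

```python
import numpy as np
import pandas as pd

def asmd(sample: pd.Series, target: pd.Series) -> float:
    """Absolute standardized mean difference, scaled by the target SD
    (one common convention; an illustration, not balance's implementation)."""
    return float(abs(sample.mean() - target.mean()) / target.std(ddof=1))

rng = np.random.default_rng(0)
# Hypothetical toy data: the realised sample skews younger than the frame.
sample_age = pd.Series(rng.normal(loc=40, scale=12, size=500))
target_age = pd.Series(rng.normal(loc=47, scale=12, size=500))
print(asmd(sample_age, target_age) > 0.1)  # True - above the 0.1 cutoff
```

A covariate is conventionally called balanced once its ASMD drops below 0.1, which is the reference line the Love plot draws later in this tutorial.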

In [3]:
# Cell counts per state-year (one row per state-year in this synthetic frame)
cell_counts = (
    df.groupby(["state", "year"]).size().unstack("year")
)
print("Cell counts (state x year), first 5 states:")
print(cell_counts.head())

# Treatment cohorts
print("\nFirst-treat-year distribution:")
print(df.drop_duplicates("state")["first_treat_year"].value_counts().sort_index())

# A quick visual: outcome trends by treated/untreated
fig, ax = plt.subplots(figsize=(7, 4))
for label, sub in df.assign(
    cohort=np.where(df["first_treat_year"] > 0, "treated", "control")
).groupby("cohort"):
    sub.groupby("year")["asthnow"].mean().plot(
        ax=ax, marker="o", label=label,
    )
ax.set_ylabel("asthnow (mean)")
ax.set_xlabel("year")
ax.set_title("Raw outcome trends - pre-balance, pre-DiD")
ax.legend(title="cohort")
plt.tight_layout()
plt.show()
Cell counts (state x year), first 5 states:
year   2018  2019  2020  2021  2022  2023  2024
state                                          
0         1     1     1     1     1     1     1
1         1     1     1     1     1     1     1
2         1     1     1     1     1     1     1
3         1     1     1     1     1     1     1
4         1     1     1     1     1     1     1

First-treat-year distribution:
first_treat_year
0       30
2020    10
2022    10
Name: count, dtype: int64
[Figure: raw asthnow trends by cohort (treated vs control), pre-balance, pre-DiD]

Step 2 - Reweight to ACS demographic marginals via balance¶

BRFSS' design weights _LLCPWT correct for sample design (state stratum, landline/cell mix) but not for non-response. After the 2020 response-rate shock, that gap matters: younger respondents are under-represented, which biases asthma prevalence upward (older people have more asthma). We close that gap by reweighting against ACS demographic marginals - a standard balance use case.

In a real workflow target_df would come from pd.read_csv("acs_marginals_2018_2024.csv") or the Census API. Here we just take the empirical marginals from the first year of the panel as our population frame - this stands in for "what the demographic distribution should be."

After adjust(method="ipw"):

  • The active weight column on the returned Sample is "weight".
  • Compounded adjust() calls retain a history of intermediate weights in weight_pre_adjust / weight_adjusted_* columns; the adapter strips these before fitting the DiD so they aren't silently treated as covariates.
  • sample.diagnostics() gives ASMD pre/post, Kish ESS, design-effect.
  • sample.weight_column is the column NAME (str | None), not a Series - balance.interop.diff_diff.to_survey_design honours this contract.
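The second bullet is the part most users trip over when wiring balance output into a DiD estimator by hand. A rough sketch of that clean-up step, with column names taken from the bullets above (illustrative only, not the adapter's actual code):

```python
import pandas as pd

def strip_weight_history(df: pd.DataFrame) -> pd.DataFrame:
    """Drop balance's intermediate-weight bookkeeping columns so a DiD
    estimator never silently treats them as covariates (a sketch of the
    adapter's clean-up step, not its implementation)."""
    history = [
        c for c in df.columns
        if c == "weight_pre_adjust" or c.startswith("weight_adjusted")
    ]
    return df.drop(columns=history)

frame = pd.DataFrame({
    "asthnow": [0.2, 1.1],
    "weight": [1.2, 0.8],               # the active weight column
    "weight_pre_adjust": [1.0, 1.0],    # bookkeeping, must not leak
    "weight_adjusted_ipw": [1.2, 0.8],  # bookkeeping, must not leak
})
print(list(strip_weight_history(frame).columns))  # ['asthnow', 'weight']
```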
In [4]:
# Build the ACS-like target as the first-year demographic distribution.
target_df = (
    df.query("year == 2018")
    [["age_band", "sex", "race", "educa"]]
    .assign(weight=1.0)
    .reset_index(drop=True)
)
target_df["id"] = np.arange(len(target_df))

# Build a balance.Sample from the full panel
sample = Sample.from_frame(
    df,
    weight_column="design_weight",
    outcome_columns=["asthnow"],
)
target = Sample.from_frame(target_df)

# IPW adjustment - logistic regression with LASSO regularization, the default.
adj = sample.set_target(target).adjust(
    method="ipw",
    variables=["age_band", "sex", "race", "educa"],
)

# Pre/post ASMD, Kish ESS, design effect
print(adj.summary())

# Love-style ASMD plot - the diagnostic epidemiologists expect to see in
# the methods appendix.
adj.covars().plot()
WARNING (2026-05-07 22:06:13,198) [input_validation/guess_id_column (line 336)]: Guessed id column name id for the data
WARNING (2026-05-07 22:06:13,199) [sample_frame/from_frame (line 280)]: Casting id column to string
WARNING (2026-05-07 22:06:13,212) [pandas_utils/_warn_of_df_dtypes_change (line 519)]: The dtypes of SampleFrame._df were changed from the original dtypes of the input df, here are the differences - 
WARNING (2026-05-07 22:06:13,213) [pandas_utils/_warn_of_df_dtypes_change (line 530)]: The (old) dtypes that changed for df (before the change):
WARNING (2026-05-07 22:06:13,215) [pandas_utils/_warn_of_df_dtypes_change (line 533)]: 
treated             int64
psu                 int64
quarter             int64
first_treat_year    int64
stratum             int64
educa               int64
state               int64
id                  int64
year                int64
dtype: object
WARNING (2026-05-07 22:06:13,215) [pandas_utils/_warn_of_df_dtypes_change (line 534)]: The (new) dtypes saved in df (after the change):
WARNING (2026-05-07 22:06:13,217) [pandas_utils/_warn_of_df_dtypes_change (line 535)]: 
treated             float64
psu                 float64
quarter             float64
first_treat_year    float64
stratum             float64
educa               float64
state               float64
id                      str
year                float64
dtype: object
WARNING (2026-05-07 22:06:13,218) [input_validation/guess_id_column (line 336)]: Guessed id column name id for the data
WARNING (2026-05-07 22:06:13,219) [sample_frame/from_frame (line 280)]: Casting id column to string
WARNING (2026-05-07 22:06:13,230) [pandas_utils/_warn_of_df_dtypes_change (line 519)]: The dtypes of SampleFrame._df were changed from the original dtypes of the input df, here are the differences - 
WARNING (2026-05-07 22:06:13,230) [pandas_utils/_warn_of_df_dtypes_change (line 530)]: The (old) dtypes that changed for df (before the change):
WARNING (2026-05-07 22:06:13,232) [pandas_utils/_warn_of_df_dtypes_change (line 533)]: 
educa    int64
id       int64
dtype: object
WARNING (2026-05-07 22:06:13,232) [pandas_utils/_warn_of_df_dtypes_change (line 534)]: The (new) dtypes saved in df (after the change):
WARNING (2026-05-07 22:06:13,233) [pandas_utils/_warn_of_df_dtypes_change (line 535)]: 
educa    float64
id           str
dtype: object
WARNING (2026-05-07 22:06:13,234) [sample_frame/from_frame (line 321)]: Guessing weight column is 'weight'
WARNING (2026-05-07 22:06:13,235) [balance_frame/_validate_covariate_overlap (line 414)]: Responders and target have different covariate columns. Using 4 common variable(s): ['age_band', 'educa', 'race', 'sex']. Responder-only: ['first_treat_year', 'fpc', 'psu', 'quarter', 'state', 'stratum', 'treated', 'true_effect', 'year'], target-only: [].
INFO (2026-05-07 22:06:13,240) [ipw/ipw (line 724)]: Starting ipw function
WARNING (2026-05-07 22:06:13,240) [input_validation/choose_variables (line 485)]: Ignoring variables not present in all Samples: {'treated', 'psu', 'fpc', 'quarter', 'first_treat_year', 'stratum', 'state', 'true_effect', 'year'}
INFO (2026-05-07 22:06:13,243) [adjustment/apply_transformations (line 434)]: Adding the variables: []
INFO (2026-05-07 22:06:13,244) [adjustment/apply_transformations (line 435)]: Transforming the variables: ['age_band', 'sex', 'race', 'educa']
INFO (2026-05-07 22:06:13,251) [adjustment/apply_transformations (line 471)]: Final variables in output: ['age_band', 'sex', 'race', 'educa']
INFO (2026-05-07 22:06:13,268) [ipw/ipw (line 799)]: Building model matrix
INFO (2026-05-07 22:06:13,268) [ipw/ipw (line 800)]: The formula used to build the model matrix: ['sex + race + educa + age_band']
INFO (2026-05-07 22:06:13,269) [ipw/ipw (line 801)]: The number of columns in the model matrix: 15
INFO (2026-05-07 22:06:13,270) [ipw/ipw (line 802)]: The number of rows in the model matrix: 400
INFO (2026-05-07 22:06:23,174) [ipw/ipw (line 998)]: Done with sklearn
INFO (2026-05-07 22:06:23,175) [ipw/ipw (line 1000)]: max_de: None
INFO (2026-05-07 22:06:23,176) [ipw/ipw (line 1022)]: Starting model selection
INFO (2026-05-07 22:06:23,178) [ipw/ipw (line 1078)]: Chosen lambda: 10.0
INFO (2026-05-07 22:06:23,179) [ipw/ipw (line 1095)]: Proportion null deviance explained 0.0010621723225743285
WARNING (2026-05-07 22:06:23,180) [ipw/ipw (line 1103)]: The propensity model has low fraction null deviance explained (0.0010621723225743285). Results may not be accurate
WARNING (2026-05-07 22:06:23,183) [balance_frame/_validate_covariate_overlap (line 414)]: Responders and target have different covariate columns. Using 4 common variable(s): ['age_band', 'educa', 'race', 'sex']. Responder-only: ['first_treat_year', 'fpc', 'psu', 'quarter', 'state', 'stratum', 'treated', 'true_effect', 'year'], target-only: [].
WARNING (2026-05-07 22:06:23,229) [weighted_comparisons_stats/asmd (line 595)]: sample_df and target_df must have the same column names.
sample_df column names: ['age_band', 'educa', 'first_treat_year', 'fpc', 'psu', 'quarter', 'race[T.black]', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]', 'state', 'stratum', 'treated', 'true_effect', 'year']
target_df column names: ['age_band', 'educa', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]']
WARNING (2026-05-07 22:06:23,277) [weighted_comparisons_stats/asmd (line 595)]: sample_df and target_df must have the same column names.
sample_df column names: ['age_band', 'educa', 'first_treat_year', 'fpc', 'psu', 'quarter', 'race[T.black]', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]', 'state', 'stratum', 'treated', 'true_effect', 'year']
target_df column names: ['age_band', 'educa', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]']
WARNING (2026-05-07 22:06:23,541) [weighted_comparisons_stats/asmd (line 595)]: sample_df and target_df must have the same column names.
sample_df column names: ['age_band', 'educa', 'first_treat_year', 'fpc', 'psu', 'quarter', 'race[T.black]', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]', 'state', 'stratum', 'treated', 'true_effect', 'year']
target_df column names: ['age_band', 'educa', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]']
WARNING (2026-05-07 22:06:23,555) [weighted_comparisons_stats/asmd (line 595)]: sample_df and target_df must have the same column names.
sample_df column names: ['age_band', 'educa', 'first_treat_year', 'fpc', 'psu', 'quarter', 'race[T.black]', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]', 'state', 'stratum', 'treated', 'true_effect', 'year']
target_df column names: ['age_band', 'educa', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]']
WARNING (2026-05-07 22:06:23,584) [input_validation/choose_variables (line 485)]: Ignoring variables not present in all Samples: {'treated', 'psu', 'fpc', 'quarter', 'first_treat_year', 'stratum', 'state', 'true_effect', 'year'}
Adjustment details:
    method: ipw
    weight trimming mean ratio: 20
Covariate diagnostics:
    Covar ASMD reduction: 2.2%
    Covar ASMD (17 variables): 0.030 -> 0.029
    Covar mean KLD reduction: -0.0%
    Covar mean KLD (13 variables): 13.593 -> 13.593
Weight diagnostics:
    design effect (Deff): 1.340
    effective sample size proportion (ESSP): 0.746
    effective sample size (ESS): 261.1
Outcome weighted means:
            asthnow
source             
self          4.094
unadjusted    4.098
Model performance: Model proportion deviance explained: 0.001
In [5]:
# Love plot - the canonical covariate-balance visual, in the spirit of R's
# `cobalt::love.plot`. New in balance 0.21 (companion to `covars().plot()`
# above: that shows per-covariate distribution kdes, while `love_plot`
# shows per-covariate ASMD before-vs-after on a single sorted scatter, with
# a 0.1 reference line for the conventional "balance achieved" cutoff).
adj.covars().love_plot()
WARNING (2026-05-07 22:06:30,154) [weighted_comparisons_stats/asmd (line 595)]: sample_df and target_df must have the same column names.
sample_df column names: ['age_band', 'educa', 'first_treat_year', 'fpc', 'psu', 'quarter', 'race[T.black]', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]', 'state', 'stratum', 'treated', 'true_effect', 'year']
target_df column names: ['age_band', 'educa', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]']
WARNING (2026-05-07 22:06:30,199) [weighted_comparisons_stats/asmd (line 595)]: sample_df and target_df must have the same column names.
sample_df column names: ['age_band', 'educa', 'first_treat_year', 'fpc', 'psu', 'quarter', 'race[T.black]', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]', 'state', 'stratum', 'treated', 'true_effect', 'year']
target_df column names: ['age_band', 'educa', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]']
Out[5]:
<Axes: xlabel='ASMD', ylabel='Covariate'>
[Figure: Love plot of per-covariate ASMD before vs after adjustment, with the 0.1 reference line]

Step 3 - Aggregate microdata into a state-quarter panel¶

Modern staggered-DiD estimators (CallawaySantAnna, SunAbraham, ImputationDiD, ...) operate on a unit x period panel, not the raw microdata. We collapse the respondent-level frame to a state-year panel of weighted-mean asthma prevalence, with the survey design carried through.

diff_diff.aggregate_survey does this in two stages:

  1. First stage: a respondent-level SurveyDesign whose weights slot is the active balance weight (adj.weight_column).
  2. Second stage: a fresh SurveyDesign whose weights is the auto-generated {first_outcome}_weight column on the panel and whose psu is the geographic unit.

The adapter helper bd.to_panel_for_did(...) does both stages in one call: it strips balance's history columns first, then forwards by, outcomes, and any optional design_columns to aggregate_survey. The return is a (panel_df, second_stage_design) two-tuple ready to feed into a panel estimator.

A subtle detail about cell filtering: aggregate_survey filters out non-estimable cells using only the first outcome. For our single-outcome run this is irrelevant; for multi-outcome runs you would call aggregate_survey once per outcome.
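The heart of the first aggregation stage is a weighted cell mean plus the summed weight that becomes the second-stage analysis weight. A minimal pandas sketch of that idea (not aggregate_survey itself, which also carries SEs, ESS, and design metadata through):

```python
import pandas as pd

def collapse(df, by, y, w):
    """Weighted cell means plus summed weight - the core of the first
    aggregation stage (sketch only; aggregate_survey does much more)."""
    df = df.assign(_wy=df[y] * df[w])
    out = df.groupby(by, as_index=False).agg(
        _wy=("_wy", "sum"),
        cell_sum_w=(w, "sum"),
        cell_n=(y, "size"),
    )
    out[f"{y}_mean"] = out["_wy"] / out["cell_sum_w"]
    return out.drop(columns="_wy")

micro = pd.DataFrame({
    "state": ["A", "A", "B", "B"],
    "year": [2018] * 4,
    "asthnow": [1.0, 0.0, 1.0, 1.0],
    "w": [2.0, 1.0, 1.0, 3.0],  # balance-adjusted respondent weights
})
panel = collapse(micro, ["state", "year"], "asthnow", "w")
print(panel["asthnow_mean"].tolist())  # state A: 2/3, state B: 1.0
```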

In [6]:
# Use the adapter helper. It builds the first-stage SurveyDesign from
# adj's active weight column, drops weight_pre_adjust / weight_adjusted_*
# bookkeeping cols, and calls aggregate_survey with the right second-stage
# weight type ("pweight", required by CallawaySantAnna).
panel_df, second_stage_design = bd.to_panel_for_did(
    adj,
    by=["state", "year"],
    outcomes="asthnow",
    covariates=["age_band", "educa"],  # carried as state-year means
    second_stage_weights="pweight",
)

# Merge first-treat-year onto the panel (first_treat holds one row per
# state, so we can join on state).
first_treat = (
    df.drop_duplicates("state")[["state", "first_treat_year"]]
    .rename(columns={"first_treat_year": "g"})
)
panel_df = panel_df.merge(first_treat, on="state", how="left")
panel_df["g"] = panel_df["g"].fillna(0).astype(int)
panel_df["id"] = np.arange(len(panel_df))

print("Panel shape:", panel_df.shape)
print("Auto-generated second-stage weight column:", second_stage_design.weights)
print("Second-stage PSU:", second_stage_design.psu)
panel_df.head()
INFO (2026-05-07 22:06:30,350) [diff_diff/_resolve_design_columns (line 274)]: balance.interop.diff_diff: auto-populating SurveyDesign field 'strata' from sample.df column 'stratum' (matched the default convention name). Pass an explicit design_columns mapping (or design_columns={}) to suppress this.
INFO (2026-05-07 22:06:30,351) [diff_diff/_resolve_design_columns (line 274)]: balance.interop.diff_diff: auto-populating SurveyDesign field 'psu' from sample.df column 'psu' (matched the default convention name). Pass an explicit design_columns mapping (or design_columns={}) to suppress this.
INFO (2026-05-07 22:06:30,351) [diff_diff/_resolve_design_columns (line 274)]: balance.interop.diff_diff: auto-populating SurveyDesign field 'fpc' from sample.df column 'fpc' (matched the default convention name). Pass an explicit design_columns mapping (or design_columns={}) to suppress this.
Panel shape: (350, 15)
Auto-generated second-stage weight column: asthnow_weight
Second-stage PSU: state
/opt/hostedtoolcache/Python/3.12.13/x64/lib/python3.12/site-packages/diff_diff/prep.py:1660: UserWarning: pweight weights normalized to mean=1 (sum=350). Original sum was 50.
  full_resolved = effective_design.resolve(data)
Out[6]:
state year cell_n cell_n_eff cell_sum_w asthnow_mean asthnow_se asthnow_n asthnow_precision age_band_mean educa_mean srs_fallback asthnow_weight g id
0 0.0 2018.0 1 1.0 0.486572 -1.269343 NaN 1 NaN 0.968506 1.0 False 0.485426 0 0
1 0.0 2019.0 1 1.0 0.486572 -1.705741 NaN 1 NaN 0.968506 1.0 False 0.485426 0 1
2 0.0 2020.0 1 1.0 0.484363 0.516803 NaN 1 NaN 0.968506 1.0 False 0.485426 0 2
3 0.0 2021.0 1 1.0 0.485180 1.642508 NaN 1 NaN 0.968506 1.0 False 0.485426 0 3
4 0.0 2022.0 1 1.0 0.485752 1.087244 NaN 1 NaN 0.968506 1.0 False 0.485426 0 4

Step 4 - Callaway-Sant'Anna doubly-robust DiD¶

We now estimate ATT(g, t) - the average treatment effect on treated units in cohort g at period t - with the Callaway & Sant'Anna (2021) estimator. With estimation_method="dr" the estimator is doubly robust in the spirit of Sant'Anna & Zhao (2020) - it remains consistent if either the propensity model or the outcome regression is correctly specified.

The adapter helper bd.fit_did(...) glues this together:

  1. Builds the (second-stage) SurveyDesign from the panel weight column.
  2. Resolves the estimator class via getattr(diff_diff, "CallawaySantAnna").
  3. Splits keyword arguments by introspecting __init__ vs fit so the call surface stays sklearn-shaped.
  4. Attaches a _balance_adjustment provenance attribute to the result so downstream notebooks can trace back to the source Sample.

Survey variance comes from diff-diff's Binder (1983) Taylor-series-linearization (TSL) sandwich estimator (compute_survey_vcov).
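Step 3 of the list above - routing keyword arguments between the estimator's __init__ and its fit method - can be sketched with inspect.signature. Hypothetical estimator and helper names; the adapter's real routing may differ:

```python
import inspect

class Estimator:
    """Hypothetical sklearn-shaped estimator standing in for CallawaySantAnna."""
    def __init__(self, estimation_method="dr", control_group="never_treated"):
        self.estimation_method = estimation_method
        self.control_group = control_group

    def fit(self, data, outcome=None, time=None, unit=None):
        return (outcome, time, unit)

def split_kwargs(cls, kwargs):
    """Route each kwarg to __init__ or fit by introspecting signatures
    (a sketch of the adapter's routing idea, not its implementation)."""
    init_params = set(inspect.signature(cls.__init__).parameters) - {"self"}
    fit_params = set(inspect.signature(cls.fit).parameters) - {"self"}
    unknown = set(kwargs) - init_params - fit_params
    if unknown:
        raise TypeError(f"Unknown keyword(s): {sorted(unknown)}")
    init_kw = {k: v for k, v in kwargs.items() if k in init_params}
    fit_kw = {k: v for k, v in kwargs.items() if k in fit_params}
    return init_kw, fit_kw

init_kw, fit_kw = split_kwargs(
    Estimator,
    {"estimation_method": "dr", "outcome": "asthnow_mean", "time": "year"},
)
print(init_kw, fit_kw)  # {'estimation_method': 'dr'} {'outcome': 'asthnow_mean', 'time': 'year'}
```

This is what keeps the fit_did call surface flat: users pass one kwargs bag and never need to know which argument belongs to construction versus fitting.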

In [7]:
# Build a balance.Sample wrapping the panel so we can keep using the
# adapter (the Sample.weight_column is the second-stage weight column).
panel_sample = Sample.from_frame(
    panel_df,
    weight_column=second_stage_design.weights,
    outcome_columns=["asthnow_mean"],
)

# Run the Callaway-Sant'Anna doubly-robust ATT(g, t) estimator. Under the
# hood, fit_did wires up the survey design and the kwargs.
res = fit_did(
    panel_sample,
    estimator="CallawaySantAnna",
    outcome="asthnow_mean",
    time="year",
    unit="state",
    treatment_first="g",
    covariates=["age_band_mean", "educa_mean"],
    estimation_method="dr",
    control_group="not_yet_treated",
    base_period="universal",
    cluster="state",
    aggregate="all",
)
print(res.summary())

# Event-study plot - what the methods appendix gets in the paper.
ax = dd.plot_event_study(res)
ax.set_title("Callaway-Sant'Anna doubly-robust event study (balance-weighted)")
plt.tight_layout()
plt.show()
WARNING (2026-05-07 22:06:30,400) [input_validation/guess_id_column (line 336)]: Guessed id column name id for the data
WARNING (2026-05-07 22:06:30,401) [sample_frame/from_frame (line 280)]: Casting id column to string
WARNING (2026-05-07 22:06:30,412) [pandas_utils/_warn_of_df_dtypes_change (line 519)]: The dtypes of SampleFrame._df were changed from the original dtypes of the input df, here are the differences - 
WARNING (2026-05-07 22:06:30,413) [pandas_utils/_warn_of_df_dtypes_change (line 530)]: The (old) dtypes that changed for df (before the change):
WARNING (2026-05-07 22:06:30,414) [pandas_utils/_warn_of_df_dtypes_change (line 533)]: 
id           int64
asthnow_n    int64
cell_n       int64
g            int64
dtype: object
WARNING (2026-05-07 22:06:30,415) [pandas_utils/_warn_of_df_dtypes_change (line 534)]: The (new) dtypes saved in df (after the change):
WARNING (2026-05-07 22:06:30,417) [pandas_utils/_warn_of_df_dtypes_change (line 535)]: 
id               str
asthnow_n    float64
cell_n       float64
g            float64
dtype: object
/opt/hostedtoolcache/Python/3.12.13/x64/lib/python3.12/site-packages/diff_diff/staggered.py:2530: UserWarning: Low Events Per Variable (EPV = 5.0) in propensity score model for cohort g=2020.0. 10 minority-class observations for 2 predictor variable(s). Peduzzi et al. (1996) recommend EPV >= 10. Estimates may be unreliable (overfitting, biased coefficients, inflated standard errors). Consider estimation_method='reg' to avoid propensity scores.
  beta_logistic, pscore = solve_logit(
/opt/hostedtoolcache/Python/3.12.13/x64/lib/python3.12/site-packages/diff_diff/staggered.py:2530: UserWarning: Low Events Per Variable (EPV = 5.0) in propensity score model for cohort g=2022.0. 10 minority-class observations for 2 predictor variable(s). Peduzzi et al. (1996) recommend EPV >= 10. Estimates may be unreliable (overfitting, biased coefficients, inflated standard errors). Consider estimation_method='reg' to avoid propensity scores.
  beta_logistic, pscore = solve_logit(
/opt/hostedtoolcache/Python/3.12.13/x64/lib/python3.12/site-packages/balance/interop/diff_diff.py:691: UserWarning: Low Events Per Variable (EPV) detected in propensity score estimation for 12 of 12 cell(s). Minimum EPV = 5.0 (cohort g=2020.0). Consider estimation_method='reg' (avoids propensity scores) or reducing the number of covariates. See results.epv_summary() for details.
  results: object = instance.fit(df, **common_fit_kwargs)
=====================================================================================
            Callaway-Sant'Anna Staggered Difference-in-Differences Results           
=====================================================================================

Total observations:                   350
Treated units:                         20
Never-treated units:                   30
Treatment cohorts:                      2
Time periods:                           7
Control group:                 not_yet_treated
Base period:                    universal


-------------------------------------------------------------------------------------
                                    Survey Design                                    
-------------------------------------------------------------------------------------
Weight type:                      pweight
PSU/Cluster:                           50
Effective sample size:               37.3
Kish DEFF (weights):                 1.34
Survey d.f.:                           49
-------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
                   Overall Average Treatment Effect on the Treated                   
-------------------------------------------------------------------------------------
Parameter           Estimate    Std. Err.     t-stat      P>|t|   Sig.
-------------------------------------------------------------------------------------
ATT                  -0.6386       0.3728     -1.713     0.0930      .
-------------------------------------------------------------------------------------

95% Confidence Interval: [-1.3877, 0.1105]
CV (SE/|ATT|):                0.5837

-------------------------------------------------------------------------------------
                             Propensity Score Diagnostics                            
-------------------------------------------------------------------------------------
WARNING: Low Events Per Variable (EPV) in 12 of 12 cohort-time cell(s).
Minimum EPV: 5.0 (cohort g=2020.0). Threshold: 10.
Consider: estimation_method='reg' or fewer covariates.
Call results.epv_summary() for per-cohort details.
-------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------
                            Event Study (Dynamic) Effects                            
-------------------------------------------------------------------------------------
Rel. Period         Estimate    Std. Err.     t-stat      P>|t|   Sig.
-------------------------------------------------------------------------------------
-4.0                  0.7171       0.7156      1.002     0.3212       
-3.0                  0.0071       0.5070      0.014     0.9889       
-2.0                 -0.1907       0.3837     -0.497     0.6214       
-1                    0.0000          nan        nan        nan       
0.0                  -0.7104       0.4625     -1.536     0.1310       
1.0                  -0.0423       0.4882     -0.087     0.9313       
2.0                  -0.5983       0.4668     -1.282     0.2059       
3.0                  -0.8808       0.5401     -1.631     0.1094       
4.0                  -1.6014       0.6719     -2.383     0.0211      *
-------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------
                             Effects by Treatment Cohort                             
-------------------------------------------------------------------------------------
Cohort              Estimate    Std. Err.     t-stat      P>|t|   Sig.
-------------------------------------------------------------------------------------
2020.0               -0.9888       0.4707     -2.101     0.0408      *
2022.0               -0.1238       0.4719     -0.262     0.7942       
-------------------------------------------------------------------------------------

Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
=====================================================================================
[Figure: event-study plot of the dynamic effects above, rendered inline in the notebook]
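The EPV warning in the summary above is simple arithmetic, and it is worth being able to reproduce it by hand when triaging a real fit. A minimal sketch follows; `events_per_variable` is a hypothetical helper written for this notebook, not part of balance or diff-diff, and the figures come from the warning text (10 minority-class observations, 2 predictors):

```python
def events_per_variable(n_minority: int, n_predictors: int) -> float:
    """Events Per Variable for a propensity-score logit: the count of
    minority-class observations of the binary treatment indicator divided
    by the number of predictors. Peduzzi et al. (1996) suggest EPV >= 10."""
    return n_minority / n_predictors

# Figures from the warning above: 10 minority-class observations and
# 2 covariates per cohort-time cell -> EPV = 5.0, below the threshold of 10.
print(events_per_variable(10, 2))  # 5.0
```

When EPV is this low, the logit behind `estimation_method="dr"` is prone to overfitting, which is exactly why the warning suggests `estimation_method='reg'` (no propensity scores) or trimming covariates.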

Step 5 - HonestDiD sensitivity to parallel-trends violation¶

Parallel trends is the identifying assumption of any DiD design. Even with a clean event-study plot, the legislator's natural question is "what if the trends weren't exactly parallel?" - and the right answer is the HonestDiD framework (Rambachan & Roth 2023, surveyed in Roth, Sant'Anna, Bilinski & Poe 2023): it returns robust confidence intervals under bounded deviations from parallel trends, parameterised here by the relative-magnitudes bound M-bar (post-treatment violations at most M-bar times the largest pre-treatment violation).

diff_diff.compute_honest_did (re-exported from diff_diff.honestdid) takes a fitted Callaway-Sant'Anna result and a vector of bounds, and returns CIs that grow as M-bar increases. The corresponding plot is the canonical "sensitivity sleeve" chart.

In [8]:
# HonestDiD relative-magnitudes bound at M=1.0 - the canonical "post-treatment
# violations no larger than the worst pre-period violation" sensitivity check
# (Rambachan & Roth 2023). compute_honest_did accepts a scalar M and returns a
# HonestDiDResults with the robust CI; for a multi-M sensitivity sleeve
# you'd build SensitivityResults manually and pass it to dd.plot_sensitivity.
honest = compute_honest_did(
    res,
    method="relative_magnitude",
    M=1.0,
)

print("HonestDiD sensitivity (Roth-Sant'Anna-Bilinski-Poe 2023):")
print(f"  Method:      relative_magnitude, M=1.0")
print(f"  Robust CI:   [{honest.ci_lb:.4f}, {honest.ci_ub:.4f}]")
print(f"  Original ATT CI: see CallawaySantAnna summary in the previous cell.")
HonestDiD sensitivity (Roth-Sant'Anna-Bilinski-Poe 2023):
  Method:      relative_magnitude, M=1.0
  Robust CI:   [-3.6616, 2.1283]
  Original ATT CI: see CallawaySantAnna summary in the previous cell.

Step 6 - Combined diagnostic report¶

bd.as_balance_diagnostic(sample, did_results) joins the pre-fit diagnostics balance owns (ASMD pre/post, Kish ESS, design effect) with the post-fit diagnostics diff-diff owns (SurveyMetadata.effective_n, design_effect, sum_weights; DEFFDiagnostics per coefficient when present). This is the single dict you tabulate in the methods appendix.

Missing fields return None rather than raising - the adapter never blocks the user's notebook with a KeyError.
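The two weight diagnostics in that dict (`balance_kish_ess`, `balance_design_effect`) are the standard Kish formulas. A minimal sketch of the arithmetic (an assumption: this mirrors the textbook definitions, not balance's exact implementation):

```python
def kish_ess(weights):
    """Kish effective sample size: n_eff = (sum w)^2 / sum(w^2)."""
    s1 = sum(weights)
    s2 = sum(w * w for w in weights)
    return s1 * s1 / s2

def kish_deff(weights):
    """Kish design effect of the weights: DEFF = n / n_eff,
    equivalently 1 + CV^2 of the weights. DEFF = 1 iff all weights equal."""
    return len(weights) / kish_ess(weights)

w = [1.0, 1.0, 2.0, 0.5, 1.5]
print(round(kish_ess(w), 3), round(kish_deff(w), 3))  # 4.235 1.181
```

Note the two design effects printed in this notebook (one from balance, one from diff-diff's `SurveyMetadata`) agree to four decimals because they are computed from the same weight column at different stages of the pipeline.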

In [9]:
diag = as_balance_diagnostic(adj, res)

# Display as a one-row pandas DataFrame for easy copy-paste into a methods
# appendix. None entries surface as NaN in the printed table.
diag_df = pd.DataFrame([diag]).T.rename(columns={0: "value"})
print(diag_df)
INFO (2026-05-07 22:06:30,609) [balance_frame/diagnostics (line 3209)]: Starting computation of diagnostics of the fitting
WARNING (2026-05-07 22:06:30,661) [weighted_comparisons_stats/asmd (line 595)]: sample_df and target_df must have the same column names.
sample_df column names: ['age_band', 'educa', 'first_treat_year', 'fpc', 'psu', 'quarter', 'race[T.black]', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]', 'state', 'stratum', 'treated', 'true_effect', 'year']
target_df column names: ['age_band', 'educa', 'race[T.hispanic]', 'race[T.other]', 'race[T.white]', 'sex[female]', 'sex[male]']
INFO (2026-05-07 22:06:30,840) [balance_frame/diagnostics (line 3236)]: Done computing diagnostics
                              value
att                       -0.638612
se                             None
conf_int                       None
n_obs                           350
diff_diff_design_effect    1.340294
diff_diff_effective_n     37.305252
diff_diff_sum_weights          50.0
balance_kish_ess         261.131495
balance_design_effect      1.340321
balance_asmd_mean_post     0.029234
balance_asmd_max_post      0.176139

Step 7 - Contrast: what happens if we skip the balance step?¶

A natural reviewer question is: "how much does the IPW reweighting actually matter?" To answer it, we re-run Callaway-Sant'Anna on the panel built only from BRFSS' design weights - no ACS reweighting, no balance.

This is the ablation cell. How far the no-balance ATT drifts depends on the strength of informative_sampling / weight_variation configured in Cell 5: with strongly informative sampling the unweighted estimate is attenuated toward zero or wrong-signed, while a mildly informative configuration (like the default one here) leaves the two estimates close together. Either way, the side-by-side contrast is the single most persuasive chart for an external audience - it makes "reweight before you DiD" a visible methodological choice rather than an abstract recommendation.

Note that we are not going through the adapter here, because the adapter assumes a balance.Sample with an active weight column. We build the panel with diff-diff's aggregate_survey directly, using the raw design_weight column.

In [10]:
# Build a fresh panel from the *raw* design weights (no balance step).
unweighted_panel, unweighted_design = aggregate_survey(
    df,
    by=["state", "year"],
    outcomes="asthnow",
    survey_design=SurveyDesign(weights="design_weight", weight_type="pweight"),
    covariates=["age_band", "educa"],
    second_stage_weights="pweight",
)
unweighted_panel = unweighted_panel.merge(first_treat, on="state", how="left")
unweighted_panel["g"] = unweighted_panel["g"].fillna(0).astype(int)

# Direct CallawaySantAnna call (no adapter needed since we're not coming
# from a balance.Sample). NOTE: the Step 4 call goes through `fit_did`,
# which introspects the estimator's __init__ vs fit() signatures and
# routes `base_period="universal"` / `cluster="state"` to whichever slot
# accepts each. Replicating that routing manually here is fragile against
# upstream diff-diff API drift -- an earlier ablation-parity attempt that
# pinned `base_period` to __init__ and `cluster` to fit() broke the
# notebook CI execute step on a diff-diff version where the placement
# differs. So we keep this direct call to a minimal, robust subset
# (estimation_method, control_group, aggregate, plus the panel kwargs
# and the survey_design) and let small default-parameter differences
# show up in the printed delta. For pixel-perfect parameter parity in
# an ablation, wrap `unweighted_panel` in a balance.Sample and route
# through `fit_did` exactly as Step 4 does -- that path inherits the
# same signature-introspection and is robust across diff-diff releases.
res_unweighted = CallawaySantAnna(
    estimation_method="dr",
    control_group="not_yet_treated",
).fit(
    unweighted_panel,
    outcome="asthnow_mean",
    time="year",
    unit="state",
    first_treat="g",
    covariates=["age_band_mean", "educa_mean"],
    aggregate="all",
    survey_design=SurveyDesign(
        weights=unweighted_design.weights,
        psu="state",
        weight_type="pweight",
    ),
)

# Side-by-side comparison. The printed delta below is *primarily* the
# effect of the balance reweighting step, but it also includes minor
# default-parameter differences (Step 4 sets base_period="universal"
# and cluster="state" via fit_did, the direct call above does not).
# See the comment block above the CallawaySantAnna call for the rationale
# and for the apples-to-apples-via-fit_did alternative.
print("ATT (balance-weighted) :", res.overall_att)
print("ATT (no balance step)  :", res_unweighted.overall_att)
print(
    "Difference (mostly balance reweighting; also includes minor "
    "estimator-default differences -- see comment above):",
    res.overall_att - res_unweighted.overall_att,
)
/opt/hostedtoolcache/Python/3.12.13/x64/lib/python3.12/site-packages/diff_diff/prep.py:1660: UserWarning: pweight weights normalized to mean=1 (sum=350). Original sum was 525.
  full_resolved = effective_design.resolve(data)
/opt/hostedtoolcache/Python/3.12.13/x64/lib/python3.12/site-packages/diff_diff/staggered.py:2530: UserWarning: Low Events Per Variable (EPV = 5.0) in propensity score model for cohort g=2020. 10 minority-class observations for 2 predictor variable(s). Peduzzi et al. (1996) recommend EPV >= 10. Estimates may be unreliable (overfitting, biased coefficients, inflated standard errors). Consider estimation_method='reg' to avoid propensity scores.
  beta_logistic, pscore = solve_logit(
ATT (balance-weighted) : -0.6386118057757502
ATT (no balance step)  : -0.6389086149015202
Difference (mostly balance reweighting; also includes minor estimator-default differences -- see comment above): 0.0002968091257700145
/opt/hostedtoolcache/Python/3.12.13/x64/lib/python3.12/site-packages/diff_diff/staggered.py:2530: UserWarning: Low Events Per Variable (EPV = 5.0) in propensity score model for cohort g=2022. 10 minority-class observations for 2 predictor variable(s). Peduzzi et al. (1996) recommend EPV >= 10. Estimates may be unreliable (overfitting, biased coefficients, inflated standard errors). Consider estimation_method='reg' to avoid propensity scores.
  beta_logistic, pscore = solve_logit(
/tmp/ipykernel_2919/2787343228.py:31: UserWarning: Low Events Per Variable (EPV) detected in propensity score estimation for 12 of 12 cell(s). Minimum EPV = 5.0 (cohort g=2020). Consider estimation_method='reg' (avoids propensity scores) or reducing the number of covariates. See results.epv_summary() for details.
  ).fit(
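The `pweight weights normalized to mean=1` warning in the output above is a harmless mean-one rescaling: the printed sums (525 before, 350 after) match the 350 rows. A minimal sketch of that arithmetic (an assumption: standard mean-1 normalization, w_i' = w_i * n / sum(w), not diff-diff's exact code path):

```python
def normalize_mean_one(weights):
    """Rescale weights so they average 1 (sum equals n). Relative weights
    are preserved, so weighted means and point estimates are unchanged;
    only the nominal scale of the weights moves."""
    n = len(weights)
    total = sum(weights)
    return [w * n / total for w in weights]

w = [1.5] * 350             # any weight vector summing to 525 over 350 rows
w_norm = normalize_mean_one(w)
print(sum(w), sum(w_norm))  # 525.0 350.0
```

Because only the scale changes, the warning affects neither the ATT nor (for pweights) its standard error; it exists so that downstream variance formulas can treat the weight sum as the sample size.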

Discussion¶

What this notebook demonstrated:

  • A non-probability-ish microdata frame (BRFSS shape, declining response rate) is reweighted to ACS demographics with three lines of balance code (Cell 9).
  • The reweighted Sample is handed to diff-diff via a single bd.to_panel_for_did call that hides the weight_pre_adjust bookkeeping, hard-codes weight_type="pweight" (required by CallawaySantAnna), and returns a ready-to-fit panel + second-stage SurveyDesign.
  • Callaway-Sant'Anna doubly-robust ATT(g, t) is fit in one fit_did call.
  • Sensitivity to parallel-trends violation is one line (compute_honest_did).
  • The combined balance + diff-diff diagnostic dict is one line (as_balance_diagnostic).
  • The ablation (Cell 19) quantifies what skipping the balance step costs: negligible under this notebook's default synthetic configuration, but growing with informative_sampling - either way it makes "reweight before you DiD" a visible decision rather than an abstract recommendation.

What to try next:

  1. Real BRFSS data: replace Cell 5 with pyreadstat.read_xport(...) calls per year and concatenate. The rest of the notebook works unchanged.
  2. Other estimators: swap estimator="CallawaySantAnna" in Cell 13 for "SunAbraham", "ImputationDiD" (BJS), "WooldridgeDiD" (ETWFE), or "EfficientDiD". The adapter resolves any name in diff_diff/__init__.py.
  3. Continuous treatment: the same pipeline works for dd.ContinuousDiD if your treatment is dose-of-policy rather than on/off.
  4. Different reweighting method: try method="cbps" or method="rake" in Cell 9 - the integration story is the same.
  5. Cross-language replication: the BRFSS smoking-ban DiD has a natural R counterpart in survey::svydesign + did::att_gt. The numeric agreement of the two pipelines is the validation hook for a JSS / Epidemiology-Methods note.

References¶

Methodology¶

  • Sant'Anna, P. H. C., & Zhao, J. (2020). Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - the doubly-robust DiD that diff-diff's estimation_method="dr" operationalises.
  • Callaway, B., & Sant'Anna, P. H. C. (2021). Difference-in-differences with multiple time periods. Journal of Econometrics, 225(2), 200-230. - the ATT(g, t) estimator behind CallawaySantAnna.
  • Roth, J., Sant'Anna, P. H. C., Bilinski, A., & Poe, J. (2023). What's trending in difference-in-differences? A synthesis of the recent econometrics literature. Journal of Econometrics, 235(2), 2218-2244. - review piece situating the estimators and sensitivity tools used here.
  • Rambachan, A., & Roth, J. (2023). A more credible approach to parallel trends. Review of Economic Studies, 90(5), 2555-2591. - the HonestDiD bounds (smoothness and relative magnitudes) behind the Step 5 sensitivity analysis.
  • Bruns-Smith, D., Dukes, O., Feller, A., & Ogburn, E. L. (2023). Augmented balancing weights as linear regression. arXiv 2304.14545. - modern view of IPW + outcome-regression coupling that frames why the doubly-robust DiD-with-balance pipeline is the right object.
  • Sarig, T., Galili, T., & Eilat, R. (2023). balance - a Python package for balancing biased data samples. arXiv 2307.06024. - the balance package paper.
  • Ghandour, K., & Reece, A. (2025). diff-diff: Modern Difference-in-Differences in Python. Zenodo, DOI 10.5281/zenodo.19646175. - the diff-diff package citation.

Data¶

  • CDC Behavioral Risk Factor Surveillance System (BRFSS) Annual Data, 2024. - the public-use file this notebook's synthetic frame mirrors.
  • Census ACS 1-year PUMS, 2018-2024. - the demographic-marginal target frame.

Related tutorials¶

  • balance_quickstart - the standard introduction to balance.
  • balance_quickstart_cbps - same workflow with CBPS instead of IPW.
  • balance_transformations_and_formulas - how to control covariate transformations in adjust().