Skip to content

Commit 3b553ed

Browse files
authored
Merge pull request #134 from jdebacker/cbo_forecast
Merging
2 parents 7d369fd + a308213 commit 3b553ed

File tree

2 files changed

+133
-68
lines changed

2 files changed

+133
-68
lines changed

ogusa/utils.py

Lines changed: 132 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -7,51 +7,69 @@
77
import ssl
88

99

10-
def read_cbo_forecast():
10+
def read_cbo_forecast(
11+
lt_econ_url="https://www.cbo.gov/system/files/2025-03/57054-2025-03-LTBO-econ.xlsx",
12+
lt_budget_url="https://www.cbo.gov/system/files/2025-03/51119-2025-03-LTBO-budget.xlsx",
13+
ten_year_budget_url="https://www.cbo.gov/system/files/2025-01/51118-2025-01-Budget-Projections.xlsx",
14+
ten_year_macro_url="https://www.cbo.gov/system/files/2025-01/51135-2025-01-Economic-Projections.xlsx",
15+
lt_start_year=1995,
16+
lt_end_year=2055,
17+
st_start_year=2024,
18+
st_end_year=2035,
19+
):
1120
"""
1221
This function reads the CBO Long-Term Budget Projections document
1322
from https://www.cbo.gov/about/products/budget-economic-data#1
14-
and then formats the relevant data for use with OG-Core
23+
and then formats the relevant data for use with OG-Core.
24+
25+
Warning: CBO spreadsheets are not consistent across years so you may
26+
run into errors passing different URLs to this function.
1527
"""
16-
CBO_LT_URL = (
17-
"https://www.cbo.gov/system/files/2020-09/51119-2020-09-ltbo_0.xlsx"
18-
)
19-
# Read in data
20-
df = pd.read_excel(
21-
CBO_LT_URL, sheet_name="3. Economic Vars", skiprows=7, nrows=45
22-
)
23-
df.drop(columns=["Unnamed: 3", "Unnamed: 4"], inplace=True)
24-
df[
25-
~(
26-
(pd.isnull(df["Unnamed: 0"]))
27-
& (pd.isnull(df["Unnamed: 1"]))
28-
& (pd.isnull(df["Unnamed: 2"]))
29-
)
30-
]
31-
# df.fillna(value=np.nan, inplace=True)
32-
df.fillna(value="", inplace=True)
33-
df["full_var_name"] = (
34-
df["Unnamed: 0"] + df["Unnamed: 1"] + df["Unnamed: 2"]
35-
)
3628
CBO_VAR_NAMES = {
37-
"Real GDP (Billions of 2019 dollars) ": "Y",
38-
"On 10-year Treasury notes and the OASDI trust funds": "r",
39-
"Growth of Real Earnings per Worker": "w_growth",
40-
"Growth of Total Hours Worked": "L_growth",
41-
"Hours of All Persons (Nonfarm Business Sector)": "L",
42-
"Personal Consumption Expenditures": "C",
43-
"Gross Private Domestic Investment": "I_total",
29+
"Real GDP (trillions of 2017 dollars)": "Y",
30+
"Real rates": "r",
31+
"Growth of real earnings per worker": "w_growth",
32+
"Growth of total hours worked": "L_growth",
33+
"Hours of All Persons (nonfarm business sector)": "L",
34+
"Personal consumption expenditures": "C",
35+
"Gross private domestic investment": "I_total",
4436
"Government Consumption Expenditures and Gross Investment": "G",
4537
"Old-Age and Survivors Insurance": "agg_pension_outlays",
4638
"Individual income taxes": "iit_revenue",
4739
"Payroll taxes": "payroll_tax_revenue",
4840
"Corporate income taxes": "business_tax_revenue",
49-
"Wages and Salaries": "wL",
41+
"U.S. wage and salary disbursements\n (trillions of dollars)": "wL",
5042
}
51-
df["var_name"] = df["full_var_name"].replace(CBO_VAR_NAMES)
43+
44+
# Econ data in levels
45+
# Read in data
46+
df = pd.read_excel(
47+
lt_econ_url,
48+
sheet_name="3. Econ Vars_Annual Levels",
49+
skiprows=6,
50+
nrows=62,
51+
)
52+
# rename CBO column headers to the model variable names in CBO_VAR_NAMES
53+
df.rename(columns=CBO_VAR_NAMES, inplace=True)
54+
# keep only variables that map to model variables
55+
df.set_index("Year", inplace=True)
56+
df_levels = df.loc[:, df.columns.isin(CBO_VAR_NAMES.values())]
57+
df_levels.reset_index(inplace=True)
58+
59+
# Econ data in rates
60+
# Read in data
61+
df = pd.read_excel(
62+
lt_econ_url,
63+
sheet_name="1. Econ Vars_Annual Rates",
64+
skiprows=7,
65+
nrows=39,
66+
)
67+
df[~((pd.isnull(df["Unnamed: 0"])))]
68+
df.rename(columns={"Unnamed: 0": "variable"}, inplace=True)
69+
df["var_name"] = df["variable"].replace(CBO_VAR_NAMES)
5270
# keep just variables of interest
5371
df.drop(
54-
columns=["Unnamed: 0", "Unnamed: 1", "Unnamed: 2", "full_var_name"],
72+
columns=["variable"],
5573
inplace=True,
5674
)
5775
df = df[df["var_name"].isin(CBO_VAR_NAMES.values())]
@@ -60,82 +78,129 @@ def read_cbo_forecast():
6078
df.drop_duplicates(subset="var_name", inplace=True)
6179
# reshape to long format so that variable names run down a column
6280
df = pd.melt(
63-
df, id_vars="var_name", value_vars=[i for i in range(1990, 2051)]
81+
df,
82+
id_vars="var_name",
83+
value_vars=[i for i in range(lt_start_year, lt_end_year + 1)],
6484
)
65-
df = df.pivot(index="variable", columns="var_name", values="value")
66-
df.reset_index(inplace=True)
67-
df.rename(columns={"variable": "year"}, inplace=True)
68-
# add debt forcast
85+
df_rates = df.pivot(index="variable", columns="var_name", values="value")
86+
df_rates.reset_index(inplace=True)
87+
df_rates.rename(columns={"variable": "Year"}, inplace=True)
88+
89+
# add debt forecast
6990
df_fiscal = pd.read_excel(
70-
CBO_LT_URL,
71-
sheet_name="1. Summary Extended Baseline",
91+
lt_budget_url, # URL of the long-term budget workbook (function argument)
92+
sheet_name="1. Summary Ext Baseline",
7293
skiprows=9,
7394
nrows=32,
7495
)
7596
df_fiscal = df_fiscal[
76-
["Fiscal Year", "Revenues", "Federal Debt Held by the Public"]
97+
["Fiscal year", "Total", "Federal debt held by the public"]
7798
]
78-
df_lt = df.merge(
79-
df_fiscal, left_on="year", right_on="Fiscal Year", how="left"
99+
# rename Total to "Revenues"
100+
df_fiscal.rename(columns={"Total": "Revenues"}, inplace=True)
101+
# merge to macro levels data
102+
df_lt = df_fiscal.merge(
103+
df_levels, left_on="Fiscal year", right_on="Year", how="left"
104+
)
105+
# merge to macro rates data
106+
df_lt = df_lt.merge(
107+
df_rates, left_on="Fiscal year", right_on="Year", how="left"
80108
)
81109
df_lt.rename(
82-
columns={"Federal Debt Held by the Public": "D/Y"}, inplace=True
110+
columns={"Federal debt held by the public": "D/Y"}, inplace=True
83111
)
84112
df_lt["D"] = df_lt["Y"] * df_lt["D/Y"]
85-
86-
CBO_10yr_budget_URL = (
87-
"https://www.cbo.gov/system/files/2021-02/51118-2021-02-11-"
88-
+ "budgetprojections.xlsx"
89-
)
113+
# drop Year_x, Year_y columns
114+
df_lt.drop(columns=["Year_x", "Year_y"], inplace=True)
115+
# rename Fiscal year to year
116+
df_lt.rename(columns={"Fiscal year": "year"}, inplace=True)
117+
# %%
118+
# 10 year budget
90119
df = pd.read_excel(
91-
CBO_10yr_budget_URL, sheet_name="Table 1-1", skiprows=8, nrows=7
120+
ten_year_budget_url, sheet_name="Table B-1", skiprows=7, nrows=7
92121
)
93122
df.rename(
94-
columns={"Unnamed: 0": "variable", "Actual, \n2020": 2020},
123+
columns={
124+
"Unnamed: 0": "variable",
125+
"Actual, 2024": st_start_year,
126+
},
95127
inplace=True,
96128
)
97-
df.drop(columns=["2026.1", "2031.1"], inplace=True)
98-
df1 = df[~((pd.isnull(df.variable)) | (df.variable == "Other"))]
99-
129+
df.drop(columns=["2026–2030", "2026–2035"], inplace=True)
130+
df1 = df[
131+
~(
132+
(pd.isnull(df.variable))
133+
| (df.variable == "Other")
134+
| (df.variable == "Revenues")
135+
)
136+
]
137+
# cast all year columns to float
138+
df1.iloc[:, 1:] = df1.iloc[:, 1:].astype(float)
139+
# cast all year column names to int
140+
df1.columns = [
141+
int(i) if isinstance(i, str) and i.isdigit() else i
142+
for i in df1.columns
143+
]
144+
# data from other table
100145
df = pd.read_excel(
101-
CBO_10yr_budget_URL, sheet_name="Table 1-3", skiprows=9, nrows=22
146+
ten_year_budget_url, sheet_name="Table B-4", skiprows=8, nrows=18
102147
)
103-
df.rename(columns={"Unnamed: 0": "variable"}, inplace=True)
104-
df.drop(columns=["2026.1", "2031.1"], inplace=True)
148+
df.rename(
149+
columns={
150+
"Unnamed: 0": "variable",
151+
"Actual, 2024": st_start_year,
152+
},
153+
inplace=True,
154+
)
155+
df.drop(columns=["2026–2030", "2026–2035"], inplace=True)
105156
df.drop_duplicates(subset="variable", keep="last", inplace=True)
106157
df2 = df[~pd.isnull(df.variable)]
158+
# cast all year columns to float
159+
df2.iloc[:, 1:] = df2.iloc[:, 1:].astype(float)
160+
# cast all year column names to int
161+
df2.columns = [
162+
int(i) if isinstance(i, str) and i.isdigit() else i
163+
for i in df2.columns
164+
]
107165

108-
CBO_10yr_macro_URL = (
109-
"https://www.cbo.gov/system/files/2021-02/51135-2021-02-"
110-
+ "economicprojections.xlsx"
111-
)
166+
# %%
167+
# 10 year macro forecast
112168
df = pd.read_excel(
113-
CBO_10yr_macro_URL,
169+
ten_year_macro_url,
114170
sheet_name="2. Calendar Year",
115171
skiprows=6,
116172
nrows=131,
117173
)
118-
df.rename(columns={"Unnamed: 1": "variable"}, inplace=True)
119-
df.drop(columns=["Unnamed: 0", "Unnamed: 2", "Units"], inplace=True)
174+
df.rename(columns={"Unnamed: 0": "variable"}, inplace=True)
120175
# Note that real values come second (after nominal values)
121176
df.drop_duplicates(subset="variable", keep="last", inplace=True)
177+
df.drop(columns=["Units"], inplace=True)
122178
df3 = df[~pd.isnull(df.variable)]
179+
# cast all year columns to float
180+
df3.iloc[:, 1:] = df3.iloc[:, 1:].astype(float)
181+
# cast all year column names to int
182+
df3.columns = [
183+
int(i) if isinstance(i, str) and i.isdigit() else i
184+
for i in df3.columns
185+
]
186+
# NOTE(review): combining the three short-term tables appears to create a
# lot of NaN values in the final dataframe -- confirm this is expected
123187
df_st = pd.concat([df1, df2, df3], sort=False, ignore_index=True)
124-
# df_st = df1.append(df2, sort=False, ignore_index=True).append(
125-
# df3, sort=False, ignore_index=True
126-
# )
188+
127189
df_st["var_name"] = df_st["variable"].replace(CBO_VAR_NAMES)
128190
df_st = df_st[~pd.isnull(df_st.var_name)]
129191
df_st.drop(columns=["variable"], inplace=True)
130192
# reshape so that each row is a year
131193
df_st = pd.melt(
132-
df_st, id_vars="var_name", value_vars=[i for i in range(2017, 2031)]
194+
df_st,
195+
id_vars="var_name",
196+
value_vars=[i for i in range(st_start_year, st_end_year + 1)],
133197
)
134198
df_st = df_st.pivot(
135199
index="variable", columns="var_name", values="value"
136200
).reset_index()
137201
df_st.rename(columns={"variable": "year"}, inplace=True)
138202

203+
# %%
139204
# merge with long term data
140205
df_cbo = df_lt.merge(
141206
df_st, how="outer", on="year", suffixes=("_lt", "_st")

tests/test_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ def test_read_cbo_forecast():
99
test_df = read_cbo_forecast()
1010

1111
assert np.allclose(
12-
test_df.loc[test_df["year"] == 2017, "Y"].values[0], 20344
12+
test_df.loc[test_df["year"] == 2026, "Y"].values[0], 24.2205
1313
)

0 commit comments

Comments
 (0)