77import ssl
88
99
10- def read_cbo_forecast ():
10+ def read_cbo_forecast (
11+ lt_econ_url = "https://www.cbo.gov/system/files/2025-03/57054-2025-03-LTBO-econ.xlsx" ,
12+ lt_budget_url = "https://www.cbo.gov/system/files/2025-03/51119-2025-03-LTBO-budget.xlsx" ,
13+ ten_year_budget_url = "https://www.cbo.gov/system/files/2025-01/51118-2025-01-Budget-Projections.xlsx" ,
14+ ten_year_macro_url = "https://www.cbo.gov/system/files/2025-01/51135-2025-01-Economic-Projections.xlsx" ,
15+ lt_start_year = 1995 ,
16+ lt_end_year = 2055 ,
17+ st_start_year = 2024 ,
18+ st_end_year = 2035 ,
19+ ):
1120 """
1221 This function reads the CBO Long-Term Budget Projections document
1322 from https://www.cbo.gov/about/products/budget-economic-data#1
14- and then formats the relevant data for use with OG-Core
23+ and then formats the relevant data for use with OG-Core.
24+
25+ Warning: CBO spreadsheets are not consistent across years so you may
26+ run into errors passing different URLs to this function.
1527 """
16- CBO_LT_URL = (
17- "https://www.cbo.gov/system/files/2020-09/51119-2020-09-ltbo_0.xlsx"
18- )
19- # Read in data
20- df = pd .read_excel (
21- CBO_LT_URL , sheet_name = "3. Economic Vars" , skiprows = 7 , nrows = 45
22- )
23- df .drop (columns = ["Unnamed: 3" , "Unnamed: 4" ], inplace = True )
24- df [
25- ~ (
26- (pd .isnull (df ["Unnamed: 0" ]))
27- & (pd .isnull (df ["Unnamed: 1" ]))
28- & (pd .isnull (df ["Unnamed: 2" ]))
29- )
30- ]
31- # df.fillna(value=np.nan, inplace=True)
32- df .fillna (value = "" , inplace = True )
33- df ["full_var_name" ] = (
34- df ["Unnamed: 0" ] + df ["Unnamed: 1" ] + df ["Unnamed: 2" ]
35- )
3628 CBO_VAR_NAMES = {
37- "Real GDP (Billions of 2019 dollars) " : "Y" ,
38- "On 10-year Treasury notes and the OASDI trust funds " : "r" ,
39- "Growth of Real Earnings per Worker " : "w_growth" ,
40- "Growth of Total Hours Worked " : "L_growth" ,
41- "Hours of All Persons (Nonfarm Business Sector )" : "L" ,
42- "Personal Consumption Expenditures " : "C" ,
43- "Gross Private Domestic Investment " : "I_total" ,
29+ "Real GDP (trillions of 2017 dollars)" : "Y" ,
30+ "Real rates " : "r" ,
31+ "Growth of real earnings per worker " : "w_growth" ,
32+ "Growth of total hours worked " : "L_growth" ,
33+ "Hours of All Persons (nonfarm business sector )" : "L" ,
34+ "Personal consumption expenditures " : "C" ,
35+ "Gross private domestic investment " : "I_total" ,
4436 "Government Consumption Expenditures and Gross Investment" : "G" ,
4537 "Old-Age and Survivors Insurance" : "agg_pension_outlays" ,
4638 "Individual income taxes" : "iit_revenue" ,
4739 "Payroll taxes" : "payroll_tax_revenue" ,
4840 "Corporate income taxes" : "business_tax_revenue" ,
49- "Wages and Salaries " : "wL" ,
41+ "U.S. wage and salary disbursements \n (trillions of dollars) " : "wL" ,
5042 }
51- df ["var_name" ] = df ["full_var_name" ].replace (CBO_VAR_NAMES )
43+
44+ # Econ data in levels
45+ # Read in data
46+ df = pd .read_excel (
47+ lt_econ_url ,
48+ sheet_name = "3. Econ Vars_Annual Levels" ,
49+ skiprows = 6 ,
50+ nrows = 62 ,
51+ )
52+ # replace column names with full variable names
53+ df .rename (columns = CBO_VAR_NAMES , inplace = True )
54+ # keep only variables that map to model variables
55+ df .set_index ("Year" , inplace = True )
56+ df_levels = df .loc [:, df .columns .isin (CBO_VAR_NAMES .values ())]
57+ df_levels .reset_index (inplace = True )
58+
59+ # Econ data in rates
60+ # Read in data
61+ df = pd .read_excel (
62+ lt_econ_url ,
63+ sheet_name = "1. Econ Vars_Annual Rates" ,
64+ skiprows = 7 ,
65+ nrows = 39 ,
66+ )
67+ df [~ ((pd .isnull (df ["Unnamed: 0" ])))]
68+ df .rename (columns = {"Unnamed: 0" : "variable" }, inplace = True )
69+ df ["var_name" ] = df ["variable" ].replace (CBO_VAR_NAMES )
5270 # keep just variables of interest
5371 df .drop (
54- columns = ["Unnamed: 0" , "Unnamed: 1" , "Unnamed: 2" , "full_var_name " ],
72+ columns = ["variable " ],
5573 inplace = True ,
5674 )
5775 df = df [df ["var_name" ].isin (CBO_VAR_NAMES .values ())]
@@ -60,82 +78,129 @@ def read_cbo_forecast():
6078 df .drop_duplicates (subset = "var_name" , inplace = True )
6179 # reshape so that variable names down column
6280 df = pd .melt (
63- df , id_vars = "var_name" , value_vars = [i for i in range (1990 , 2051 )]
81+ df ,
82+ id_vars = "var_name" ,
83+ value_vars = [i for i in range (lt_start_year , lt_end_year + 1 )],
6484 )
65- df = df .pivot (index = "variable" , columns = "var_name" , values = "value" )
66- df .reset_index (inplace = True )
67- df .rename (columns = {"variable" : "year" }, inplace = True )
68- # add debt forcast
85+ df_rates = df .pivot (index = "variable" , columns = "var_name" , values = "value" )
86+ df_rates .reset_index (inplace = True )
87+ df_rates .rename (columns = {"variable" : "Year" }, inplace = True )
88+
89+ # add debt forecast
6990 df_fiscal = pd .read_excel (
70- CBO_LT_URL ,
71- sheet_name = "1. Summary Extended Baseline" ,
91+ lt_budget_url , # URL is supplied by the lt_budget_url function argument
92+ sheet_name = "1. Summary Ext Baseline" ,
7293 skiprows = 9 ,
7394 nrows = 32 ,
7495 )
7596 df_fiscal = df_fiscal [
76- ["Fiscal Year " , "Revenues " , "Federal Debt Held by the Public " ]
97+ ["Fiscal year " , "Total " , "Federal debt held by the public " ]
7798 ]
78- df_lt = df .merge (
79- df_fiscal , left_on = "year" , right_on = "Fiscal Year" , how = "left"
99+ # rename Total to "Revenues"
100+ df_fiscal .rename (columns = {"Total" : "Revenues" }, inplace = True )
101+ # merge to macro levels data
102+ df_lt = df_fiscal .merge (
103+ df_levels , left_on = "Fiscal year" , right_on = "Year" , how = "left"
104+ )
105+ # merge to macro rates data
106+ df_lt = df_lt .merge (
107+ df_rates , left_on = "Fiscal year" , right_on = "Year" , how = "left"
80108 )
81109 df_lt .rename (
82- columns = {"Federal Debt Held by the Public " : "D/Y" }, inplace = True
110+ columns = {"Federal debt held by the public " : "D/Y" }, inplace = True
83111 )
84112 df_lt ["D" ] = df_lt ["Y" ] * df_lt ["D/Y" ]
85-
86- CBO_10yr_budget_URL = (
87- "https://www.cbo.gov/system/files/2021-02/51118-2021-02-11-"
88- + "budgetprojections.xlsx"
89- )
113+ # drop Year_x, Year_y columns
114+ df_lt .drop (columns = ["Year_x" , "Year_y" ], inplace = True )
115+ # rename Fiscal year to year
116+ df_lt .rename (columns = {"Fiscal year" : "year" }, inplace = True )
117+ # %%
118+ # 10 year budget
90119 df = pd .read_excel (
91- CBO_10yr_budget_URL , sheet_name = "Table 1 -1" , skiprows = 8 , nrows = 7
120+ ten_year_budget_url , sheet_name = "Table B -1" , skiprows = 7 , nrows = 7
92121 )
93122 df .rename (
94- columns = {"Unnamed: 0" : "variable" , "Actual, \n 2020" : 2020 },
123+ columns = {
124+ "Unnamed: 0" : "variable" ,
125+ "Actual, 2024" : st_start_year ,
126+ },
95127 inplace = True ,
96128 )
97- df .drop (columns = ["2026.1" , "2031.1" ], inplace = True )
98- df1 = df [~ ((pd .isnull (df .variable )) | (df .variable == "Other" ))]
99-
129+ df .drop (columns = ["2026–2030" , "2026–2035" ], inplace = True )
130+ df1 = df [
131+ ~ (
132+ (pd .isnull (df .variable ))
133+ | (df .variable == "Other" )
134+ | (df .variable == "Revenues" )
135+ )
136+ ]
137+ # cast all year columns to float
138+ df1 .iloc [:, 1 :] = df1 .iloc [:, 1 :].astype (float )
139+ # cast all year column names to int
140+ df1 .columns = [
141+ int (i ) if isinstance (i , str ) and i .isdigit () else i
142+ for i in df1 .columns
143+ ]
144+ # data from other table
100145 df = pd .read_excel (
101- CBO_10yr_budget_URL , sheet_name = "Table 1-3 " , skiprows = 9 , nrows = 22
146+ ten_year_budget_url , sheet_name = "Table B-4 " , skiprows = 8 , nrows = 18
102147 )
103- df .rename (columns = {"Unnamed: 0" : "variable" }, inplace = True )
104- df .drop (columns = ["2026.1" , "2031.1" ], inplace = True )
148+ df .rename (
149+ columns = {
150+ "Unnamed: 0" : "variable" ,
151+ "Actual, 2024" : st_start_year ,
152+ },
153+ inplace = True ,
154+ )
155+ df .drop (columns = ["2026–2030" , "2026–2035" ], inplace = True )
105156 df .drop_duplicates (subset = "variable" , keep = "last" , inplace = True )
106157 df2 = df [~ pd .isnull (df .variable )]
158+ # cast all year columns to float
159+ df2 .iloc [:, 1 :] = df2 .iloc [:, 1 :].astype (float )
160+ # cast all year column names to int
161+ df2 .columns = [
162+ int (i ) if isinstance (i , str ) and i .isdigit () else i
163+ for i in df2 .columns
164+ ]
107165
108- CBO_10yr_macro_URL = (
109- "https://www.cbo.gov/system/files/2021-02/51135-2021-02-"
110- + "economicprojections.xlsx"
111- )
166+ # %%
167+ # 10 year macro forecast
112168 df = pd .read_excel (
113- CBO_10yr_macro_URL ,
169+ ten_year_macro_url ,
114170 sheet_name = "2. Calendar Year" ,
115171 skiprows = 6 ,
116172 nrows = 131 ,
117173 )
118- df .rename (columns = {"Unnamed: 1" : "variable" }, inplace = True )
119- df .drop (columns = ["Unnamed: 0" , "Unnamed: 2" , "Units" ], inplace = True )
174+ df .rename (columns = {"Unnamed: 0" : "variable" }, inplace = True )
120175 # Note that real values come second (after nominal values)
121176 df .drop_duplicates (subset = "variable" , keep = "last" , inplace = True )
177+ df .drop (columns = ["Units" ], inplace = True )
122178 df3 = df [~ pd .isnull (df .variable )]
179+ # cast all year columns to float
180+ df3 .iloc [:, 1 :] = df3 .iloc [:, 1 :].astype (float )
181+ # cast all year column names to int
182+ df3 .columns = [
183+ int (i ) if isinstance (i , str ) and i .isdigit () else i
184+ for i in df3 .columns
185+ ]
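186+ # NOTE(review): the concat below appears to introduce many NaN values in the final dataframe (presumably where the three frames' columns do not line up) -- confirm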
123187 df_st = pd .concat ([df1 , df2 , df3 ], sort = False , ignore_index = True )
124- # df_st = df1.append(df2, sort=False, ignore_index=True).append(
125- # df3, sort=False, ignore_index=True
126- # )
188+
127189 df_st ["var_name" ] = df_st ["variable" ].replace (CBO_VAR_NAMES )
128190 df_st = df_st [~ pd .isnull (df_st .var_name )]
129191 df_st .drop (columns = ["variable" ], inplace = True )
130192 # reshape so each row a year
131193 df_st = pd .melt (
132- df_st , id_vars = "var_name" , value_vars = [i for i in range (2017 , 2031 )]
194+ df_st ,
195+ id_vars = "var_name" ,
196+ value_vars = [i for i in range (st_start_year , st_end_year + 1 )],
133197 )
134198 df_st = df_st .pivot (
135199 index = "variable" , columns = "var_name" , values = "value"
136200 ).reset_index ()
137201 df_st .rename (columns = {"variable" : "year" }, inplace = True )
138202
203+ # %%
139204 # merge with long term data
140205 df_cbo = df_lt .merge (
141206 df_st , how = "outer" , on = "year" , suffixes = ("_lt" , "_st" )
0 commit comments