|
4 | 4 | import scipy |
5 | 5 | from statsmodels.nonparametric.kernel_regression import KernelReg |
6 | 6 | from scipy.interpolate import UnivariateSpline |
| 7 | +from scipy.linalg import lstsq |
7 | 8 |
|
8 | 9 |
|
9 | 10 | class IOT: |
@@ -127,33 +128,49 @@ def compute_mtr_dist( |
127 | 128 | * mtr_prime (array_like): rate of change in marginal tax rates |
128 | 129 | for each income bin |
129 | 130 | """ |
130 | | - bins = 1000 # number of equal-width bins |
131 | | - data.loc[:, ["z_bin"]] = pd.cut( |
132 | | - data[income_measure], bins, include_lowest=True |
133 | | - ) |
134 | | - binned_data = pd.DataFrame( |
135 | | - data[["mtr", income_measure, "z_bin", weight_var]] |
136 | | - .groupby(["z_bin"], observed=False) |
137 | | - .apply(lambda x: wm(x[["mtr", income_measure]], x[weight_var])) |
138 | | - ) |
139 | | - # make column 0 into two columns |
140 | | - binned_data[["mtr", income_measure]] = pd.DataFrame( |
141 | | - binned_data[0].tolist(), index=binned_data.index |
142 | | - ) |
143 | | - binned_data.drop(columns=0, inplace=True) |
144 | | - binned_data.reset_index(inplace=True) |
| 131 | + |
145 | 132 | if mtr_smoother == "kreg": |
| 133 | + bins = 1000 # number of equal-width bins |
| 134 | + data.loc[:, ["z_bin"]] = pd.cut( |
| 135 | + data[income_measure], bins, include_lowest=True |
| 136 | + ) |
| 137 | + binned_data = pd.DataFrame( |
| 138 | + data[["mtr", income_measure, "z_bin", weight_var]] |
| 139 | + .groupby(["z_bin"], observed=False) |
| 140 | + .apply(lambda x: wm(x[["mtr", income_measure]], x[weight_var])) |
| 141 | + ) |
| 142 | + # make column 0 into two columns |
| 143 | + binned_data[["mtr", income_measure]] = pd.DataFrame( |
| 144 | + binned_data[0].tolist(), index=binned_data.index |
| 145 | + ) |
| 146 | + binned_data.drop(columns=0, inplace=True) |
| 147 | + binned_data.reset_index(inplace=True) |
146 | 148 | mtr_function = KernelReg( |
147 | 149 | binned_data["mtr"].dropna(), |
148 | 150 | binned_data[income_measure].dropna(), |
149 | 151 | var_type="c", |
150 | 152 | reg_type="ll", |
151 | 153 | ) |
152 | 154 | mtr, _ = mtr_function.fit(self.z) |
| 155 | + mtr_prime = np.gradient(mtr, edge_order=2) |
| 156 | + elif mtr_smoother == "HSV": |
| 157 | + # estimate the HSV function on mtrs via weighted least squares |
| 158 | + X = np.log(data[income_measure].values) |
| 159 | + X = np.column_stack((np.ones(len(X)), X)) |
| 160 | + w = np.array(data[weight_var].values) |
| 161 | + w_sqrt = np.sqrt(w) |
| 162 | + y = np.log(1-data["mtr"].values) |
| 163 | + X_weighted = X * w_sqrt[:, np.newaxis] |
| 164 | + y_weighted = y * w_sqrt |
| 165 | + coef, _, _, _ = lstsq(X_weighted, y_weighted) |
| 166 | + tau = -coef[1] |
| 167 | + lambda_param = np.exp(coef[0]) / (1-tau) |
| 168 | + mtr = 1 - lambda_param * (1-tau) * self.z ** (-tau) |
| 169 | + mtr_prime = lambda_param * tau * (1-tau) * self.z ** (-tau - 1) |
153 | 170 | else: |
154 | 171 | print("Please enter a value mtr_smoother method") |
155 | 172 | assert False |
156 | | - mtr_prime = np.gradient(mtr, edge_order=2) |
| 173 | + |
157 | 174 |
|
158 | 175 | return mtr, mtr_prime |
159 | 176 |
|
@@ -336,20 +353,22 @@ def sw_weights(self): |
336 | 353 | + ((self.theta_z * self.eti * self.mtr) / (1 - self.mtr)) |
337 | 354 | + ((self.eti * self.z * self.mtr_prime) / (1 - self.mtr) ** 2) |
338 | 355 | ) |
339 | | - integral = np.trapz(g_z, self.z) |
340 | | - # g_z = g_z / integral |
| 356 | + integral = np.trapz(g_z * self.f, self.z) |
| 357 | + g_z = g_z / integral |
| 358 | + |
341 | 359 | # use Lockwood and Weinzierl formula, which should be equivalent but using numerical differentiation |
342 | 360 | bracket_term = ( |
343 | 361 | 1 |
344 | 362 | - self.F |
345 | 363 | - (self.mtr / (1 - self.mtr)) * self.eti * self.z * self.f |
346 | 364 | ) |
347 | | - # d_dz_bracket = np.gradient(bracket_term, edge_order=2) |
348 | | - d_dz_bracket = np.diff(bracket_term) / np.diff(self.z) |
349 | | - d_dz_bracket = np.append(d_dz_bracket, d_dz_bracket[-1]) |
| 365 | + d_dz_bracket = np.gradient(bracket_term, edge_order=2) |
| 366 | + # d_dz_bracket = np.diff(bracket_term) / np.diff(self.z) |
| 367 | + # d_dz_bracket = np.append(d_dz_bracket, d_dz_bracket[-1]) |
350 | 368 | g_z_numerical = -(1 / self.f) * d_dz_bracket |
351 | | - integral = np.trapz(g_z_numerical, self.z) |
352 | | - # g_z_numerical = g_z_numerical / integral |
| 369 | + integral = np.trapz(g_z_numerical * self.f, self.z) |
| 370 | + g_z_numerical = g_z_numerical / integral |
| 371 | + |
353 | 372 | return g_z, g_z_numerical |
354 | 373 |
|
355 | 374 |
|
|
0 commit comments