From 25070d0f215a15d4c4e8145ca0ff8524f8eeeda3 Mon Sep 17 00:00:00 2001 From: eirki Date: Thu, 27 Feb 2020 10:45:59 +0100 Subject: [PATCH 1/9] ENH: Add Hausman specification test --- linearmodels/panel/results.py | 125 ++++++++++++++++++++++- linearmodels/tests/panel/test_results.py | 30 +++++- 2 files changed, 152 insertions(+), 3 deletions(-) diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index 9e48922a0e..70dbc9b6e5 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -1,7 +1,8 @@ from linearmodels.compat.statsmodels import Summary import datetime as dt -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, Tuple +import warnings import numpy as np from pandas import DataFrame, Series, concat @@ -11,7 +12,11 @@ from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat from linearmodels.shared.base import _ModelComparison, _SummaryStr -from linearmodels.shared.hypotheses import WaldTestStatistic, quadratic_form_test +from linearmodels.shared.hypotheses import ( + WaldTestStatistic, + InvalidTestStatistic, + quadratic_form_test, +) from linearmodels.shared.io import _str, pval_format from linearmodels.shared.utility import AttrDict from linearmodels.typing import NDArray, OptionalArrayLike @@ -1002,3 +1007,119 @@ def compare( The model comparison object. """ return PanelModelComparison(results, precision=precision) + + +def hausman( + consistent: PanelResults, + efficient: PanelResults, + include_constant: bool = False, + sigmamore: bool = False, + sigmaless: bool = False, +) -> Tuple[Union[InvalidTestStatistic, WaldTestStatistic], DataFrame]: + r""" + Perform Hausman specification test on two models. + + Parameters + ---------- + consistent : PanelResults + Result from panel regression, known to be consistent. Typically + fixed effects regression. + efficient : PanelResults + Result from panel regression, known to be efficient. 
Typically + random effects regression. + include_constant : bool, optional + Flag indicating whether to include the constant term in the comparison. + sigmamore : bool, optional + Flag indicating whether to base the test on the estimated parameter + covariance from the efficient model. + sigmaless : bool, optional + Flag indicating whether to base the test on the estimated parameter + covariance from the consistent model. + + Returns + ------- + WaldTestStatistic + Object containing test statistic, p-value, distribution and null + DataFrame + Overview of coefficients used in the test, and their differences and standard errors + + Notes + ----- + The test is computed by + .. math:: + H=(b_{1}-b_{0})'\big(\operatorname{Var}(b_{0})-\operatorname{Var}(b_{1})\big)^{-1}(b_{1}-b_{0}) + + where :math:`b_{1}` is the array of coefficients from the model known to be consistent, and + :math:`b_{1}` is the array of coefficients from the model known to be efficient. + + """ + + def alt_cov(res: PanelResults, sigma: float) -> DataFrame: + """ + Calculate covariance using the supplied error variance. Based on + https://github.com/bashtage/linearmodels/blob/4.17/linearmodels/panel/covariance.py#L119 + """ + cov_obj = res._deferred_cov.__self__ + x = cov_obj._x + out = sigma * np.linalg.inv(x.T @ x) + out = (out + out.T) / 2 + return DataFrame(out, columns=res.model.exog.vars, index=res.model.exog.vars) + + def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool: + """ + Check if matrix is positive definite. 
+ """ + if np.array_equal(mat, mat.T): + try: + np.linalg.cholesky(mat) + return True + except np.linalg.LinAlgError: + pass + return False + + if sigmamore and sigmaless: + raise ValueError("Conflicting test parameters") + + common_cols = set(consistent.params.index) & set(efficient.params.index) + if not include_constant: + if consistent.model.has_constant: + common_cols.discard(consistent.model.exog.vars[consistent.model._constant_index]) + if efficient.model.has_constant: + common_cols.discard(efficient.model.exog.vars[efficient.model._constant_index]) + + b0 = consistent.params[common_cols] + b1 = efficient.params[common_cols] + if sigmamore or sigmaless: + s2 = efficient.s2 if sigmamore else consistent.s2 + var0 = alt_cov(consistent, s2).loc[common_cols, common_cols] + var1 = alt_cov(efficient, s2).loc[common_cols, common_cols] + else: + var0 = consistent.cov.loc[common_cols, common_cols] + var1 = efficient.cov.loc[common_cols, common_cols] + + var_diff = var0 - var1 + b_diff = b0 - b1 + std_errors = Series(np.sqrt(np.diagonal(var_diff)), index=var0.index) + estimates = DataFrame( + data={"b0": b0, "b1": b1, "b0-b1": b_diff, "Std. Err.": std_errors} + ) + if not matrix_positive_definite(var_diff): + warnings.warn("(Var(b0) - Var(b1) is not positive definite)") + inv = np.linalg.inv + else: + inv = np.linalg.pinv + test_stat = b_diff.T @ inv(var_diff) @ b_diff + + test: Union[InvalidTestStatistic, WaldTestStatistic] + if test_stat >= 0: + test = WaldTestStatistic( + test_stat, + null="No systematic difference in coefficients between models", + df=b0.size, + name="Hausman specification test", + ) + else: + test = InvalidTestStatistic( + "chi2<0. Model does not meet the assumptions of the Hausman test." 
+ ) + return test, estimates diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index 3f0876c2af..4d28f84aaa 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -1,3 +1,4 @@ +from functools import partial from itertools import product import numpy as np @@ -10,7 +11,7 @@ from linearmodels.iv.model import IV2SLS from linearmodels.panel.data import PanelData from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects -from linearmodels.panel.results import compare +from linearmodels.panel.results import compare, hausman from linearmodels.tests.panel._utility import datatypes, generate_data @@ -170,3 +171,30 @@ def test_wald_test(data): with pytest.raises(ValueError): res.wald_test(restriction, np.zeros(2), formula=formula) + + +@pytest.mark.parametrize("include_constant", (False, True), ids=("", "include_constant")) +@pytest.mark.parametrize("sigmamore", (False, True), ids=("", "sigmamore")) +@pytest.mark.parametrize("sigmaless", (False, True), ids=("", "sigmaless")) +def test_hausman_test(recwarn, data, include_constant, sigmamore, sigmaless): + dependent = data.set_index(["nr", "year"]).lwage + exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]]) + fe_res = PanelOLS(dependent, exog, entity_effects=True).fit() + re_res = RandomEffects(dependent, exog).fit() + func = partial( + hausman, + consistent=fe_res, + efficient=re_res, + include_constant=include_constant, + sigmamore=sigmamore, + sigmaless=sigmaless, + ) + if sigmamore and sigmaless: + with pytest.raises(ValueError): + func() + else: + wald, estimates = func() + if include_constant: + warnings = {str(warn.message) for warn in recwarn} + assert 'invalid value encountered in sqrt' in warnings + assert '(Var(b0) - Var(b1) is not positive definite)' in warnings From b0104dec5b46167449782faed1f0acee85237010 Mon Sep 17 00:00:00 2001 From: eirki Date: Thu, 27 Feb 2020 12:55:51 
+0100 Subject: [PATCH 2/9] Hausman: remove unneeded cov calculations --- linearmodels/panel/results.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index 70dbc9b6e5..dff3ab9144 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -1054,14 +1054,14 @@ def hausman( """ - def alt_cov(res: PanelResults, sigma: float) -> DataFrame: + def alt_cov(res: PanelResults, s2: float) -> DataFrame: """ Calculate covariance using the supplied error variance. Based on https://github.com/bashtage/linearmodels/blob/4.17/linearmodels/panel/covariance.py#L119 """ cov_obj = res._deferred_cov.__self__ x = cov_obj._x - out = sigma * np.linalg.inv(x.T @ x) + out = s2 * np.linalg.inv(x.T @ x) out = (out + out.T) / 2 return DataFrame(out, columns=res.model.exog.vars, index=res.model.exog.vars) @@ -1089,13 +1089,16 @@ def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool: b0 = consistent.params[common_cols] b1 = efficient.params[common_cols] - if sigmamore or sigmaless: - s2 = efficient.s2 if sigmamore else consistent.s2 - var0 = alt_cov(consistent, s2).loc[common_cols, common_cols] - var1 = alt_cov(efficient, s2).loc[common_cols, common_cols] - else: - var0 = consistent.cov.loc[common_cols, common_cols] - var1 = efficient.cov.loc[common_cols, common_cols] + var0 = ( + consistent.cov + if not sigmamore + else alt_cov(consistent, s2=efficient.s2) + ).loc[common_cols, common_cols] + var1 = ( + efficient.cov + if not sigmaless + else alt_cov(efficient, s2=consistent.s2) + ).loc[common_cols, common_cols] var_diff = var0 - var1 b_diff = b0 - b1 From 29bb00cf5578b99843a752b005a11fb5e0d25b86 Mon Sep 17 00:00:00 2001 From: eirki Date: Thu, 27 Feb 2020 14:32:53 +0100 Subject: [PATCH 3/9] Hausman: raise error if clustered/robust cov. Formatting changes.
--- linearmodels/panel/results.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index dff3ab9144..aa9ee6f357 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -17,6 +17,7 @@ InvalidTestStatistic, quadratic_form_test, ) +from linearmodels.panel.covariance import ClusteredCovariance, HeteroskedasticCovariance from linearmodels.shared.io import _str, pval_format from linearmodels.shared.utility import AttrDict from linearmodels.typing import NDArray, OptionalArrayLike @@ -27,6 +28,7 @@ "RandomEffectsResults", "FamaMacBethResults", "compare", + "hausman", ] @@ -1080,24 +1082,33 @@ def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool: if sigmamore and sigmaless: raise ValueError("Conflicting test parameters") + invalid_cov = (ClusteredCovariance, HeteroskedasticCovariance) + if any( + isinstance(res._deferred_cov.__self__, invalid_cov) + for res in (consistent, efficient) + ): + raise TypeError( + "Hausman test cannot be used with clustered or robust covariance" + ) + common_cols = set(consistent.params.index) & set(efficient.params.index) if not include_constant: if consistent.model.has_constant: - common_cols.discard(consistent.model.exog.vars[consistent.model._constant_index]) + common_cols.discard( + consistent.model.exog.vars[consistent.model._constant_index] + ) if efficient.model.has_constant: - common_cols.discard(efficient.model.exog.vars[efficient.model._constant_index]) + common_cols.discard( + efficient.model.exog.vars[efficient.model._constant_index] + ) b0 = consistent.params[common_cols] b1 = efficient.params[common_cols] var0 = ( - consistent.cov - if not sigmamore - else alt_cov(consistent, s2=efficient.s2) + consistent.cov if not sigmamore else alt_cov(consistent, s2=efficient.s2) ).loc[common_cols, common_cols] var1 = ( - efficient.cov - if not sigmaless - else alt_cov(efficient, 
s2=consistent.s2) + efficient.cov if not sigmaless else alt_cov(efficient, s2=consistent.s2) ).loc[common_cols, common_cols] var_diff = var0 - var1 From 25bb7e4a3d6ca34945f6bc0c007123ceec49d7d9 Mon Sep 17 00:00:00 2001 From: eirki Date: Fri, 28 Feb 2020 14:33:32 +0100 Subject: [PATCH 4/9] Wu-Hausman: move to RandomEffectsResults method. Rename --- linearmodels/panel/results.py | 251 +++++++++++------------ linearmodels/tests/panel/test_results.py | 7 +- 2 files changed, 125 insertions(+), 133 deletions(-) diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index aa9ee6f357..2bca27b963 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -28,7 +28,6 @@ "RandomEffectsResults", "FamaMacBethResults", "compare", - "hausman", ] @@ -797,6 +796,128 @@ def theta(self) -> DataFrame: """Values used in generalized demeaning""" return self._theta + def wu_hausman( + self, + other: PanelResults, + include_constant: bool = False, + sigmamore: bool = False, + sigmaless: bool = False, + ) -> Tuple[Union[InvalidTestStatistic, WaldTestStatistic], DataFrame]: + r""" + Perform Hausman specification test against other regression results. + + Parameters + ---------- + other : PanelResults + Result from panel regression known to be consistent. Typically + fixed effects regression. + include_constant : bool, optional + Flag indicating whether to include the constant term in the comparison. + sigmamore : bool, optional + Flag indicating whether to base the test on the estimated parameter + covariance from the efficient model. + sigmaless : bool, optional + Flag indicating whether to base the test on the estimated parameter + covariance from the consistent model. + + Returns + ------- + WaldTestStatistic + Object containing test statistic, p-value, distribution and null + DataFrame + Overview of coefficients used in the test, and their differences and standard errors + + Notes + ----- + The test is computed by + .. 
math:: + H=(b_{1}-b_{0})'\big(\operatorname{Var}(b_{0})-\operatorname{Var}(b_{1})\big)^{-1}(b_{1}-b_{0}) + + where :math:`b_{0}` is the array of coefficients from the model known to be consistent, + and :math:`b_{1}` is the array of coefficients from the model known to be efficient + (this model). + + """ + + def alt_cov(res: PanelResults, s2: float) -> DataFrame: + """ + Calculate covariance using the supplied error variance. Based on + https://github.com/bashtage/linearmodels/blob/4.17/linearmodels/panel/covariance.py#L119 + """ + cov_obj = res._deferred_cov.__self__ + x = cov_obj._x + out = s2 * np.linalg.inv(x.T @ x) + out = (out + out.T) / 2 + return DataFrame( + out, columns=res.model.exog.vars, index=res.model.exog.vars + ) + + def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool: + """ + Check if matrix is positive definite. + """ + if np.array_equal(mat, mat.T): + try: + np.linalg.cholesky(mat) + return True + except np.linalg.LinAlgError: + pass + return False + + if sigmamore and sigmaless: + raise ValueError("Conflicting test parameters") + + invalid_cov = (ClusteredCovariance, HeteroskedasticCovariance) + if any( + isinstance(res._deferred_cov.__self__, invalid_cov) for res in (other, self) + ): + raise TypeError( + "Hausman test cannot be used with clustered or robust covariance" + ) + + common_cols = set(other.params.index) & set(self.params.index) + if not include_constant: + if other.model.has_constant: + common_cols.discard(other.model.exog.vars[other.model._constant_index]) + if self.model.has_constant: + common_cols.discard(self.model.exog.vars[self.model._constant_index]) + + b0 = other.params[common_cols] + b1 = self.params[common_cols] + var0 = (other.cov if not sigmamore else alt_cov(other, s2=self.s2)).loc[ + common_cols, common_cols + ] + var1 = (self.cov if not sigmaless else alt_cov(self, s2=other.s2)).loc[ + common_cols, common_cols + ] + + var_diff = var0 - var1 + b_diff = b0 - b1 + std_errors =
Series(np.sqrt(np.diagonal(var_diff)), index=var0.index) + estimates = DataFrame( + data={"b0": b0, "b1": b1, "b0-b1": b_diff, "Std. Err.": std_errors} + ) + if not matrix_positive_definite(var_diff): + warnings.warn("(Var(b0) - Var(b1) is not positive definite)") + inv = np.linalg.inv + else: + inv = np.linalg.pinv + test_stat = b_diff.T @ inv(var_diff) @ b_diff + + test: Union[InvalidTestStatistic, WaldTestStatistic] + if test_stat >= 0: + test = WaldTestStatistic( + test_stat, + null="No systematic difference in coefficients between models", + df=b0.size, + name="Hausman specification test", + ) + else: + test = InvalidTestStatistic( + "chi2<0. Model does not meet the assumptions of the Hausman test." + ) + return test, estimates + PanelModelResults = Union[PanelEffectsResults, PanelResults, RandomEffectsResults] @@ -1009,131 +1130,3 @@ def compare( The model comparison object. """ return PanelModelComparison(results, precision=precision) - - -def hausman( - consistent: PanelResults, - efficient: PanelResults, - include_constant: bool = False, - sigmamore: bool = False, - sigmaless: bool = False, -) -> Tuple[Union[InvalidTestStatistic, WaldTestStatistic], DataFrame]: - r""" - Perform Hausman specification test on two models. - - Parameters - ---------- - consistent : PanelResults - Result from panel regression, known to be consistent. Typically - fixed effects regression. - efficient : PanelResults - Result from panel regression, known to be efficient. Typically - random effects regression. - include_constant : bool, optional - Flag indicating whether to include the constant term in the comparison. - sigmamore : bool, optional - Flag indicating whether to base the test on the estimated parameter - covariance from the efficient model. - sigmaless : bool, optional - Flag indicating whether to base the test on the estimated parameter - covariance from the consistent model. 
- - Returns - ------- - WaldTestStatistic - Object containing test statistic, p-value, distribution and null - DataFrame - Overview of coefficients used in the test, and their differences and standard errors - - Notes - ----- - The test is computed by - .. math:: - H=(b_{1}-b_{0})'\big(\operatorname{Var}(b_{0})-\operatorname{Var}(b_{1})\big)^{-1}(b_{1}-b_{0}) - - where :math:`b_{1}` is the array of coefficients from the model known to be consistent, and - :math:`b_{1}` is the array of coefficients from the model known to be efficient. - - """ - - def alt_cov(res: PanelResults, s2: float) -> DataFrame: - """ - Calculate covariance using the supplied error variance. Based on - https://github.com/bashtage/linearmodels/blob/4.17/linearmodels/panel/covariance.py#L119 - """ - cov_obj = res._deferred_cov.__self__ - x = cov_obj._x - out = s2 * np.linalg.inv(x.T @ x) - out = (out + out.T) / 2 - return DataFrame(out, columns=res.model.exog.vars, index=res.model.exog.vars) - - def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool: - """ - Check if matrix is positive definite. 
- """ - if np.array_equal(mat, mat.T): - try: - np.linalg.cholesky(mat) - return True - except np.linalg.LinAlgError: - pass - return False - - if sigmamore and sigmaless: - raise ValueError("Conflicting test parameters") - - invalid_cov = (ClusteredCovariance, HeteroskedasticCovariance) - if any( - isinstance(res._deferred_cov.__self__, invalid_cov) - for res in (consistent, efficient) - ): - raise TypeError( - "Hausman test cannot be used with clustered or robust covariance" - ) - - common_cols = set(consistent.params.index) & set(efficient.params.index) - if not include_constant: - if consistent.model.has_constant: - common_cols.discard( - consistent.model.exog.vars[consistent.model._constant_index] - ) - if efficient.model.has_constant: - common_cols.discard( - efficient.model.exog.vars[efficient.model._constant_index] - ) - - b0 = consistent.params[common_cols] - b1 = efficient.params[common_cols] - var0 = ( - consistent.cov if not sigmamore else alt_cov(consistent, s2=efficient.s2) - ).loc[common_cols, common_cols] - var1 = ( - efficient.cov if not sigmaless else alt_cov(efficient, s2=consistent.s2) - ).loc[common_cols, common_cols] - - var_diff = var0 - var1 - b_diff = b0 - b1 - std_errors = Series(np.sqrt(np.diagonal(var_diff)), index=var0.index) - estimates = DataFrame( - data={"b0": b0, "b1": b1, "b0-b1": b_diff, "Std. Err.": std_errors} - ) - if not matrix_positive_definite(var_diff): - warnings.warn("(Var(b0) - Var(b1) is not positive definite)") - inv = np.linalg.inv - else: - inv = np.linalg.pinv - test_stat = b_diff.T @ inv(var_diff) @ b_diff - - test: Union[InvalidTestStatistic, WaldTestStatistic] - if test_stat >= 0: - test = WaldTestStatistic( - test_stat, - null="No systematic difference in coefficients between models", - df=b0.size, - name="Hausman specification test", - ) - else: - test = InvalidTestStatistic( - "chi2<0. Model does not meet the assumptions of the Hausman test." 
- ) - return test, estimates diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index 4d28f84aaa..b0a7d422ef 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -11,7 +11,7 @@ from linearmodels.iv.model import IV2SLS from linearmodels.panel.data import PanelData from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects -from linearmodels.panel.results import compare, hausman +from linearmodels.panel.results import compare from linearmodels.tests.panel._utility import datatypes, generate_data @@ -182,9 +182,8 @@ def test_hausman_test(recwarn, data, include_constant, sigmamore, sigmaless): fe_res = PanelOLS(dependent, exog, entity_effects=True).fit() re_res = RandomEffects(dependent, exog).fit() func = partial( - hausman, - consistent=fe_res, - efficient=re_res, + re_res.wu_hausman, + other=fe_res, include_constant=include_constant, sigmamore=sigmamore, sigmaless=sigmaless, From 4929c0c65dbeb1ce55360bee25649f4c5e1b7cfd Mon Sep 17 00:00:00 2001 From: eirki Date: Fri, 28 Feb 2020 16:10:18 +0100 Subject: [PATCH 5/9] Wu-Hausman: Test results against pre-calculated values --- linearmodels/tests/panel/test_results.py | 25 ++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index b0a7d422ef..a9bf6da8b1 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -176,7 +176,7 @@ def test_wald_test(data): @pytest.mark.parametrize("include_constant", (False, True), ids=("", "include_constant")) @pytest.mark.parametrize("sigmamore", (False, True), ids=("", "sigmamore")) @pytest.mark.parametrize("sigmaless", (False, True), ids=("", "sigmaless")) -def test_hausman_test(recwarn, data, include_constant, sigmamore, sigmaless): +def test_wu_husman(request, recwarn, data, include_constant, sigmamore, sigmaless): 
dependent = data.set_index(["nr", "year"]).lwage exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]]) fe_res = PanelOLS(dependent, exog, entity_effects=True).fit() @@ -191,9 +191,22 @@ def test_hausman_test(recwarn, data, include_constant, sigmamore, sigmaless): if sigmamore and sigmaless: with pytest.raises(ValueError): func() + return + wald, estimates = func() + if include_constant: + warnings = {str(warn.message) for warn in recwarn} + assert 'invalid value encountered in sqrt' in warnings + assert '(Var(b0) - Var(b1) is not positive definite)' in warnings + assert estimates.shape == (4, 4) else: - wald, estimates = func() - if include_constant: - warnings = {str(warn.message) for warn in recwarn} - assert 'invalid value encountered in sqrt' in warnings - assert '(Var(b0) - Var(b1) is not positive definite)' in warnings + assert estimates.shape == (3, 4) + pre_calculated_results = { + "test_wu_husman[data0]": 112.1182236555156, + "test_wu_husman[data0-include_constant]": 112.1182236555168, + "test_wu_husman[data0-sigmamore]": 86.79072977130164, + "test_wu_husman[data0-sigmamore-include_constant]": 86.79072977130375, + "test_wu_husman[data0-sigmaless]": 88.49415027018209, + "test_wu_husman[data0-sigmaless-include_constant]": 88.49415027018102, + } + expected = pre_calculated_results[request.node.name] + assert wald.stat == pytest.approx(expected, abs=1e-9) From 1e9ba40bd82a787ea4c85fd8b7e333dd9a99858d Mon Sep 17 00:00:00 2001 From: eirki Date: Fri, 28 Feb 2020 16:38:39 +0100 Subject: [PATCH 6/9] Wu-Hausman: Omit from RandomEffectsResults access test --- linearmodels/tests/panel/_utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linearmodels/tests/panel/_utility.py b/linearmodels/tests/panel/_utility.py index e46b5eb525..fd83f9a2a9 100644 --- a/linearmodels/tests/panel/_utility.py +++ b/linearmodels/tests/panel/_utility.py @@ -236,7 +236,7 @@ def assert_frame_similar(result, expected): def 
access_attributes(result): d = dir(result) for key in d: - if not key.startswith("_") and key not in ("wald_test",): + if not key.startswith("_") and key not in ("wald_test", "wu_hausman"): val = getattr(result, key) if callable(val): val() From d91465ae7c21576a2bc6834227fabe4caff65004 Mon Sep 17 00:00:00 2001 From: eirki Date: Mon, 9 Mar 2020 12:56:34 +0100 Subject: [PATCH 7/9] Wu-Hausman: Remove sqrt warning assertion from test --- linearmodels/tests/panel/test_results.py | 1 - 1 file changed, 1 deletion(-) diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index a9bf6da8b1..f39b7f43de 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -195,7 +195,6 @@ def test_wu_husman(request, recwarn, data, include_constant, sigmamore, sigmales wald, estimates = func() if include_constant: warnings = {str(warn.message) for warn in recwarn} - assert 'invalid value encountered in sqrt' in warnings assert '(Var(b0) - Var(b1) is not positive definite)' in warnings assert estimates.shape == (4, 4) else: From 70a50ef57975ddb473714da9596aea60f37de433 Mon Sep 17 00:00:00 2001 From: eirki Date: Tue, 28 Apr 2020 11:10:22 +0200 Subject: [PATCH 8/9] Wu-Hausman: Include p-values in tests --- linearmodels/tests/panel/test_results.py | 57 ++++++++++++------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index f39b7f43de..914b82bf0f 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -173,39 +173,38 @@ def test_wald_test(data): res.wald_test(restriction, np.zeros(2), formula=formula) -@pytest.mark.parametrize("include_constant", (False, True), ids=("", "include_constant")) +@pytest.mark.parametrize("constant", (False, True), ids=("", "constant")) @pytest.mark.parametrize("sigmamore", (False, True), ids=("", "sigmamore")) 
@pytest.mark.parametrize("sigmaless", (False, True), ids=("", "sigmaless")) -def test_wu_husman(request, recwarn, data, include_constant, sigmamore, sigmaless): - dependent = data.set_index(["nr", "year"]).lwage - exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]]) - fe_res = PanelOLS(dependent, exog, entity_effects=True).fit() - re_res = RandomEffects(dependent, exog).fit() - func = partial( - re_res.wu_hausman, - other=fe_res, - include_constant=include_constant, - sigmamore=sigmamore, - sigmaless=sigmaless, - ) +def test_wu_hausman(recwarn, data, constant, sigmamore, sigmaless): if sigmamore and sigmaless: - with pytest.raises(ValueError): - func() return - wald, estimates = func() - if include_constant: + data = data.set_index(["nr", "year"]) + dependent = data["hours"] + exog = add_constant(data[["exper", "expersq"]]) + re_res = RandomEffects(dependent, exog).fit() + fe_res = PanelOLS(dependent, exog, entity_effects=True).fit() + opts = { + "include_constant": constant, + "sigmamore": sigmamore, + "sigmaless": sigmaless, + } + wald, estimates = re_res.wu_hausman(other=fe_res, **opts) + if constant: warnings = {str(warn.message) for warn in recwarn} - assert '(Var(b0) - Var(b1) is not positive definite)' in warnings - assert estimates.shape == (4, 4) - else: + assert "(Var(b0) - Var(b1) is not positive definite)" in warnings assert estimates.shape == (3, 4) - pre_calculated_results = { - "test_wu_husman[data0]": 112.1182236555156, - "test_wu_husman[data0-include_constant]": 112.1182236555168, - "test_wu_husman[data0-sigmamore]": 86.79072977130164, - "test_wu_husman[data0-sigmamore-include_constant]": 86.79072977130375, - "test_wu_husman[data0-sigmaless]": 88.49415027018209, - "test_wu_husman[data0-sigmaless-include_constant]": 88.49415027018102, + else: + assert estimates.shape == (2, 4) + stata_results = { + "": (7.190854126884934, 0.0274489582259534), + "include_constant": (7.190854126885962, 0.0660570908629669), + 
"sigmaless": (6.953506564342694, 0.0309075965524561), + "include_constant-sigmaless": (6.953506564340507, 0.0733945334224529), + "sigmamore": (6.945610047252053, 0.0310298689573541), + "include_constant-sigmamore": (6.94561004725098, 0.0736517192483979), } - expected = pre_calculated_results[request.node.name] - assert wald.stat == pytest.approx(expected, abs=1e-9) + test_id = "-".join([key for key, val in opts.items() if val is True]) + expected_stat, expected_pval = stata_results[test_id] + assert wald.stat == pytest.approx(expected_stat, abs=1e-9) + assert wald.pval == pytest.approx(expected_pval, abs=1e-9) From 3bf7264172293d271183e29925e79ba4c5299a93 Mon Sep 17 00:00:00 2001 From: eirki Date: Tue, 28 Apr 2020 12:17:52 +0200 Subject: [PATCH 9/9] Remove unused import --- linearmodels/tests/panel/test_results.py | 1 - 1 file changed, 1 deletion(-) diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index 914b82bf0f..ce9e5405e8 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -1,4 +1,3 @@ -from functools import partial from itertools import product import numpy as np