Merge pull request #654 from bashtage/bug-dummy

bashtage · web-flow · commit 50408aaa03cf · 2025-09-09T15:16:39.000+01:00
BUG: Remove the dense "array" output format from dummy_matrix
diff --git a/linearmodels/asset_pricing/model.py b/linearmodels/asset_pricing/model.py
@@ -634,7 +634,7 @@ def fit(
 
         # Step 1, n regressions to get B
         fc = np.c_[np.ones((nobs, 1)), f]
-        b = lstsq(fc, p, rcond=None)[0]  # nf+1 by np
+        b = lstsq(fc, p, rcond=None)[0].astype(float)  # nf+1 by np
         eps = p - fc @ b
         if excess_returns:
             betas = b[1:].T
diff --git a/linearmodels/iv/results.py b/linearmodels/iv/results.py
@@ -286,7 +286,7 @@ def conf_int(self, level: float = 0.95) -> DataFrame:
         -----
         Uses a t(df_resid) if ``debiased`` is True, else normal.
         """
-        ci_quantiles = [(1 - level) / 2, 1 - (1 - level) / 2]
+        ci_quantiles = array([(1 - level) / 2, 1 - (1 - level) / 2])
         if self._debiased:
             q = stats.t.ppf(ci_quantiles, self.df_resid)
         else:
@@ -1348,6 +1348,7 @@ def anderson_rubin(self) -> InvalidTestStatistic | WaldTestStatistic:
                 "Test requires more instruments than " "endogenous variables.",
                 name=name,
             )
+        assert self._liml_kappa is not None
         stat = nobs * log(self._liml_kappa)
         df = ninstr - nendog
         null = "The model is not overidentified."
@@ -1385,6 +1386,7 @@ def basmann_f(self) -> InvalidTestStatistic | WaldTestStatistic:
             )
         df = ninstr - nendog
         df_denom = nobs - (nexog + ninstr)
+        assert self._liml_kappa is not None
         stat = (self._liml_kappa - 1) * df_denom / df
         null = "The model is not overidentified."
         return WaldTestStatistic(stat, null, df, df_denom=df_denom, name=name)
diff --git a/linearmodels/panel/model.py b/linearmodels/panel/model.py
@@ -1521,6 +1521,8 @@ def _lsmr_path(
             cond_mean = lsmr(wd, wx[:, i], atol=1e-8, btol=1e-8)[0]
             cond_mean /= cond
             wx_mean_l.append(cond_mean)
+        wx_mean: linearmodels.typing.data.Float64Array | csc_matrix
+        wy_mean: linearmodels.typing.data.Float64Array | csc_matrix
         wx_mean = np.column_stack(wx_mean_l)
         wy_mean = lsmr(wd, wy, atol=1e-8, btol=1e-8)[0]
         wy_mean /= cond
diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py
@@ -333,7 +333,7 @@ def conf_int(self, level: float = 0.95) -> DataFrame:
         -----
         Uses a t(df_resid) if ``debiased`` is True, else normal.
         """
-        ci_quantiles = [(1 - level) / 2, 1 - (1 - level) / 2]
+        ci_quantiles = np.array([(1 - level) / 2, 1 - (1 - level) / 2])
         if self._debiased:
             q = stats.t.ppf(ci_quantiles, self.df_resid)
         else:
diff --git a/linearmodels/panel/utility.py b/linearmodels/panel/utility.py
@@ -3,7 +3,7 @@
 from linearmodels.compat.pandas import ANNUAL_FREQ
 
 from collections import defaultdict
-from typing import NamedTuple, TypeVar, cast
+from typing import Literal, NamedTuple, TypeVar, cast
 
 import numpy as np
 import numpy.random
@@ -122,15 +122,12 @@ def preconditioner(
 def dummy_matrix(
     cats: linearmodels.typing.data.ArrayLike,
     *,
-    output_format: str = "csc",
-    drop: str = "first",
+    output_format: Literal["csc", "csr", "coo"] = "csc",
+    drop: Literal["first", "last"] = "first",
     drop_all: bool = False,
     precondition: bool = True,
 ) -> tuple[
-    sp.csc_matrix
-    | sp.csr_matrix
-    | sp.coo_matrix
-    | linearmodels.typing.data.Float64Array,
+    sp.csc_matrix | sp.csr_matrix | sp.coo_matrix,
     linearmodels.typing.data.Float64Array,
 ]:
     """
@@ -146,7 +143,6 @@ def dummy_matrix(
         * "csc" - sparse matrix in compressed column form
         * "csr" - sparse matrix in compressed row form
         * "coo" - sparse matrix in coordinate form
-        * "array" - dense numpy ndarray
 
     drop: {"first", "last"}
         Exclude either the first or last category. This only applies when
@@ -199,7 +195,7 @@ def dummy_matrix(
         data["cols"].append(cols)
         total_dummies += ncategories - (i > 0)
 
-    if output_format in ("csc", "array"):
+    if output_format == "csc":
         fmt = sp.csc_matrix
     elif output_format == "csr":
         fmt = sp.csr_matrix
@@ -213,9 +209,6 @@ def dummy_matrix(
             (np.concatenate(data["rows"]), np.concatenate(data["cols"])),
         )
     )
-    if output_format == "array":
-        out = out.toarray()
-
     if precondition:
         out, cond = preconditioner(out, copy=False)
     else:
diff --git a/linearmodels/tests/panel/test_utility.py b/linearmodels/tests/panel/test_utility.py
@@ -20,7 +20,6 @@
     "csc": csc_matrix,
     "csr": csr_matrix,
     "coo": coo_matrix,
-    "array": np.ndarray,
 }
 
 pytestmark = pytest.mark.filterwarnings(
@@ -83,19 +82,15 @@ def test_dummy_precondition():
     c1 = pd.Series(pd.Categorical(["a"] * 5 + ["b"] * 5 + ["c"] * 5))
     c2 = pd.Series(pd.Categorical(["A", "B", "C", "D", "E"] * 3))
     cats = pd.concat([c1, c2], axis=1)
-    out_arr, cond_arr = dummy_matrix(
-        cats, output_format="array", drop="last", precondition=True
-    )
     csc = dummy_matrix(cats, output_format="csc", drop="last", precondition=True)
     out_csc: csc_matrix = csc[0]
     cond_csc: np.ndarray = csc[1]
     csr = dummy_matrix(cats, output_format="csr", drop="last", precondition=True)
     out_csr: csr_matrix = csr[0]
     cond_csr: np.ndarray = csr[1]
-    assert_allclose((out_arr**2).sum(0), np.ones(out_arr.shape[1]))
-    assert_allclose((out_csc.multiply(out_csc)).sum(0).A1, np.ones(out_arr.shape[1]))
-    assert_allclose(cond_arr, cond_csc)
+    assert_allclose((out_csc.multiply(out_csc)).sum(0).A1, np.ones(out_csc.shape[1]))
     assert_allclose(cond_csr, cond_csc)
+    assert isinstance(out_csc, csc_matrix)
     assert isinstance(out_csr, csr_matrix)