Skip to content

Commit 2a24bd9

Browse files
added rank testing of design matrix when passed through central dmat + constraint building function
1 parent 5ff0c6a commit 2a24bd9

File tree

1 file changed

+35
-8
lines changed

1 file changed

+35
-8
lines changed

batchglm/data.py

Lines changed: 35 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ def preview_coef_names(
106106
sample_description: pd.DataFrame,
107107
formula: str,
108108
as_categorical: Union[bool, list] = True
109-
) -> np.ndarray:
109+
) -> List[str]:
110110
"""
111111
Return coefficient names of model.
112112
@@ -126,21 +126,22 @@ def preview_coef_names(
126126
Set to false, if columns should not be changed.
127127
:return: A list of coefficient names.
128128
"""
129-
return view_coef_names(dmat=design_matrix(
129+
_, coef_names = design_matrix(
130130
sample_description=sample_description,
131131
formula=formula,
132132
as_categorical=as_categorical,
133133
return_type="patsy"
134-
))
134+
)
135+
return coef_names
135136

136137

137138
def constraint_system_from_star(
138-
dmat: Union[None, patsy.design_info.DesignMatrix] = None,
139+
dmat: Union[None, patsy.design_info.DesignMatrix, pd.DataFrame] = None,
139140
sample_description: Union[None, pd.DataFrame] = None,
140141
formula: Union[None, str] = None,
141142
as_categorical: Union[bool, list] = True,
142143
constraints: Union[None, List[str], Tuple[str], dict, np.ndarray] = None,
143-
return_type: str = "patsy",
144+
return_type: str = "patsy"
144145
) -> Tuple:
145146
"""
146147
Wrap different constraint matrix building formats with building of design matrix.
@@ -231,12 +232,32 @@ def constraint_system_from_star(
231232
elif isinstance(constraints, np.ndarray):
232233
cmat = constraints
233234
term_names = None
235+
if isinstance(dmat, pd.DataFrame):
236+
coef_names = dmat.columns
237+
dmat = dmat.values
234238
elif constraints is None:
235239
cmat = None
236240
term_names = None
241+
if isinstance(dmat, pd.DataFrame):
242+
coef_names = dmat.columns
243+
dmat = dmat.values
237244
else:
238245
raise ValueError("constraint format %s not recognized" % type(constraints))
239246

247+
# Test full design matrix for being full rank before returning:
248+
if cmat is None:
249+
if np.linalg.matrix_rank(dmat) != dmat.shape[1]:
250+
raise ValueError(
251+
"constrained design matrix is not full rank: %i %i" %
252+
(np.linalg.matrix_rank(dmat), dmat.shape[1])
253+
)
254+
else:
255+
if np.linalg.matrix_rank(np.matmul(dmat, cmat)) != cmat.shape[1]:
256+
raise ValueError(
257+
"constrained design matrix is not full rank: %i %i" %
258+
(np.linalg.matrix_rank(np.matmul(dmat, cmat)), cmat.shape[1])
259+
)
260+
240261
return dmat, coef_names, cmat, term_names
241262

242263

@@ -402,6 +423,10 @@ def constraint_matrix_from_string(
402423

403424
di = patsy.DesignInfo(coef_names)
404425
constraint_ls = [di.linear_constraint(x).coefs[0] for x in constraints]
426+
# Check that constraints are sensible:
427+
for constraint_i in constraint_ls:
428+
if np.sum(constraint_i != 0) == 1:
429+
raise ValueError("a zero-equality constraint only involved one parameter: remove this parameter")
405430
idx_constr = np.asarray([np.where(x == 1)[0][0] for x in constraint_ls])
406431
idx_depending = [np.where(x == 1)[0][1:] for x in constraint_ls]
407432
idx_unconstr = np.asarray(list(
@@ -421,8 +446,10 @@ def constraint_matrix_from_string(
421446
constraint_mat[i, idx_unconstr_i] = 1
422447

423448
# Test unconstrained subset design matrix for being full rank before returning constraints:
424-
dmat_var = dmat[:, idx_unconstr]
425-
if np.linalg.matrix_rank(dmat_var) != np.linalg.matrix_rank(dmat_var.T):
426-
logging.getLogger("batchglm").error("constrained design matrix is not full rank")
449+
if np.linalg.matrix_rank(dmat[:, idx_unconstr]) != np.linalg.matrix_rank(dmat[:, idx_unconstr].T):
450+
raise ValueError(
451+
"unconstrained sub-design matrix is not full rank" %
452+
np.linalg.matrix_rank(dmat[:, idx_unconstr]), np.linalg.matrix_rank(dmat[:, idx_unconstr].T)
453+
)
427454

428455
return constraint_mat

0 commit comments

Comments
 (0)