Skip to content

Commit cc611fb

Browse files
authored
move statsmodels.jl types and function stubs from StatsBase (#4)
1 parent e3e696a commit cc611fb

File tree

3 files changed

+340
-0
lines changed

3 files changed

+340
-0
lines changed

src/StatsAPI.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
module StatsAPI
22

3+
using LinearAlgebra: Symmetric

include("statisticalmodel.jl")
4+
include("regressionmodel.jl")
5+
6+
"""
7+
params(model)
8+
9+
Return all parameters of a model.
10+
"""
11+
function params end
12+
13+
# Generic function stub: no methods are defined here.
# NOTE(review): presumably the in-place counterpart of `params` — confirm and add a docstring.
function params! end
14+
315
# pairwise(f, x[, y])
416
#
517
# Return a matrix holding the result of applying `f` to all possible pairs

src/regressionmodel.jl

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
RegressionModel <: StatisticalModel
3+
4+
Abstract supertype for all regression models.
5+
"""
6+
abstract type RegressionModel <: StatisticalModel end
7+
8+
"""
9+
fitted(model::RegressionModel)
10+
11+
Return the fitted values of the model.
12+
"""
13+
function fitted end
14+
15+
"""
16+
response(model::RegressionModel)
17+
18+
Return the model response (a.k.a. the dependent variable).
19+
"""
20+
function response end
21+
22+
"""
23+
responsename(model::RegressionModel)
24+
25+
Return the name of the model response (a.k.a. the dependent variable).
26+
"""
27+
function responsename end
28+
29+
"""
30+
meanresponse(model::RegressionModel)
31+
32+
Return the mean of the response.
33+
"""
34+
function meanresponse end
35+
36+
"""
37+
modelmatrix(model::RegressionModel)
38+
39+
Return the model matrix (a.k.a. the design matrix).
40+
"""
41+
function modelmatrix end
42+
43+
"""
44+
crossmodelmatrix(model::RegressionModel)
45+
46+
Return `X'X` where `X` is the model matrix of `model`.
47+
This function will return a pre-computed matrix stored in `model` if possible.
48+
"""
49+
function crossmodelmatrix(model::RegressionModel)
    # Generic fallback: build `X'X` from the model matrix. Model types that keep a
    # pre-computed cross-product can add a more specific method that returns it directly.
    # `Symmetric` is provided by the LinearAlgebra standard library, which must be
    # loaded by the enclosing module.
    X = modelmatrix(model)
    return Symmetric(X' * X)
end
50+
51+
"""
52+
leverage(model::RegressionModel)
53+
54+
Return the diagonal of the projection matrix of the model.
55+
"""
56+
function leverage end
57+
58+
"""
59+
cooksdistance(model::RegressionModel)
60+
61+
Compute [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance)
62+
for each observation in linear model `model`, giving an estimate of the influence
63+
of each data point.
64+
"""
65+
function cooksdistance end
66+
67+
"""
68+
residuals(model::RegressionModel)
69+
70+
Return the residuals of the model.
71+
"""
72+
function residuals end
73+
74+
"""
75+
predict(model::RegressionModel, [newX])
76+
77+
Form the predicted response of `model`. An object with new covariate values `newX` can be supplied,
78+
which should have the same type and structure as that used to fit `model`; e.g. for a GLM
79+
it would generally be a `DataFrame` with the same variable names as the original predictors.
80+
"""
81+
function predict end
82+
83+
"""
84+
predict!
85+
86+
In-place version of [`predict`](@ref).
87+
"""
88+
function predict! end
89+
90+
"""
91+
dof_residual(model::RegressionModel)
92+
93+
Return the residual degrees of freedom of the model.
94+
"""
95+
function dof_residual end

src/statisticalmodel.jl

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
"""
2+
StatisticalModel
3+
4+
Abstract supertype for all statistical models.
5+
"""
6+
abstract type StatisticalModel end
7+
8+
"""
9+
coef(model::StatisticalModel)
10+
11+
Return the coefficients of the model.
12+
"""
13+
function coef end
14+
15+
"""
16+
coefnames(model::StatisticalModel)
17+
18+
Return the names of the coefficients.
19+
"""
20+
function coefnames end
21+
22+
"""
23+
coeftable(model::StatisticalModel; level::Real=0.95)
24+
25+
Return a table with coefficients and related statistics of the model.
26+
`level` determines the level for confidence intervals (by default, 95%).
27+
28+
The returned `CoefTable` object implements the
29+
[Tables.jl](https://github.com/JuliaData/Tables.jl/) interface, and can be
30+
converted e.g. to a `DataFrame` via `using DataFrames; DataFrame(coeftable(model))`.
31+
"""
32+
function coeftable end
33+
34+
"""
35+
confint(model::StatisticalModel; level::Real=0.95)
36+
37+
Compute confidence intervals for coefficients, with confidence level `level` (by default 95%).
38+
"""
39+
function confint end
40+
41+
"""
42+
deviance(model::StatisticalModel)
43+
44+
Return the deviance of the model relative to a reference, which is usually, when applicable,
45+
the saturated model. It is equal, *up to a constant*, to ``-2 \\log L``, with ``L``
46+
the likelihood of the model.
47+
"""
48+
function deviance end
49+
50+
"""
51+
islinear(model::StatisticalModel)
52+
53+
Indicate whether the model is linear.
54+
"""
55+
function islinear end
56+
57+
"""
58+
nulldeviance(model::StatisticalModel)
59+
60+
Return the deviance of the null model, that is the one including only the intercept.
61+
"""
62+
function nulldeviance end
63+
64+
"""
65+
loglikelihood(model::StatisticalModel)
66+
loglikelihood(model::StatisticalModel, observation)
67+
68+
Return the log-likelihood of the model.
69+
70+
With an `observation` argument, return the contribution of `observation` to the
71+
log-likelihood of `model`.
72+
73+
If `observation` is a `Colon`, return a vector of each observation's contribution
74+
to the log-likelihood of the model. In other words, this is the vector of the
75+
pointwise log-likelihood contributions.
76+
77+
In general, `sum(loglikelihood(model, :)) == loglikelihood(model)`.
78+
"""
79+
function loglikelihood end
80+
81+
"""
82+
nullloglikelihood(model::StatisticalModel)
83+
84+
Return the log-likelihood of the null model corresponding to `model`.
85+
This is usually the model containing only the intercept.
86+
"""
87+
function nullloglikelihood end
88+
89+
"""
90+
score(model::StatisticalModel)
91+
92+
Return the score of the model, that is the gradient of the
93+
log-likelihood with respect to the coefficients.
94+
"""
95+
function score end
96+
97+
"""
98+
nobs(model::StatisticalModel)
99+
100+
Return the number of independent observations on which the model was fitted. Be careful
101+
when using this information, as the definition of an independent observation may vary
102+
depending on the model, on the format used to pass the data, on the sampling plan
103+
(if specified), etc.
104+
"""
105+
function nobs end
106+
107+
"""
108+
dof(model::StatisticalModel)
109+
110+
Return the number of degrees of freedom consumed in the model, including
111+
when applicable the intercept and the distribution's dispersion parameter.
112+
"""
113+
function dof end
114+
115+
"""
116+
mss(model::StatisticalModel)
117+
118+
Return the model sum of squares.
119+
"""
120+
function mss end
121+
122+
"""
123+
rss(model::StatisticalModel)
124+
125+
Return the residual sum of squares of the model.
126+
"""
127+
function rss end
128+
129+
"""
130+
informationmatrix(model::StatisticalModel; expected::Bool = true)
131+
132+
Return the information matrix of the model. By default the Fisher information matrix
133+
is returned, while the observed information matrix can be requested with `expected = false`.
134+
"""
135+
function informationmatrix end
136+
137+
"""
138+
stderror(model::StatisticalModel)
139+
140+
Return the standard errors for the coefficients of the model.
141+
"""
142+
function stderror end
143+
144+
"""
145+
vcov(model::StatisticalModel)
146+
147+
Return the variance-covariance matrix for the coefficients of the model.
148+
"""
149+
function vcov end
150+
151+
"""
152+
weights(model::StatisticalModel)
153+
154+
Return the weights used in the model.
155+
"""
156+
function weights end
157+
158+
"""
159+
isfitted(model::StatisticalModel)
160+
161+
Indicate whether the model has been fitted.
162+
"""
163+
function isfitted end
164+
165+
"""
166+
Fit a statistical model.
167+
"""
168+
function fit end
169+
170+
"""
171+
Fit a statistical model in-place.
172+
"""
173+
function fit! end
174+
175+
"""
176+
aic(model::StatisticalModel)
177+
178+
Akaike's Information Criterion, defined as ``-2 \\log L + 2k``, with ``L`` the likelihood
179+
of the model, and ``k`` its number of consumed degrees of freedom
180+
(as returned by [`dof`](@ref)).
181+
"""
182+
function aic(model::StatisticalModel)
    # AIC = -2 log L + 2k, with k the number of consumed degrees of freedom.
    loglik = loglikelihood(model)
    k = dof(model)
    return -2 * loglik + 2 * k
end
183+
184+
"""
185+
aicc(model::StatisticalModel)
186+
187+
Corrected Akaike's Information Criterion for small sample sizes (Hurvich and Tsai 1989),
188+
defined as ``-2 \\log L + 2k + 2k(k+1)/(n-k-1)``, with ``L`` the likelihood of the model,
189+
``k`` its number of consumed degrees of freedom (as returned by [`dof`](@ref)),
190+
and ``n`` the number of observations (as returned by [`nobs`](@ref)).
191+
"""
192+
function aicc(model::StatisticalModel)
    nparams = dof(model)
    nsamples = nobs(model)
    loglik = loglikelihood(model)
    # Small-sample correction added on top of the plain AIC value (-2 log L + 2k).
    correction = 2 * nparams * (nparams + 1) / (nsamples - nparams - 1)
    return -2 * loglik + 2 * nparams + correction
end
197+
198+
"""
199+
bic(model::StatisticalModel)
200+
201+
Bayesian Information Criterion, defined as ``-2 \\log L + k \\log n``, with ``L``
202+
the likelihood of the model, ``k`` its number of consumed degrees of freedom
203+
(as returned by [`dof`](@ref)), and ``n`` the number of observations
204+
(as returned by [`nobs`](@ref)).
205+
"""
206+
function bic(model::StatisticalModel)
    # BIC = -2 log L + k log n, with k the consumed degrees of freedom and n the
    # number of observations.
    loglik = loglikelihood(model)
    k = dof(model)
    n = nobs(model)
    return -2 * loglik + k * log(n)
end
207+
208+
"""
209+
r2(model::StatisticalModel)
210+
r²(model::StatisticalModel)
211+
212+
Coefficient of determination (R-squared).
213+
214+
For a linear model, the R² is defined as ``ESS/TSS``, with ``ESS`` the explained sum of squares
215+
and ``TSS`` the total sum of squares.
216+
"""
217+
function r2 end

# Unicode alias documented alongside `r2`: `r²` is bound to the same generic function,
# so methods added to `r2` are reachable through either name.
const r² = r2
220+
221+
"""
222+
adjr2(model::StatisticalModel)
223+
adjr²(model::StatisticalModel)
224+
225+
Adjusted coefficient of determination (adjusted R-squared).
226+
227+
For linear models, the adjusted R² is defined as ``1 - (1-R^2)(n-1)/(n-p)``, with ``R^2``
228+
the coefficient of determination, ``n`` the number of observations, and ``p`` the number of
229+
coefficients (including the intercept). This definition is generally known as the Wherry Formula I.
230+
"""
231+
function adjr2 end
232+
233+
# Unicode alias: `adjr²` is bound to the same generic function as `adjr2`.
const adjr² = adjr2

0 commit comments

Comments
 (0)