2
2
CurrentModule = StatsModels
3
3
DocTestSetup = quote
4
4
using StatsModels
5
- using Random
6
- Random.seed!(1)
7
5
end
8
6
```
9
7
@@ -42,6 +40,8 @@ Here is an example of the `@formula` in action:
42
40
``` jldoctest 1
43
41
julia> using StatsModels, DataFrames
44
42
43
+ julia> using StableRNGs; rng = StableRNG(1);
44
+
45
45
julia> f = @formula(y ~ 1 + a + b + c + b&c)
46
46
FormulaTerm
47
47
Response:
@@ -53,20 +53,20 @@ Predictors:
53
53
c(unknown)
54
54
b(unknown) & c(unknown)
55
55
56
- julia> df = DataFrame(y = rand(9), a = 1:9, b = rand(9), c = repeat(["d","e","f"], 3))
57
- 9×4 DataFrames.DataFrame
58
- │ Row │ y │ a │ b │ c │
59
- │ │ Float64 │ Int64 │ Float64 │ String │
60
- ├─────┼────────────┼───────┼───────────┼────────┤
61
- │ 1 │ 0.236033 │ 1 │ 0.986666 │ d │
62
- │ 2 │ 0.346517 │ 2 │ 0.555751 │ e │
63
- │ 3 │ 0.312707 │ 3 │ 0.437108 │ f │
64
- │ 4 │ 0.00790928 │ 4 │ 0.424718 │ d │
65
- │ 5 │ 0.488613 │ 5 │ 0.773223 │ e │
66
- │ 6 │ 0.210968 │ 6 │ 0.28119 │ f │
67
- │ 7 │ 0.951916 │ 7 │ 0.209472 │ d │
68
- │ 8 │ 0.999905 │ 8 │ 0.251379 │ e │
69
- │ 9 │ 0.251662 │ 9 │ 0.0203749 │ f │
56
+ julia> df = DataFrame(y = rand(rng, 9), a = 1:9, b = rand(rng, 9), c = repeat(["d","e","f"], 3))
57
+ 9×4 DataFrame
58
+ Row │ y a b c
59
+ │ Float64 Int64 Float64 String
60
+ ─────┼────────────────────────────────────
61
+ 1 │ 0.585195 1 0.236782 d
62
+ 2 │ 0.0773379 2 0.943741 e
63
+ 3 │ 0.716628 3 0.445671 f
64
+ 4 │ 0.320357 4 0.763679 d
65
+ 5 │ 0.653093 5 0.145071 e
66
+ 6 │ 0.236639 6 0.021124 f
67
+ 7 │ 0.709684 7 0.152545 d
68
+ 8 │ 0.557787 8 0.617492 e
69
+ 9 │ 0.05079 9 0.481531 f
70
70
71
71
julia> f = apply_schema(f, schema(f, df))
72
72
FormulaTerm
@@ -83,15 +83,15 @@ julia> resp, pred = modelcols(f, df);
83
83
84
84
julia> pred
85
85
9×7 Array{Float64,2}:
86
- 1.0 1.0 0.986666 0.0 0.0 0.0 0.0
87
- 1.0 2.0 0.555751 1.0 0.0 0.555751 0.0
88
- 1.0 3.0 0.437108 0.0 1.0 0.0 0.437108
89
- 1.0 4.0 0.424718 0.0 0.0 0.0 0.0
90
- 1.0 5.0 0.773223 1.0 0.0 0.773223 0.0
91
- 1.0 6.0 0.28119 0.0 1.0 0.0 0.28119
92
- 1.0 7.0 0.209472 0.0 0.0 0.0 0.0
93
- 1.0 8.0 0.251379 1.0 0.0 0.251379 0.0
94
- 1.0 9.0 0.0203749 0.0 1.0 0.0 0.0203749
86
+ 1.0 1.0 0.236782 0.0 0.0 0.0 0.0
87
+ 1.0 2.0 0.943741 1.0 0.0 0.943741 0.0
88
+ 1.0 3.0 0.445671 0.0 1.0 0.0 0.445671
89
+ 1.0 4.0 0.763679 0.0 0.0 0.0 0.0
90
+ 1.0 5.0 0.145071 1.0 0.0 0.145071 0.0
91
+ 1.0 6.0 0.021124 0.0 1.0 0.0 0.021124
92
+ 1.0 7.0 0.152545 0.0 0.0 0.0 0.0
93
+ 1.0 8.0 0.617492 1.0 0.0 0.617492 0.0
94
+ 1.0 9.0 0.481531 0.0 1.0 0.0 0.481531
95
95
96
96
julia> coefnames(f)
97
97
("y", ["(Intercept)", "a", "b", "c: e", "c: f", "b & c: e", "b & c: f"])
@@ -224,13 +224,13 @@ StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.De
224
224
:(log(y)) ~ 1 + a + b
225
225
226
226
Coefficients:
227
- ──────────────────────────────────────────────────────────────────────────────
228
- Estimate Std. Error t value Pr(>|t|) Lower 95% Upper 95%
229
- ──────────────────────────────────────────────────────────────────────────────
230
- (Intercept) -4.16168 2.98788 -1.39285 0.2131 -11.4727 3.14939
231
- a 0.357482 0.342126 1.04489 0.3363 -0.479669 1.19463
232
- b 2.32528 3.13735 0.741159 0.4866 -5.35154 10.0021
233
- ──────────────────────────────────────────────────────────────────────────────
227
+ ──────────────────────────────────────────────────────────────────────────
228
+ Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
229
+ ──────────────────────────────────────────────────────────────────────────
230
+ (Intercept) 0.0698025 0.928295 0.08 0.9425 -2.20165 2.34126
231
+ a -0.105669 0.128107 -0.82 0.4410 -0.419136 0.207797
232
+ b -1.63199 1.12678 -1.45 0.1977 -4.38911 1.12513
233
+ ──────────────────────────────────────────────────────────────────────────
234
234
235
235
julia> df.log_y = log.(df.y);
236
236
@@ -240,13 +240,13 @@ StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.De
240
240
log_y ~ 1 + a + b
241
241
242
242
Coefficients:
243
- ──────────────────────────────────────────────────────────────────────────────
244
- Estimate Std. Error t value Pr(>|t|) Lower 95% Upper 95%
245
- ──────────────────────────────────────────────────────────────────────────────
246
- (Intercept) -4.16168 2.98788 -1.39285 0.2131 -11.4727 3.14939
247
- a 0.357482 0.342126 1.04489 0.3363 -0.479669 1.19463
248
- b 2.32528 3.13735 0.741159 0.4866 -5.35154 10.0021
249
- ──────────────────────────────────────────────────────────────────────────────
243
+ ──────────────────────────────────────────────────────────────────────────
244
+ Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
245
+ ──────────────────────────────────────────────────────────────────────────
246
+ (Intercept) 0.0698025 0.928295 0.08 0.9425 -2.20165 2.34126
247
+ a -0.105669 0.128107 -0.82 0.4410 -0.419136 0.207797
248
+ b -1.63199 1.12678 -1.45 0.1977 -4.38911 1.12513
249
+ ──────────────────────────────────────────────────────────────────────────
250
250
251
251
```
252
252
@@ -256,15 +256,15 @@ interpretation of `+`, `*`, and `&`:
256
256
``` jldoctest 1
257
257
julia> modelmatrix(@formula(y ~ 1 + b + identity(1+b)), df)
258
258
9×3 Array{Float64,2}:
259
- 1.0 0.986666 1.98667
260
- 1.0 0.555751 1.55575
261
- 1.0 0.437108 1.43711
262
- 1.0 0.424718 1.42472
263
- 1.0 0.773223 1.77322
264
- 1.0 0.28119 1.28119
265
- 1.0 0.209472 1.20947
266
- 1.0 0.251379 1.25138
267
- 1.0 0.0203749 1.02037
259
+ 1.0 0.236782 1.23678
260
+ 1.0 0.943741 1.94374
261
+ 1.0 0.445671 1.44567
262
+ 1.0 0.763679 1.76368
263
+ 1.0 0.145071 1.14507
264
+ 1.0 0.021124 1.02112
265
+ 1.0 0.152545 1.15255
266
+ 1.0 0.617492 1.61749
267
+ 1.0 0.481531 1.48153
268
268
```
269
269
270
270
## Constructing a formula programmatically
@@ -350,13 +350,15 @@ simulated coefficients.
350
350
``` jldoctest
351
351
julia> using GLM, DataFrames, StatsModels
352
352
353
- julia> data = DataFrame(a = rand(100), b = repeat(["d", "e", "f", "g"], 25));
353
+ julia> using StableRNGs; rng = StableRNG(1);
354
+
355
+ julia> data = DataFrame(a = rand(rng, 100), b = repeat(["d", "e", "f", "g"], 25));
354
356
355
357
julia> X = StatsModels.modelmatrix(@formula(y ~ 1 + a*b).rhs, data);
356
358
357
359
julia> β_true = 1:8;
358
360
359
- julia> ϵ = randn(100)*0.1;
361
+ julia> ϵ = randn(rng, 100)*0.1;
360
362
361
363
julia> data.y = X*β_true .+ ϵ;
362
364
@@ -366,18 +368,18 @@ StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.De
366
368
y ~ 1 + a + b + a & b
367
369
368
370
Coefficients:
369
- ──────────────────────────────────────────────────────────────────────────
370
- Estimate Std. Error t value Pr(>|t|) Lower 95% Upper 95%
371
- ──────────────────────────────────────────────────────────────────────────
372
- (Intercept) 0.98878 0.0384341 25.7266 <1e-43 0.912447 1.06511
373
- a 2.00843 0.0779388 25.7694 <1e-43 1.85364 2.16323
374
- b: e 3.03726 0.0616371 49.2764 <1e-67 2.91484 3.15967
375
- b: f 4.03909 0.0572857 70.5078 <1e-81 3.92531 4.15286
376
- b: g 5.02948 0.0587224 85.6484 <1e-88 4.91285 5.14611
377
- a & b: e 5.9385 0.10753 55.2264 <1e-71 5.72494 6.15207
378
- a & b: f 6.9073 0.112483 61.4075 <1e-75 6.6839 7.1307
379
- a & b: g 7.93918 0.111285 71.3407 <1e-81 7.71816 8.16021
380
- ──────────────────────────────────────────────────────────────────────────
371
+ ───────────────────────────────────────────────────────────────────────
372
+ Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
373
+ ───────────────────────────────────────────────────────────────────────
374
+ (Intercept) 1.01518 0.0400546 25.34 <1e-42 0.935626 1.09473
375
+ a 1.97476 0.0701427 28.15 <1e-46 1.83545 2.11407
376
+ b: e 3.01269 0.0571186 52.74 <1e-69 2.89925 3.12614
377
+ b: f 4.01918 0.065827 61.06 <1e-75 3.88844 4.14992
378
+ b: g 4.99176 0.0593715 84.08 <1e-88 4.87385 5.10968
379
+ a & b: e 5.98288 0.0954641 62.67 <1e-76 5.79328 6.17248
380
+ a & b: f 6.98622 0.107871 64.76 <1e-77 6.77197 7.20046
381
+ a & b: g 7.92541 0.109873 72.13 <1e-82 7.70719 8.14362
382
+ ───────────────────────────────────────────────────────────────────────
381
383
382
384
```
383
385
0 commit comments