Skip to content

Commit d968643

Browse files
committed
anova cell means keys now separated by "|" not "#" to sort logically
1 parent de646df commit d968643

File tree

4 files changed

+91
-111
lines changed

4 files changed

+91
-111
lines changed

Changes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- add demo
88
- remove filt_exp and filt_ma, deprecated in 2011
99
- now an exception to give anova n-observations <= product of categories in IVs
10+
- anova cell means keys now separated by "|" not "#" to sort logically
1011

1112
0.853 2025-01-03
1213
- uses PDL 2.096+ lib/*.pd format for quicker builds

lib/PDL/Demos/Stats.pm

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ random(100)->plot_acf( 50, { win=>$w } );
4141
# PDL::Stats::Kmeans clusters data points into "k" (a supplied number) groups
4242
$data = grandom(200, 2); # two rows = two dimensions
4343
%k = $data->kmeans; # use default of 3 clusters
44-
print "$_\t$k{$_}\n" for sort keys %k;
44+
print "$_\t@{[$k{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %k;
4545
$w->plot(
4646
(map +(with=>'points', style=>$_+1, ke=>"Cluster ".($_+1),
4747
$data->dice_axis(0,which($k{cluster}->slice(",$_")))->dog),
@@ -65,17 +65,17 @@ $data = qsort random 10, 5; # 10 obs on 5 variables
6565
$data->plot_scores( $r{eigenvector}, {win=>$w} );
6666
|],
6767

68-
[act => q|
68+
[act => q{
6969
# Let's try the analysis of variance (ANOVA) in PDL::Stats::GLM
7070
$y = pdl '[1 1 2 2 3 3 3 3 4 5 5 5]'; # suppose this is ratings for 12 apples
7171
$a = pdl '[1 2 3 1 2 3 1 2 3 1 2 3]'; # IV for types of apple
7272
@b = qw( y y y y y y n n n n n n ); # IV for whether we baked the apple
7373
%m = $y->anova( $a, \@b, { IVNM=>[qw(apple bake)], plot=>0, win=>$w } );
74-
print "$_\t$m{$_}\n" for (sort keys %m);
74+
print "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m;
7575
# And plot the means of the interaction of all IVs
76-
$m{'# apple ~ bake # m'}->plot_means($m{'# apple ~ bake # se'},
76+
$m{'| apple ~ bake | m'}->plot_means($m{'| apple ~ bake | se'},
7777
{ IVNM=>[qw(apple bake)], plot=>1, win=>$w });
78-
|],
78+
}],
7979

8080
[comment => q|
8181
This concludes the demo.

lib/PDL/Stats/GLM.pd

Lines changed: 70 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -725,62 +725,53 @@ Usage:
725725

726726
perldl> %m = $y->anova( $a, \@b, { IVNM=>['apple', 'bake'] } )
727727

728-
perldl> p "$_\t$m{$_}\n" for sort keys %m
729-
# apple # m
730-
[
731-
[2.5 3 3.5]
728+
perldl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m
729+
F 2.46666666666667
730+
F_df [5 6]
731+
F_p 0.151168719948632
732+
ms_model 3.08333333333333
733+
ms_residual 1.25
734+
ss_model 15.4166666666667
735+
ss_residual 7.5
736+
ss_total 22.9166666666667
737+
| apple | F 0.466666666666667
738+
| apple | F_p 0.648078345471096
739+
| apple | df 2
740+
| apple | m [
741+
[2.75 3 3.5]
732742
]
733-
734-
# apple # se
735-
[
736-
[0.64549722 0.91287093 0.64549722]
743+
| apple | ms 0.583333333333334
744+
| apple | se [
745+
[ 0.85391256 0.81649658 0.64549722]
737746
]
738-
739-
# apple ~ bake # m
740-
[
741-
[1.5 1.5 2.5]
742-
[3.5 4.5 4.5]
747+
| apple | ss 1.16666666666667
748+
| apple || err df 6
749+
| apple ~ bake | F 0.0666666666666671
750+
| apple ~ bake | F_p 0.936190104380701
751+
| apple ~ bake | df 2
752+
| apple ~ bake | m [
753+
[1.5 2 2.5]
754+
[ 4 4 4.5]
743755
]
744-
745-
# apple ~ bake # se
746-
[
747-
[0.5 0.5 0.5]
748-
[0.5 0.5 0.5]
756+
| apple ~ bake | ms 0.0833333333333339
757+
| apple ~ bake | se [
758+
[0.5 1 0.5]
759+
[ 1 1 0.5]
749760
]
750-
751-
# bake # m
752-
[
753-
[ 1.8333333 4.1666667]
761+
| apple ~ bake | ss 0.166666666666668
762+
| apple ~ bake || err df 6
763+
| bake | F 11.2666666666667
764+
| bake | F_p 0.015294126084452
765+
| bake | df 1
766+
| bake | m [
767+
[ 2 4.1666667]
754768
]
755-
756-
# bake # se
757-
[
758-
[0.30731815 0.30731815]
769+
| bake | ms 14.0833333333333
770+
| bake | se [
771+
[ 0.36514837 0.40138649]
759772
]
760-
761-
F 7.6
762-
F_df [5 6]
763-
F_p 0.0141586545851857
764-
ms_model 3.8
765-
ms_residual 0.5
766-
ss_model 19
767-
ss_residual 3
768-
ss_total 22
769-
| apple | F 2
770-
| apple | F_df [2 6]
771-
| apple | F_p 0.216
772-
| apple | ms 1
773-
| apple | ss 2
774-
| apple ~ bake | F 0.666666666666667
775-
| apple ~ bake | F_df [2 6]
776-
| apple ~ bake | F_p 0.54770848985725
777-
| apple ~ bake | ms 0.333333333333334
778-
| apple ~ bake | ss 0.666666666666667
779-
| bake | F 32.6666666666667
780-
| bake | F_df [1 6]
781-
| bake | F_p 0.00124263849516693
782-
| bake | ms 16.3333333333333
783-
| bake | ss 16.3333333333333
773+
| bake | ss 14.0833333333333
774+
| bake || err df 6
784775

785776
This is implemented as a call to L</anova_rptd>, with an C<undef>
786777
subjects vector.
@@ -853,8 +844,8 @@ sub _cell_means {
853844
$m->getndims == 1 and $m = $m->dummy(1);
854845
my $se = sqrt( ($ss/($_->sumover - 1)) / $_->sumover )->reshape(@shape);
855846
$se->getndims == 1 and $se = $se->dummy(1);
856-
$cm{ "# $idv->[$i] # m" } = $m;
857-
$cm{ "# $idv->[$i] # se" } = $se;
847+
$cm{ "| $idv->[$i] | m" } = $m;
848+
$cm{ "| $idv->[$i] | se" } = $se;
858849
$i++;
859850
}
860851
\%cm;
@@ -935,65 +926,53 @@ Usage:
935926
# subj must be the first argument
936927
my %m = $dv->anova_rptd( $subj, $b, $w, {ivnm=>['Beer', 'Wings']} );
937928

938-
print "$_\t$m{$_}\n" for sort keys %m;
939-
940-
# Beer # m
941-
[
942-
[ 10.916667 8.9166667]
943-
]
944-
945-
# Beer # se
946-
[
947-
[ 0.4614791 0.4614791]
948-
]
949-
950-
# Beer ~ Wings # m
951-
[
952-
[ 10 7]
953-
[ 10.5 9.25]
954-
[12.25 10.5]
955-
]
956-
957-
# Beer ~ Wings # se
958-
[
959-
[0.89170561 0.89170561]
960-
[0.89170561 0.89170561]
961-
[0.89170561 0.89170561]
962-
]
963-
964-
# Wings # m
965-
[
966-
[ 8.5 9.875 11.375]
967-
]
968-
969-
# Wings # se
970-
[
971-
[0.67571978 0.67571978 0.67571978]
972-
]
929+
print "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m
973930

974931
ss_residual 19.0833333333333
975932
ss_subject 24.8333333333333
976933
ss_total 133.833333333333
977934
| Beer | F 9.39130434782609
978935
| Beer | F_p 0.0547977008378944
979936
| Beer | df 1
937+
| Beer | m [
938+
[ 10.916667 8.9166667]
939+
]
980940
| Beer | ms 24
941+
| Beer | se [
942+
[ 0.4614791 0.4614791]
943+
]
981944
| Beer | ss 24
982945
| Beer || err df 3
983946
| Beer || err ms 2.55555555555556
984947
| Beer || err ss 7.66666666666667
985948
| Beer ~ Wings | F 0.510917030567687
986949
| Beer ~ Wings | F_p 0.623881438624431
987950
| Beer ~ Wings | df 2
951+
| Beer ~ Wings | m [
952+
[ 10 7]
953+
[ 10.5 9.25]
954+
[12.25 10.5]
955+
]
988956
| Beer ~ Wings | ms 1.625
957+
| Beer ~ Wings | se [
958+
[0.89170561 0.89170561]
959+
[0.89170561 0.89170561]
960+
[0.89170561 0.89170561]
961+
]
989962
| Beer ~ Wings | ss 3.25000000000001
990963
| Beer ~ Wings || err df 6
991964
| Beer ~ Wings || err ms 3.18055555555555
992965
| Beer ~ Wings || err ss 19.0833333333333
993966
| Wings | F 4.52851711026616
994967
| Wings | F_p 0.0632754786153548
995968
| Wings | df 2
969+
| Wings | m [
970+
[ 8.5 9.875 11.375]
971+
]
996972
| Wings | ms 16.5416666666667
973+
| Wings | se [
974+
[0.67571978 0.67571978 0.67571978]
975+
]
997976
| Wings | ss 33.0833333333333
998977
| Wings || err df 6
999978
| Wings || err ms 3.65277777777778
@@ -1131,7 +1110,7 @@ sub PDL::anova_rptd {
11311110
@ret{ keys %$cm_ref } = values %$cm_ref;
11321111

11331112
my $highest = join(' ~ ', @{ $opt{IVNM} });
1134-
$cm_ref->{"# $highest # m"}->plot_means( $cm_ref->{"# $highest # se"},
1113+
$cm_ref->{"| $highest | m"}->plot_means( $cm_ref->{"| $highest | se"},
11351114
{ %opt, IVNM=>$idv } )
11361115
if $opt{PLOT};
11371116

@@ -1300,7 +1279,7 @@ sub _fix_rptd_se {
13001279
# if ivnm lvls_ref for within ss only this can work for mixed design
13011280
my ($cm_ref, $ret, $ivnm, $lvls_ref, $n) = @_;
13021281
my @se = grep /se$/, keys %$cm_ref;
1303-
@se = map { /^# (.+?) # se$/; $1; } @se;
1282+
@se = map { /^\| (.+?) \| se$/; $1; } @se;
13041283
my @n_obs
13051284
= map {
13061285
my @ivs = split / ~ /, $_;
@@ -1313,7 +1292,7 @@ sub _fix_rptd_se {
13131292
$n * $collapsed;
13141293
} @se;
13151294
for my $i (0 .. $#se) {
1316-
$cm_ref->{"# $se[$i] # se"}
1295+
$cm_ref->{"| $se[$i] | se"}
13171296
.= sqrt( $ret->{"| $se[$i] || err ms"} / $n_obs[$i] );
13181297
}
13191298
$cm_ref;
@@ -2028,7 +2007,7 @@ Usage:
20282007

20292008
Or like this:
20302009

2031-
$m{'# apple ~ bake # m'}->plot_means;
2010+
$m{'| apple ~ bake | m'}->plot_means;
20322011

20332012
=cut
20342013

t/glm.t

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -323,11 +323,11 @@ is_pdl $b_bad->dvrs(ones(6) * .5), pdl( 'BAD -1.17741002251547 -1.17741002251547
323323
$d->set( 20, 10 );
324324
my @idv = qw(A B C);
325325
my %m = $d->anova(\@a, $b, $c, {IVNM=>\@idv, plot=>0});
326-
$m{'# A ~ B ~ C # m'} = $m{'# A ~ B ~ C # m'}->slice(',(2),');
326+
$m{'| A ~ B ~ C | m'} = $m{'| A ~ B ~ C | m'}->slice(',(2),');
327327
test_stats_cmp(\%m, {
328328
'| A | F' => 165.252100840336,
329329
'| A ~ B ~ C | F' => 0.0756302521008415,
330-
'# A ~ B ~ C # m' => pdl([[qw(8 18 38 53)], [qw(8 23 38 53)]]),
330+
'| A ~ B ~ C | m' => pdl([[qw(8 18 38 53)], [qw(8 23 38 53)]]),
331331
});
332332
my $dsgn = $d->anova_design_matrix(undef, \@a, $b, $c, {IVNM=>\@idv});
333333
is_pdl $dsgn, pdl '
@@ -407,11 +407,11 @@ like $@, qr/residual df = 0/, 'error when too few sample';
407407
my $d = pdl qw( 3 2 1 5 2 1 5 3 1 4 1 2 3 5 5 );
408408
my $a = qsort sequence(15) % 3;
409409
my %m = $d->anova($a, {plot=>0});
410-
$m{$_} = $m{$_}->squeeze for '# IV_0 # m';
410+
$m{$_} = $m{$_}->squeeze for '| IV_0 | m';
411411
test_stats_cmp(\%m, {
412412
F => 0.160919540229886,
413413
ms_model => 0.466666666666669,
414-
'# IV_0 # m' => pdl(qw( 2.6 2.8 3.2 )),
414+
'| IV_0 | m' => pdl(qw( 2.6 2.8 3.2 )),
415415
});
416416
}
417417

@@ -424,12 +424,12 @@ like $@, qr/residual df = 0/, 'error when too few sample';
424424
my $b = sequence(60) % 3;
425425
my $c = sequence(60) % 2;
426426
my %m = $d->anova(\@a, $b, $c, {IVNM=>[qw(A B C)], plot=>0, v=>0});
427-
$m{$_} = $m{$_}->slice(',(1)')->squeeze for '# A ~ B ~ C # m', '# A ~ B ~ C # se';
427+
$m{$_} = $m{$_}->slice(',(1)')->squeeze for '| A ~ B ~ C | m', '| A ~ B ~ C | se';
428428
test_stats_cmp(\%m, {
429429
'| A | F' => 150.00306433446,
430430
'| A ~ B ~ C | F' => 0.17534855325553,
431-
'# A ~ B ~ C # m' => pdl([qw( 4 22 37 52 )], [qw( 10 22 37 52 )]),
432-
'# A ~ B ~ C # se' => pdl([qw( 0 6 1.7320508 3.4641016 )], [qw( 3 3 3.4641016 1.7320508 )]),
431+
'| A ~ B ~ C | m' => pdl([qw( 4 22 37 52 )], [qw( 10 22 37 52 )]),
432+
'| A ~ B ~ C | se' => pdl([qw( 0 6 1.7320508 3.4641016 )], [qw( 3 3 3.4641016 1.7320508 )]),
433433
});
434434
}
435435

@@ -443,11 +443,11 @@ like $@, qr/residual df = 0/, 'error when too few sample';
443443
$d->setbadat(62);
444444
$b->setbadat(61);
445445
my %m = $d->anova(\@a, $b, $c, {IVNM=>[qw(A B C)], plot=>0, V=>0});
446-
$m{$_} = $m{$_}->slice(',(2)')->squeeze for '# A ~ B ~ C # m';
446+
$m{$_} = $m{$_}->slice(',(2)')->squeeze for '| A ~ B ~ C | m';
447447
test_stats_cmp(\%m, {
448448
'| A | F' => 165.252100840336,
449449
'| A ~ B ~ C | F' => 0.0756302521008415,
450-
'# A ~ B ~ C # m' => pdl([qw(8 18 38 53)], [qw(8 23 38 53)]),
450+
'| A ~ B ~ C | m' => pdl([qw(8 18 38 53)], [qw(8 23 38 53)]),
451451
});
452452
}
453453

@@ -603,19 +603,19 @@ is_pdl pdl([1,1,1], [2,2,2])->stddz, zeroes(3,2), 'stddz nan vs bad';
603603
[1 -1 -1 -1 -1 -1 -1]
604604
';
605605
my %m = $d->anova_rptd($s, $a, {plot=>0});
606-
$m{$_} = $m{$_}->squeeze for '# IV_0 # m';
606+
$m{$_} = $m{$_}->squeeze for '| IV_0 | m';
607607
test_stats_cmp(\%m, {
608608
'| IV_0 | F' => 0.145077720207254,
609609
'| IV_0 | ms' => 0.466666666666667,
610-
'# IV_0 # m' => pdl(qw( 2.6 2.8 3.2 )),
610+
'| IV_0 | m' => pdl(qw( 2.6 2.8 3.2 )),
611611
});
612612
}
613613

614614
my %anova_bad_a = (
615615
'| a | F' => 0.351351351351351,
616616
'| a | ms' => 0.722222222222222,
617617
'| a ~ b | F' => 5.25,
618-
'# a ~ b # m' => pdl(qw( 3 1.3333333 3.3333333 3.3333333 3.6666667 2.6666667 ))->reshape(3,2),
618+
'| a ~ b | m' => pdl(qw( 3 1.3333333 3.3333333 3.3333333 3.6666667 2.6666667 ))->reshape(3,2),
619619
);
620620
{ # anova_rptd_2w bad dv
621621
my $d = pdl '[3 2 1 5 2 BAD 5 3 1 4 1 2 3 5 5 3 4 2 1 5 4 3 2 2]';
@@ -671,8 +671,8 @@ my %anova_bad_a = (
671671
'| a ~ c | F' => 3.64615384615385,
672672
'| b ~ c || err ms' => 2.63194444444445,
673673
'| a ~ b ~ c | F' => 1.71299093655589,
674-
'# a ~ b ~ c # m' => pdl(qw( 4 2.75 2.75 2.5 3.25 4.25 3.5 1.75 2 3.5 2.75 2.25 ))->reshape(2,2,3),
675-
'# a ~ b # se' => ones(2, 2) * 0.55014729,
674+
'| a ~ b ~ c | m' => pdl(qw( 4 2.75 2.75 2.5 3.25 4.25 3.5 1.75 2 3.5 2.75 2.25 ))->reshape(2,2,3),
675+
'| a ~ b | se' => ones(2, 2) * 0.55014729,
676676
});
677677
}
678678

@@ -879,7 +879,7 @@ my %ans_mixed = (
879879
'| a ~ b | F' => 1.54225352112676,
880880
'| b | F' => 0.738693467336681,
881881
'| b || err ms' => 2.76388888888889,
882-
'# a ~ b # se' => ones(3,2) * 0.70217915,
882+
'| a ~ b | se' => ones(3,2) * 0.70217915,
883883
);
884884
{ # anova_rptd mixed
885885
my $d = pdl '[3 2 1 5 2 1 5 3 1 4 1 2 3 5 5 3 4 2 1 5 4 3 2 2]';

0 commit comments

Comments
 (0)