anova cell means keys now separated by "|" not "#" to sort logically

mohawk2 · mohawk2 · commit d9686430c783 · 2025-02-24T00:21:47.000Z
diff --git a/Changes b/Changes
@@ -7,6 +7,7 @@
 - add demo
 - remove filt_exp and filt_ma, deprecated in 2011
 - now an exception to give anova n-observations <= product of categories in IVs
+- anova cell means keys now separated by "|" not "#" to sort logically
 
 0.853 2025-01-03
 - uses PDL 2.096+ lib/*.pd format for quicker builds
diff --git a/lib/PDL/Demos/Stats.pm b/lib/PDL/Demos/Stats.pm
@@ -41,7 +41,7 @@ random(100)->plot_acf( 50, { win=>$w } );
 # PDL::Stats::Kmeans clusters data points into "k" (a supplied number) groups
 $data = grandom(200, 2); # two rows = two dimensions
 %k = $data->kmeans; # use default of 3 clusters
-print "$_\t$k{$_}\n" for sort keys %k;
+print "$_\t@{[$k{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %k;
 $w->plot(
   (map +(with=>'points', style=>$_+1, ke=>"Cluster ".($_+1),
     $data->dice_axis(0,which($k{cluster}->slice(",$_")))->dog),
@@ -65,17 +65,17 @@ $data = qsort random 10, 5;      # 10 obs on 5 variables
 $data->plot_scores( $r{eigenvector}, {win=>$w} );
 |],
 
-[act => q|
+[act => q{
 # Let's try the analysis of variance (ANOVA) in PDL::Stats::GLM
 $y = pdl '[1 1 2 2 3 3 3 3 4 5 5 5]'; # suppose this is ratings for 12 apples
 $a = pdl '[1 2 3 1 2 3 1 2 3 1 2 3]'; # IV for types of apple
 @b = qw( y y y y y y n n n n n n );   # IV for whether we baked the apple
 %m = $y->anova( $a, \@b, { IVNM=>[qw(apple bake)], plot=>0, win=>$w } );
-print "$_\t$m{$_}\n" for (sort keys %m);
+print "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m;
 # And plot the means of the interaction of all IVs
-$m{'# apple ~ bake # m'}->plot_means($m{'# apple ~ bake # se'},
+$m{'| apple ~ bake | m'}->plot_means($m{'| apple ~ bake | se'},
   { IVNM=>[qw(apple bake)], plot=>1, win=>$w });
-|],
+}],
 
 [comment => q|
 This concludes the demo.
diff --git a/lib/PDL/Stats/GLM.pd b/lib/PDL/Stats/GLM.pd
@@ -725,62 +725,53 @@ Usage:
 
     perldl> %m = $y->anova( $a, \@b, { IVNM=>['apple', 'bake'] } )
 
-    perldl> p "$_\t$m{$_}\n" for sort keys %m
-    # apple # m
-    [
-     [2.5   3 3.5]
+    perldl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m
+    F	2.46666666666667
+    F_df	[5 6]
+    F_p	0.151168719948632
+    ms_model	3.08333333333333
+    ms_residual	1.25
+    ss_model	15.4166666666667
+    ss_residual	7.5
+    ss_total	22.9166666666667
+    | apple | F	0.466666666666667
+    | apple | F_p	0.648078345471096
+    | apple | df	2
+    | apple | m	[
+     [2.75    3  3.5]
     ]
-
-    # apple # se
-    [
-     [0.64549722 0.91287093 0.64549722]
+    | apple | ms	0.583333333333334
+    | apple | se	[
+     [       0.85391256        0.81649658        0.64549722]
     ]
-
-    # apple ~ bake # m
-    [
-     [1.5 1.5 2.5]
-     [3.5 4.5 4.5]
+    | apple | ss	1.16666666666667
+    | apple || err df	6
+    | apple ~ bake | F	0.0666666666666671
+    | apple ~ bake | F_p	0.936190104380701
+    | apple ~ bake | df	2
+    | apple ~ bake | m	[
+     [1.5   2 2.5]
+     [  4   4 4.5]
     ]
-
-    # apple ~ bake # se
-    [
-     [0.5 0.5 0.5]
-     [0.5 0.5 0.5]
+    | apple ~ bake | ms	0.0833333333333339
+    | apple ~ bake | se	[
+     [0.5   1 0.5]
+     [  1   1 0.5]
     ]
-
-    # bake # m
-    [
-     [ 1.8333333  4.1666667]
+    | apple ~ bake | ss	0.166666666666668
+    | apple ~ bake || err df	6
+    | bake | F	11.2666666666667
+    | bake | F_p	0.015294126084452
+    | bake | df	1
+    | bake | m	[
+     [               2        4.1666667]
     ]
-
-    # bake # se
-    [
-     [0.30731815 0.30731815]
+    | bake | ms	14.0833333333333
+    | bake | se	[
+     [       0.36514837        0.40138649]
     ]
-
-    F       7.6
-    F_df    [5 6]
-    F_p     0.0141586545851857
-    ms_model        3.8
-    ms_residual     0.5
-    ss_model        19
-    ss_residual     3
-    ss_total        22
-    | apple | F     2
-    | apple | F_df  [2 6]
-    | apple | F_p   0.216
-    | apple | ms    1
-    | apple | ss    2
-    | apple ~ bake | F      0.666666666666667
-    | apple ~ bake | F_df   [2 6]
-    | apple ~ bake | F_p    0.54770848985725
-    | apple ~ bake | ms     0.333333333333334
-    | apple ~ bake | ss     0.666666666666667
-    | bake | F      32.6666666666667
-    | bake | F_df   [1 6]
-    | bake | F_p    0.00124263849516693
-    | bake | ms     16.3333333333333
-    | bake | ss     16.3333333333333
+    | bake | ss	14.0833333333333
+    | bake || err df	6
 
 This is implemented as a call to L</anova_rptd>, with an C<undef>
 subjects vector.
@@ -853,8 +844,8 @@ sub _cell_means {
     $m->getndims == 1 and $m = $m->dummy(1);
     my $se = sqrt( ($ss/($_->sumover - 1)) / $_->sumover )->reshape(@shape);
     $se->getndims == 1 and $se = $se->dummy(1);
-    $cm{ "# $idv->[$i] # m" }  = $m;
-    $cm{ "# $idv->[$i] # se" } = $se;
+    $cm{ "| $idv->[$i] | m" }  = $m;
+    $cm{ "| $idv->[$i] | se" } = $se;
     $i++;
   }
   \%cm;
@@ -935,65 +926,53 @@ Usage:
       # subj must be the first argument
     my %m = $dv->anova_rptd( $subj, $b, $w, {ivnm=>['Beer', 'Wings']} );
 
-    print "$_\t$m{$_}\n" for sort keys %m;
-
-    # Beer # m
-    [
-     [ 10.916667  8.9166667]
-    ]
-
-    # Beer # se
-    [
-     [ 0.4614791  0.4614791]
-    ]
-
-    # Beer ~ Wings # m
-    [
-     [   10     7]
-     [ 10.5  9.25]
-     [12.25  10.5]
-    ]
-
-    # Beer ~ Wings # se
-    [
-     [0.89170561 0.89170561]
-     [0.89170561 0.89170561]
-     [0.89170561 0.89170561]
-    ]
-
-    # Wings # m
-    [
-     [   8.5  9.875 11.375]
-    ]
-
-    # Wings # se
-    [
-     [0.67571978 0.67571978 0.67571978]
-    ]
+    print "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m;
 
     ss_residual	19.0833333333333
     ss_subject	24.8333333333333
     ss_total	133.833333333333
     | Beer | F	9.39130434782609
     | Beer | F_p	0.0547977008378944
     | Beer | df	1
+    | Beer | m	[
+     [ 10.916667  8.9166667]
+    ]
     | Beer | ms	24
+    | Beer | se	[
+     [ 0.4614791  0.4614791]
+    ]
     | Beer | ss	24
     | Beer || err df	3
     | Beer || err ms	2.55555555555556
     | Beer || err ss	7.66666666666667
     | Beer ~ Wings | F	0.510917030567687
     | Beer ~ Wings | F_p	0.623881438624431
     | Beer ~ Wings | df	2
+    | Beer ~ Wings | m	[
+     [   10     7]
+     [ 10.5  9.25]
+     [12.25  10.5]
+    ]
     | Beer ~ Wings | ms	1.625
+    | Beer ~ Wings | se	[
+     [0.89170561 0.89170561]
+     [0.89170561 0.89170561]
+     [0.89170561 0.89170561]
+    ]
     | Beer ~ Wings | ss	3.25000000000001
     | Beer ~ Wings || err df	6
     | Beer ~ Wings || err ms	3.18055555555555
     | Beer ~ Wings || err ss	19.0833333333333
     | Wings | F	4.52851711026616
     | Wings | F_p	0.0632754786153548
     | Wings | df	2
+    | Wings | m	[
+     [   8.5  9.875 11.375]
+    ]
     | Wings | ms	16.5416666666667
+    | Wings | se	[
+     [0.67571978 0.67571978 0.67571978]
+    ]
     | Wings | ss	33.0833333333333
     | Wings || err df	6
     | Wings || err ms	3.65277777777778
@@ -1131,7 +1110,7 @@ sub PDL::anova_rptd {
   @ret{ keys %$cm_ref } = values %$cm_ref;
 
   my $highest = join(' ~ ', @{ $opt{IVNM} });
-  $cm_ref->{"# $highest # m"}->plot_means( $cm_ref->{"# $highest # se"},
+  $cm_ref->{"| $highest | m"}->plot_means( $cm_ref->{"| $highest | se"},
                                            { %opt, IVNM=>$idv } )
     if $opt{PLOT};
 
@@ -1300,7 +1279,7 @@ sub _fix_rptd_se {
     # if ivnm lvls_ref for within ss only this can work for mixed design
   my ($cm_ref, $ret, $ivnm, $lvls_ref, $n) = @_;
   my @se = grep /se$/, keys %$cm_ref;
-  @se = map { /^# (.+?) # se$/; $1; } @se;
+  @se = map { /^\| (.+?) \| se$/; $1; } @se;
   my @n_obs
     = map {
         my @ivs = split / ~ /, $_;
@@ -1313,7 +1292,7 @@ sub _fix_rptd_se {
         $n * $collapsed;
       } @se;
   for my $i (0 .. $#se) {
-    $cm_ref->{"# $se[$i] # se"}
+    $cm_ref->{"| $se[$i] | se"}
       .= sqrt( $ret->{"| $se[$i] || err ms"} / $n_obs[$i] );
   }
   $cm_ref;
@@ -2028,7 +2007,7 @@ Usage:
 
 Or like this:
 
-    $m{'# apple ~ bake # m'}->plot_means;
+    $m{'| apple ~ bake | m'}->plot_means;
 
 =cut
 
diff --git a/t/glm.t b/t/glm.t
@@ -323,11 +323,11 @@ is_pdl $b_bad->dvrs(ones(6) * .5), pdl( 'BAD -1.17741002251547 -1.17741002251547
   $d->set( 20, 10 );
   my @idv = qw(A B C);
   my %m = $d->anova(\@a, $b, $c, {IVNM=>\@idv, plot=>0});
-  $m{'# A ~ B ~ C # m'} = $m{'# A ~ B ~ C # m'}->slice(',(2),');
+  $m{'| A ~ B ~ C | m'} = $m{'| A ~ B ~ C | m'}->slice(',(2),');
   test_stats_cmp(\%m, {
     '| A | F' => 165.252100840336,
     '| A ~ B ~ C | F' => 0.0756302521008415,
-    '# A ~ B ~ C # m' => pdl([[qw(8 18 38 53)], [qw(8 23 38 53)]]),
+    '| A ~ B ~ C | m' => pdl([[qw(8 18 38 53)], [qw(8 23 38 53)]]),
   });
   my $dsgn = $d->anova_design_matrix(undef, \@a, $b, $c, {IVNM=>\@idv});
   is_pdl $dsgn, pdl '
@@ -407,11 +407,11 @@ like $@, qr/residual df = 0/, 'error when too few sample';
   my $d = pdl qw( 3 2 1 5 2 1 5 3 1 4 1 2 3 5 5 );
   my $a = qsort sequence(15) % 3;
   my %m = $d->anova($a, {plot=>0});
-  $m{$_} = $m{$_}->squeeze for '# IV_0 # m';
+  $m{$_} = $m{$_}->squeeze for '| IV_0 | m';
   test_stats_cmp(\%m, {
     F => 0.160919540229886,
     ms_model => 0.466666666666669,
-    '# IV_0 # m' => pdl(qw( 2.6 2.8 3.2 )),
+    '| IV_0 | m' => pdl(qw( 2.6 2.8 3.2 )),
   });
 }
 
@@ -424,12 +424,12 @@ like $@, qr/residual df = 0/, 'error when too few sample';
   my $b = sequence(60) % 3;
   my $c = sequence(60) % 2;
   my %m = $d->anova(\@a, $b, $c, {IVNM=>[qw(A B C)], plot=>0, v=>0});
-  $m{$_} = $m{$_}->slice(',(1)')->squeeze for '# A ~ B ~ C # m', '# A ~ B ~ C # se';
+  $m{$_} = $m{$_}->slice(',(1)')->squeeze for '| A ~ B ~ C | m', '| A ~ B ~ C | se';
   test_stats_cmp(\%m, {
     '| A | F' => 150.00306433446,
     '| A ~ B ~ C | F' => 0.17534855325553,
-    '# A ~ B ~ C # m' => pdl([qw( 4 22 37 52 )], [qw( 10 22 37 52 )]),
-    '# A ~ B ~ C # se' => pdl([qw( 0 6 1.7320508 3.4641016 )], [qw( 3 3 3.4641016 1.7320508 )]),
+    '| A ~ B ~ C | m' => pdl([qw( 4 22 37 52 )], [qw( 10 22 37 52 )]),
+    '| A ~ B ~ C | se' => pdl([qw( 0 6 1.7320508 3.4641016 )], [qw( 3 3 3.4641016 1.7320508 )]),
   });
 }
 
@@ -443,11 +443,11 @@ like $@, qr/residual df = 0/, 'error when too few sample';
   $d->setbadat(62);
   $b->setbadat(61);
   my %m = $d->anova(\@a, $b, $c, {IVNM=>[qw(A B C)], plot=>0, V=>0});
-  $m{$_} = $m{$_}->slice(',(2)')->squeeze for '# A ~ B ~ C # m';
+  $m{$_} = $m{$_}->slice(',(2)')->squeeze for '| A ~ B ~ C | m';
   test_stats_cmp(\%m, {
     '| A | F' => 165.252100840336,
     '| A ~ B ~ C | F' => 0.0756302521008415,
-    '# A ~ B ~ C # m' => pdl([qw(8 18 38 53)], [qw(8 23 38 53)]),
+    '| A ~ B ~ C | m' => pdl([qw(8 18 38 53)], [qw(8 23 38 53)]),
   });
 }
 
@@ -603,19 +603,19 @@ is_pdl pdl([1,1,1], [2,2,2])->stddz, zeroes(3,2), 'stddz nan vs bad';
  [1 -1 -1 -1 -1 -1 -1]
 ';
   my %m = $d->anova_rptd($s, $a, {plot=>0});
-  $m{$_} = $m{$_}->squeeze for '# IV_0 # m';
+  $m{$_} = $m{$_}->squeeze for '| IV_0 | m';
   test_stats_cmp(\%m, {
     '| IV_0 | F' => 0.145077720207254,
     '| IV_0 | ms' => 0.466666666666667,
-    '# IV_0 # m' => pdl(qw( 2.6 2.8 3.2 )),
+    '| IV_0 | m' => pdl(qw( 2.6 2.8 3.2 )),
   });
 }
 
 my %anova_bad_a = (
   '| a | F' => 0.351351351351351,
   '| a | ms' => 0.722222222222222,
   '| a ~ b | F' => 5.25,
-  '# a ~ b # m' => pdl(qw( 3  1.3333333  3.3333333 3.3333333  3.6666667  2.6666667  ))->reshape(3,2),
+  '| a ~ b | m' => pdl(qw( 3  1.3333333  3.3333333 3.3333333  3.6666667  2.6666667  ))->reshape(3,2),
 );
 { # anova_rptd_2w bad dv
   my $d = pdl '[3 2 1 5 2 BAD 5 3 1 4 1 2 3 5 5 3 4 2 1 5 4 3 2 2]';
@@ -671,8 +671,8 @@ my %anova_bad_a = (
     '| a ~ c | F' => 3.64615384615385,
     '| b ~ c || err ms' => 2.63194444444445,
     '| a ~ b ~ c | F' => 1.71299093655589,
-    '# a ~ b ~ c # m' => pdl(qw( 4 2.75 2.75 2.5 3.25 4.25 3.5 1.75 2 3.5 2.75 2.25 ))->reshape(2,2,3),
-    '# a ~ b # se' => ones(2, 2) * 0.55014729,
+    '| a ~ b ~ c | m' => pdl(qw( 4 2.75 2.75 2.5 3.25 4.25 3.5 1.75 2 3.5 2.75 2.25 ))->reshape(2,2,3),
+    '| a ~ b | se' => ones(2, 2) * 0.55014729,
   });
 }
 
@@ -879,7 +879,7 @@ my %ans_mixed = (
   '| a ~ b | F' => 1.54225352112676,
   '| b | F' => 0.738693467336681,
   '| b || err ms' => 2.76388888888889,
-  '# a ~ b # se' => ones(3,2) * 0.70217915,
+  '| a ~ b | se' => ones(3,2) * 0.70217915,
 );
 { # anova_rptd mixed
   my $d = pdl '[3 2 1 5 2 1 5 3 1 4 1 2 3 5 5 3 4 2 1 5 4 3 2 2]';