JuliaMixedModels · ajinkya-k · Mar 26, 2025 · Mar 27, 2025 · Mar 27, 2025 · Mar 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -19,3 +19,4 @@ data/
 site_libs/
 /.quarto/
 /.luarc.json
+.cache/
diff --git a/Project.toml b/Project.toml
@@ -28,7 +28,6 @@ MixedModelsMakie = "b12ae82c-6730-437f-aff9-d2c38332a376"
 NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
-RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RectangularFullPacked = "27983f2f-6524-42ba-a408-2b5a31c238e4"
 SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
@@ -38,7 +37,6 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-TidierPlots = "337ecbd1-5042-4e2a-ae6f-ca776f97570a"
 TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
 ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
 
@@ -62,7 +60,6 @@ MixedModels = "4,5"
 MixedModelsMakie = "0.4"
 NLopt = "1"
 PooledArrays = "1"
-RCall = "0.14.8"
 Random = "1"
 SHA = "0.7"
 Scratch = "1"

diff --git a/aGHQ.qmd b/aGHQ.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---

diff --git a/datatables.qmd b/datatables.qmd
@@ -1,5 +1,7 @@
 ---
 engine: julia
+execute:
+  cache: true
 ---
 
 # Working with data tables {#sec-datatables}

diff --git a/glmmbernoulli.qmd b/glmmbernoulli.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---

diff --git a/intro.qmd b/intro.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
@@ -675,13 +677,6 @@ The apparent distribution of the estimates of $\sigma_1$ in @fig-dsm01_bs_sigma_
 A [kernel density estimate](https://en.wikipedia.org/wiki/Kernel_density_estimation) approximates a probability density from a finite sample by blurring or smearing the positions of the sample values according to a *kernel* such as a narrow Gaussian distribution (see the linked article for details).
 In this case the distribution of the estimates is a combination of a continuous distribution and a spike or point mass at zero as shown in a histogram, @fig-dsm01_bs_sigma_hist.
 
-:::{.callout-note collapse="true"}
-
-### Adjust the alpha in multiple histograms
-
-Use a lower alpha in the colors for multiple histograms so the bars behind another color are more visible
-:::
-
 ```{julia}
 #| code-fold: true
 #| fig-cap: Histogram of bootstrap variance-components as standard deviations from model dsm01
@@ -693,7 +688,7 @@ draw(
     :value => "Bootstrap parameter estimates of σ";
     color=(:group => "Group"),
   ) *
-  AlgebraOfGraphics.histogram(; bins=80);
+  AlgebraOfGraphics.histogram(; bins=80) * visual(alpha = 0.4);
   figure=(; size=(600, 340)),
 )
 ```
@@ -724,7 +719,7 @@ draw(
     :value_abs2 => "Bootstrap sample of estimates of σ²",
     color=:group,
   ) *
-  AlgebraOfGraphics.histogram(; bins=200);
+  AlgebraOfGraphics.histogram(; bins=200) * visual(alpha = 0.4);
   figure=(; size=(600, 340)),
 )
 ```

diff --git a/largescaledesigned.qmd b/largescaledesigned.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---

diff --git a/largescaleobserved.qmd b/largescaleobserved.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags:
     - --project
@@ -28,7 +30,6 @@ using MixedModels
 using MixedModelsMakie
 using SparseArrays         # for the nnz function
 using Statistics           # for the mean function
-using TidierPlots
 using TypedTables
 ```
 
@@ -269,10 +270,18 @@ As shown if @fig-nratingsbycutoff, the number of ratings varies from a little ov
 #| code-fold: true
 #| label: fig-nratingsbycutoff
 #| fig-cap: "Number of ratings in reduced table by movie cutoff value"
-ggplot(sizespeed, aes(; x=:mc, y=:nratings, color=:ucutoff)) +
-    geom_point() +
-    geom_line() +
-    labs(x="Minimum number of ratings per movie", y="Number of ratings")
+
+sizespeed.ucbak = sizespeed.uc
+sizespeed.uc = nonnumeric.(sizespeed.uc)
+draw(
+  data(sizespeed) *
+  mapping(
+    :mc => "Minimum number of ratings per movie (mc)",
+    :nratings => "Total number of ratings",
+    color=:uc,
+  ) * visual(ScatterLines);
+  figure=(; size=(600, 350))
+)
 ```
 
 For this range of choices of cutoffs, the user cutoff has more impact on the number of ratings in the reduced dataset than does the movie cutoff.
@@ -285,10 +294,16 @@ A glance at the table shows that the number of users, `nusers`, is essentially a
 #| code-fold: true
 #| label: fig-nusersbycutoff
 #| fig-cap: "Number of users in reduced table by movie cutoff value"
-ggplot(sizespeed, aes(; x=:mc, y=:nmvie, color=:ucutoff)) +
-    geom_point() +
-    geom_line() +
-    labs(x="Minimum number of ratings per movie", y="Number of movies in table")
+
+draw(
+  data(sizespeed) *
+  mapping(
+    :mc => "Minimum number of ratings per movie (mc)",
+    :nmvie => "Number of movies in table",
+    color=:uc) *
+    visual(ScatterLines);
+  figure=(; size=(600, 350))
+)
 ```
 
 ```{julia}
@@ -400,10 +415,16 @@ The memory footprint of the model representation depends strongly on the number
 #| code-fold: true
 #| fig-cap: Memory footprint of the model representation by minimum number of ratings per user and per movie."
 #| label: fig-memoryfootprint
-ggplot(sizespeed, aes(x=:mc, y=:modelsz, color=:ucutoff)) +
-    geom_point() +
-    geom_line() +
-    labs(x="Minimum number of ratings per movie", y="Size of model (GiB)")
+
+draw(
+  data(sizespeed) *
+  mapping(
+    :mc => "Minimum number of ratings per movie (mc)",
+    :modelsz => "Size of model object (GiB)",
+    color=:uc) *
+    visual(ScatterLines);
+    figure=(; size=(600, 350))
+)
 ```
 
 @fig-memoryvsl22 shows the dominance of the `[2, 2]` block of `L` in the overall memory footprint of the model
@@ -412,10 +433,18 @@ ggplot(sizespeed, aes(x=:mc, y=:modelsz, color=:ucutoff)) +
 #| code-fold: true
 #| fig-cap: Memory footprint of the model representation (GiB) versus the size of the [2, 2] block of L (GiB)
 #| label: fig-memoryvsl22
-ggplot(sizespeed, aes(; x=:L22sz, y=:modelsz, color=:ucutoff)) +
-    geom_point() +
-    geom_line() +
-    labs(y="Size of model representation (GiB)", x="Size of [2,2] block of L (GiB)")
+
+draw(
+  data(transform!(sizespeed, [:L22sz, :modelsz] => ((x, y) -> x ./ y) => :L22prop)) *
+  mapping(
+    :modelsz => "Size of model object (GiB)",
+    [:L22sz => "Size of [2,2] block of L (GiB)", :L22prop => "Proportion of memory footprint in L[2,2]"],
+    col = dims(1) => renamer(["Size of L[2,2] block", "Memory Proportion of L[2,2] block"]),
+    color=:uc) *
+    visual(ScatterLines);
+    legend = (; position = :bottom, titleposition=:left),
+    figure=(; size=(600, 350))
+)
 ```
 
 @fig-memoryfootprint shows that when all the movies are included in the data to which the model is fit (i.e. `mc == 1`) the total memory footprint is over 20 GiB, and nearly 90% of that memory is that required for the `[2,2]` block of `L`.
@@ -441,10 +470,16 @@ As shown in @fig-evtimevsl22 the evaluation time for the objective is predominan
 #| code-fold: true
 #| fig-cap: "Evaluation time for the objective (s) versus size of the [2, 2] block of L (GiB)"
 #| label: fig-evtimevsl22
-ggplot(sizespeed, aes(x=:L22sz, y=:evtime, color=:ucutoff)) +
-    geom_point() +
-    geom_line() +
-    labs(x="Size of [2,2] block of L (GiB)", y="Time for one evaluation of objective (s)")
+
+draw(
+  data(sizespeed) *
+  mapping(
+    :L22sz => "Size of [2,2] block of L (GiB)",
+    :evtime => "Time for one evaluation of objective (s)",
+    color=:uc) *
+    visual(ScatterLines);
+    figure=(; size=(600, 350))
+)
 ```
 
 However the middle panel shows that the number of iterations to convergence is highly variable.

diff --git a/longitudinal.qmd b/longitudinal.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
@@ -44,8 +46,8 @@ using LinearAlgebra
 using MixedModels
 using MixedModelsMakie
 using Random
-using RCall
 using StandardizedPredictors
+using GLM
 ```
 
 and declare some constants, if not already defined.
@@ -96,7 +98,8 @@ draw(
     :resp => "Ramus bone length (mm)",
     color=:Subj,
   ) *
-  (visual(Scatter) + visual(Lines));
+  (visual(Scatter) + visual(Lines)),
+  scales(Color = (; legend = false,));
   figure=(; size=(600, 450)),
 )
 ```
@@ -105,22 +108,33 @@ Unfortunately, unless there are very few subjects, such figures, sometimes calle
 
 A preferred alternative is to plot response versus time with each subject's data in a separate panel (@fig-eglayout).
 
-```{r}
+```{julia}
 #| code-fold: true
 #| fig-cap: Length of ramus bone versus age for a sample of 20 boys.  The panels are ordered rowwise, starting at the bottom left, by increasing bone length at age 8.
 #| label: fig-eglayout
-plot(
-  lattice::xyplot(
-    resp ~ time|Subj,
-    $egdf,
-    type=c("g","p","r"),
-    aspect="xy",
-    index.cond=function(x,y) coef(lm(y ~ x)) %*% c(1,8),
-    xlab="Age (yr)",
-    ylab="Ramus bone length (mm)",
-  )
+lmeg = lm(@formula(resp~ 1 + time*Subj),egdf);
+newx = DataFrame(Subj = unique(egdf.Subj));
+newx.time .= 8.;
+newx.inter = predict(lmeg,newx);
+
+draw(
+  data(egdf) *
+  mapping(
+    :time => "Age (yr)",
+    :resp => "Ramus bone length (mm)",
+    layout = :Subj => sorter(sort!(newx, :inter).Subj),
+  ) * (visual(Scatter, marker = '∘', markersize = 20) + linear(;interval=nothing)) * visual(color = :blue),
+  scales(Layout = (; palette = vec([(b,a) for a in 1:10, b in 2:-1:1]))),
+  axis = (; xticklabelrotation = pi/2, xticklabelsize = 10),
+  figure = (; size=(600, 400))
 )
-NULL
+
+f = current_figure();
+colgap!(f.layout, 2);
+rowgap!(f.layout, 5);
+
+f
+
 ```
 
 To aid comparisons between subjects the axes are the same in every panel and the order of the panels is chosen systematically - in @fig-eglayout the order is by increasing bone length at 8 years of age.
@@ -170,7 +184,7 @@ If the purpose of the experiment is to create a predictive model for the growth
 
 Alternatively, we could center at the average observed time, 8.75 years, or at some other value of interest.
 
-The important thing is to make clear what the `(Itercept)` parameter estimates represent.
+The important thing is to make clear what the `(Intercept)` parameter estimates represent.
 The [StandardizedPredictors.jl](https://github.com/beacon-biosignals/StandardizedPredictors.jl) package allows for convenient representations of several standardizing transformations in a `contrasts` specification for the model.
 An advantage of this method of coding a transformation is that the coefficient names include a concise description of the transformation.
 
@@ -336,8 +350,10 @@ draw(
   data(bxgdf[("Control",)]) *
   bxaxes *
   mapping(; color=:Subj) *
-  (visual(Scatter) + visual(Lines));
+  (visual(Scatter) + visual(Lines)),
+  scales(Color = (; legend = false,));
   figure=(; size=(600, 450)),
+  legend = (; position=:bottom, titleposition = :left)
 )
 ```
 
@@ -603,10 +619,12 @@ draw(
   data(@subset(bxm03pars, :type == "ρ")) *
   mapping(
     :value => "Bootstrap replicates of correlation estimates";
-    color=(:names => "Variables"),
+    color = :names => renamer(["(Intercept), time" => "(Intercept), time", "(Intercept), time ^ 2" => "(Intercept), time².", "time, time ^ 2" => "time, time²"])  => "Variables"
   ) *
-  AlgebraOfGraphics.density();
+  AlgebraOfGraphics.density(),
+  scales(Color = (; palette = [:tomato, :teal, :orange],));
   figure=(; size=(600, 400)),
+  legend=(;position=:bottom, titleposition = :left)
 )
 ```
 
@@ -624,11 +642,13 @@ let
   )
   mp = mapping(
     :z => "Fisher's z transformation of correlation estimates";
-    color=(:names => "Variables"),
+    color=:names => renamer(["(Intercept), time" => "(Intercept), time", "(Intercept), time ^ 2" => "(Intercept), time².", "time, time ^ 2" => "time, time²"])  => "Variables"
   )
   draw(
-    data(dat) * mp * AlgebraOfGraphics.density();
+    data(dat) * mp * AlgebraOfGraphics.density(),
+    scales(Color = (; palette = [:tomato, :teal, :orange],));
     figure=(; size=(600, 400)),
+    legend=(;position=:bottom, titleposition = :left)
   )
 end
 ```
@@ -723,8 +743,10 @@ let
       color=:Group,
       col=:model,
     ) *
-    visual(Lines);
-    axis=(width=120, height=130),
+    visual(Lines),
+    scales(Color = (; palette = [:tomato, :teal, :orange],));
+    figure=(; size=(600, 400)),
+    legend=(;position=:bottom, titleposition = :left)
   )
 end
 ```