Moving R plots to julia (#92)

ajinkya-k · palday · web-flow · commit 8fe4d7791cd9 · 2025-05-13T23:01:42.000Z
Co-authored-by: Phillip Alday <palday@users.noreply.github.com>
diff --git a/.gitignore b/.gitignore
@@ -19,3 +19,4 @@ data/
 site_libs/
 /.quarto/
 /.luarc.json
+.cache/
diff --git a/Project.toml b/Project.toml
@@ -28,7 +28,6 @@ MixedModelsMakie = "b12ae82c-6730-437f-aff9-d2c38332a376"
 NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
-RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RectangularFullPacked = "27983f2f-6524-42ba-a408-2b5a31c238e4"
 SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
@@ -62,7 +61,6 @@ MixedModels = "4,5"
 MixedModelsMakie = "0.4"
 NLopt = "1"
 PooledArrays = "1"
-RCall = "0.14.8"
 Random = "1"
 SHA = "0.7"
 Scratch = "1"
@@ -71,7 +69,7 @@ StatsAPI = "1"
 StatsBase = "0.33, 0.34"
 StatsModels = "0.7"
 Tables = "1"
-TidierPlots = "0.11"
+TidierPlots = "0.11.1"
 TypedTables = "1"
 ZipFile = "0.10"
 julia = "1.10"
diff --git a/aGHQ.qmd b/aGHQ.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
diff --git a/datatables.qmd b/datatables.qmd
@@ -1,5 +1,7 @@
 ---
 engine: julia
+execute:
+  cache: true
 ---
 
 # Working with data tables {#sec-datatables}
diff --git a/glmmbernoulli.qmd b/glmmbernoulli.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
diff --git a/intro.qmd b/intro.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
@@ -675,13 +677,6 @@ The apparent distribution of the estimates of $\sigma_1$ in @fig-dsm01_bs_sigma_
 A [kernel density estimate](https://en.wikipedia.org/wiki/Kernel_density_estimation) approximates a probability density from a finite sample by blurring or smearing the positions of the sample values according to a *kernel* such as a narrow Gaussian distribution (see the linked article for details).
 In this case the distribution of the estimates is a combination of a continuous distribution and a spike or point mass at zero as shown in a histogram, @fig-dsm01_bs_sigma_hist.
 
-:::{.callout-note collapse="true"}
-
-### Adjust the alpha in multiple histograms
-
-Use a lower alpha in the colors for multiple histograms so the bars behind another color are more visible
-:::
-
 ```{julia}
 #| code-fold: true
 #| fig-cap: Histogram of bootstrap variance-components as standard deviations from model dsm01
@@ -693,7 +688,7 @@ draw(
     :value => "Bootstrap parameter estimates of σ";
     color=(:group => "Group"),
   ) *
-  AlgebraOfGraphics.histogram(; bins=80);
+  AlgebraOfGraphics.histogram(; bins=80) * visual(alpha = 0.4);
   figure=(; size=(600, 340)),
 )
 ```
@@ -724,7 +719,7 @@ draw(
     :value_abs2 => "Bootstrap sample of estimates of σ²",
     color=:group,
   ) *
-  AlgebraOfGraphics.histogram(; bins=200);
+  AlgebraOfGraphics.histogram(; bins=200) * visual(alpha = 0.4);
   figure=(; size=(600, 340)),
 )
 ```
diff --git a/largescaledesigned.qmd b/largescaledesigned.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
diff --git a/longitudinal.qmd b/longitudinal.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
@@ -44,8 +46,8 @@ using LinearAlgebra
 using MixedModels
 using MixedModelsMakie
 using Random
-using RCall
 using StandardizedPredictors
+using Statistics
 ```
 
 and declare some constants, if not already defined.
@@ -96,7 +98,8 @@ draw(
     :resp => "Ramus bone length (mm)",
     color=:Subj,
   ) *
-  (visual(Scatter) + visual(Lines));
+  (visual(Scatter) + visual(Lines)),
+  scales(Color = (; legend = false,));
   figure=(; size=(600, 450)),
 )
 ```
@@ -105,22 +108,48 @@ Unfortunately, unless there are very few subjects, such figures, sometimes calle
 
 A preferred alternative is to plot response versus time with each subject's data in a separate panel (@fig-eglayout).
 
-```{r}
+```{julia}
 #| code-fold: true
 #| fig-cap: Length of ramus bone versus age for a sample of 20 boys.  The panels are ordered rowwise, starting at the bottom left, by increasing bone length at age 8.
 #| label: fig-eglayout
-plot(
-  lattice::xyplot(
-    resp ~ time|Subj,
-    $egdf,
-    type=c("g","p","r"),
-    aspect="xy",
-    index.cond=function(x,y) coef(lm(y ~ x)) %*% c(1,8),
-    xlab="Age (yr)",
-    ylab="Ramus bone length (mm)",
-  )
+# Panel order: subjects sorted by increasing bone length at age 8.
+age_8 = sort!(subset(egdf, :time => ByRow(==(8))), :resp)
+slopes = combine(
+    groupby(egdf, :Subj),
+    :resp => std => :sy,
+    :time => std => :sx,
+    [:resp, :time] => cor => :corr,
+    [:time, :resp] => ((x, y) -> cor(x, y) * std(y) / std(x)) => :slopeabs,
 )
-NULL
+
+ry = maximum(egdf.resp) - minimum(egdf.resp)
+rx = maximum(egdf.time) - minimum(egdf.time)
+# Width/height ratio that draws the median per-subject slope at roughly 45°.
+α = median(slopes.slopeabs) * rx / ry
+
+plt = data(egdf) *
+  mapping(
+    :time => "Age (yr)",
+    :resp => "Ramus bone length (mm)",
+    layout = :Subj => sorter(age_8.Subj),
+  ) * (visual(Scatter, marker = '∘', markersize = 20) + linear(; interval=nothing)) *
+  visual(color = :blue)
+# Palette of (row, col) positions: fill the 2×10 grid rowwise from the bottom left.
+fg = draw(plt, scales(Layout = (; palette = vec([(b,a) for a in 1:10, b in 2:-1:1])));
+         axis = (; aspect = α, xticklabelrotation = pi/2, xticklabelsize = 10),
+         figure = (; size=(800, 600))
+);
+
+f = fg.figure
+
+colgap!(f.layout, 0);
+rowgap!(f.layout, 5);
+# Size each column relative to row 1 so the panels sit flush at aspect α.
+for col in 1:10
+    colsize!(f.layout, col, Aspect(1, α))
+end
+
+f
 ```
 
 To aid comparisons between subjects the axes are the same in every panel and the order of the panels is chosen systematically - in @fig-eglayout the order is by increasing bone length at 8 years of age.
@@ -170,7 +199,7 @@ If the purpose of the experiment is to create a predictive model for the growth
 
 Alternatively, we could center at the average observed time, 8.75 years, or at some other value of interest.
 
-The important thing is to make clear what the `(Itercept)` parameter estimates represent.
+The important thing is to make clear what the `(Intercept)` parameter estimates represent.
 The [StandardizedPredictors.jl](https://github.com/beacon-biosignals/StandardizedPredictors.jl) package allows for convenient representations of several standardizing transformations in a `contrasts` specification for the model.
 An advantage of this method of coding a transformation is that the coefficient names include a concise description of the transformation.
 
@@ -336,8 +365,10 @@ draw(
   data(bxgdf[("Control",)]) *
   bxaxes *
   mapping(; color=:Subj) *
-  (visual(Scatter) + visual(Lines));
+  (visual(Scatter) + visual(Lines)),
+  scales(Color = (; legend = false,));
   figure=(; size=(600, 450)),
+  legend = (; position=:bottom, titleposition = :left)
 )
 ```
 
@@ -603,10 +634,12 @@ draw(
   data(@subset(bxm03pars, :type == "ρ")) *
   mapping(
     :value => "Bootstrap replicates of correlation estimates";
-    color=(:names => "Variables"),
+    color = :names => renamer(["(Intercept), time" => "(Intercept), time", "(Intercept), time ^ 2" => "(Intercept), time²", "time, time ^ 2" => "time, time²"])  => "Variables"
   ) *
-  AlgebraOfGraphics.density();
+  AlgebraOfGraphics.density(),
+  scales(Color = (; palette = [:tomato, :teal, :orange],));
   figure=(; size=(600, 400)),
+  legend=(;position=:bottom, titleposition = :left)
 )
 ```
 
@@ -624,11 +657,13 @@ let
   )
   mp = mapping(
     :z => "Fisher's z transformation of correlation estimates";
-    color=(:names => "Variables"),
+    color=:names => renamer(["(Intercept), time" => "(Intercept), time", "(Intercept), time ^ 2" => "(Intercept), time²", "time, time ^ 2" => "time, time²"])  => "Variables"
   )
   draw(
-    data(dat) * mp * AlgebraOfGraphics.density();
+    data(dat) * mp * AlgebraOfGraphics.density(),
+    scales(Color = (; palette = [:tomato, :teal, :orange],));
     figure=(; size=(600, 400)),
+    legend=(;position=:bottom, titleposition = :left)
   )
 end
 ```
@@ -723,8 +758,10 @@ let
       color=:Group,
       col=:model,
     ) *
-    visual(Lines);
-    axis=(width=120, height=130),
+    visual(Lines),
+    scales(Color = (; palette = [:tomato, :teal, :orange],));
+    figure=(; size=(600, 400)),
+    legend=(;position=:bottom, titleposition = :left)
   )
 end
 ```
diff --git a/multiple.qmd b/multiple.qmd
@@ -4,6 +4,8 @@ fig-height: 3
 fig-dpi: 192
 fig-format: png
 engine: julia
+execute:
+  cache: true
 julia:
   exeflags: ["--project"]
 ---
@@ -45,7 +47,6 @@ using EmbraceUncertainty: dataset
 using MixedModels
 using MixedModelsMakie
 using Random
-using RCall
 using StatsBase
 ```
 
@@ -247,7 +248,7 @@ We say that "fill-in" has occurred when forming the sparse Cholesky decompositio
 In this case there is a relatively minor amount of fill but in other cases there can be a substantial amount of fill.
 The computational methods are tuned to reduce the amount of fill.
 
-### Precision of parameter estimates in the Pencillin model {#sec-Penicillinprecision}
+### Precision of parameter estimates in the Penicillin model {#sec-Penicillinprecision}
 
 A parametric bootstrap sample of the parameter estimates
 
@@ -350,19 +351,40 @@ It would be fine to do this within a batch but the plot would be misleading if w
 There is no relationship between cask 'a' in batch 'A' and cask 'a' in batch 'B'.
 The labels 'a', 'b' and 'c' are used only to distinguish the three samples within a batch; they do not have a meaning across batches.
 
-```{r}
+```{julia}
 #| code-fold: true
 #| fig-cap: "Strength of paste preparations according to sample within batch"
 #| label: fig-pastesdot
-pp <- within($pastes, bb <- reorder(batch, strength))
-plot(
-  lattice::dotplot(sample ~ strength | bb, pp, pch = 21, strip = FALSE,
-    strip.left = TRUE, layout = c(1, 10),
-    scales = list(y = list(relation = "free")),
-    ylab = "Sample within batch", type = c("p", "a"),
-    xlab = "Paste strength", jitter.y = TRUE)
-)
-NULL
+# Batches ordered by decreasing mean strength; used to order the panels.
+batch_sort = sort!(
+  combine(groupby(pastes, :batch), :strength => mean => :strength),
+  :strength;
+  rev=true,
+).batch
+
+# Mean strength for each cask within batch (the `:sample` column is carried along).
+paste_means = combine(
+  groupby(pastes, [:cask, :batch]),
+  :sample,
+  :strength => mean => "strength",
+)
+
+fg = let
+  # both layers share the same axes and the same faceting by batch
+  shared = mapping(
+    :cask => "Cask within batch",
+    :strength,
+    col = :batch => sorter(batch_sort),
+  )
+  points = data(pastes) * shared * visual(Scatter; marker = '∘', markersize = 15, color = :blue)
+  means = data(paste_means) * shared * visual(Lines; color = :blue)
+  draw(points + means; facet = (; linkyaxes=:minimal), figure = (; size=(700, 350)))
+end;
+
+f = fg.figure
+
+colgap!(f.layout, 4)
+f
 ```
 
 In @fig-pastesdot we plot the two strength measurements on each of the samples within each of the batches and join up the average strength for each sample.
diff --git a/references.qmd b/references.qmd
@@ -26,14 +26,5 @@ using Pkg
 Pkg.status()
 ```
 
-```{julia}
-#| echo: false
-using RCall
-```
-
-```{r}
-print(sessionInfo())
-NULL
-```
 
 *This page was rendered from git revision {{< git-rev short=true >}} using Quarto {{< version >}}.*
diff --git a/theme.scss b/theme.scss
@@ -13,8 +13,8 @@ $code-bg: #f8f5f0;
 $font-family-monospace: JuliaMono, Menlo, "Roboto Mono", "Lucida Sans Typewriter", "Source Code Pro", monospace;
 
 pre {
-    line-height: normal;
+    line-height: normal !important;
     code {
-        line-height: normal;
+        line-height: normal !important;
     }
 }