diff --git a/Project.toml b/Project.toml index 968249c..1e06450 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.1.0" [deps] AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67" Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" +BSplineKit = "093aae92-e908-43d7-9660-e50ee39d5a0a" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" @@ -44,7 +45,7 @@ ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" AlgebraOfGraphics = "0.6" Arrow = "2" CSV = "0.10" -CairoMakie = "0.8,0.9,0.10" +CairoMakie = "0.8,0.9,0.10,0.11" CategoricalArrays = "0.10" Chain = "0.5" DataAPI = "1.9" @@ -56,8 +57,8 @@ Effects = "0.1,1" FreqTables = "0.4" GLM = "1" MixedModels = "4,5" -MixedModelsMakie = "0.3.13" -NLopt = "0.6" +MixedModelsMakie = "0.3" +NLopt = "0.6,1" ProgressMeter = "1.7" RCall = "0.13" Scratch = "1" diff --git a/_quarto.yml b/_quarto.yml index 0a4bf16..2e8c56f 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -19,6 +19,7 @@ book: appendices: - datatables.qmd - linalg.qmd + - profiling.qmd - aGHQ.qmd execute-dir: project diff --git a/aGHQ.qmd b/aGHQ.qmd index e2bb399..85081b4 100644 --- a/aGHQ.qmd +++ b/aGHQ.qmd @@ -137,7 +137,6 @@ using LinearAlgebra using MixedModels using MixedModelsMakie using NLopt -using PooledArrays using ProgressMeter using StatsAPI diff --git a/intro.qmd b/intro.qmd index 989e361..a0f22f6 100644 --- a/intro.qmd +++ b/intro.qmd @@ -102,26 +102,26 @@ To access these data within `Julia` we must first attach this package, and other ```{julia} #| code-fold: show #| output: false +import ProgressMeter # progress of optimizer iterations +ProgressMeter.ijulia_behavior(:clear) # suppress progress output + using AlgebraOfGraphics # high-level graphics using CairoMakie # graphics back-end -using DataFrameMacros # elegant DataFrame manipulation using DataFrames # DataFrame implementation using Markdown # utilities for generating markdown text 
using MixedModels # fit and examine mixed-effects models using MixedModelsMakie # graphics for mixed-effects models -using Printf # formatted printing -using ProgressMeter # progress of optimizer iterations using Random # random number generation -using StatsBase # basic statistical summaries +using Statistics # basic statistical summaries using EmbraceUncertainty: dataset # `dataset` means this one +using AlgebraOfGraphics: density # `density` means this one CairoMakie.activate!(; type="svg") # Scalable Vector Graphics -ProgressMeter.ijulia_behavior(:clear) # suppress progress output ``` A package must be attached before any of the data sets or functions in the package can be used. -If entering this line results in an error report stating that there is no package by one of these names then you must first install the package(s). +If entering this block of code results in an error report stating that there is no package by one of these names then you must first install the package(s). If you are using Julia version 1.8.0 or later the error report will include instructions on how to do this. In what follows, we will assume that these packages have been installed and attached to the session before any of the code shown has been run. @@ -196,7 +196,7 @@ last(dyestuff, 7) or we could tabulate the data using `groupby` and `combine` from [DataFrames](https://github.com/JuliaData/DataFrames.jl). 
```{julia} -combine(groupby(dyestuff, :batch), :yield => mean, nrow => :n) +combine(groupby(dyestuff, :batch), :yield => mean, nrow) ``` Although this table does show us an important property of the data, namely that there are exactly 5 observations on each batch --- a property that we will describe by saying that the data are *balanced* with respect to `batch` --- we usually learn much more about the structure of such data from plots like @fig-dyestuffdata @@ -206,14 +206,21 @@ Although this table does show us an important property of the data, namely that #| label: fig-dyestuffdata #| code-fold: true let - sumry = sort!(combine(groupby(dyestuff, :batch), :yield => mean => :yield), :yield) + sumry = sort!( + combine(groupby(dyestuff, :batch), :yield => mean => :yield), + :yield, + ) mp = mapping( :yield => "Yield of dyestuff [g]", - :batch => sorter(sumry.batch) => "Batch of intermediate product", + :batch => + sorter(sumry.batch) => "Batch of intermediate product", ) draw( - (data(dyestuff) * mp * visual(Scatter; marker='○', markersize=12)) + - (data(sumry) * mp * visual(Lines)); + ( + data(dyestuff) * + mp * + visual(Scatter; marker='○', markersize=12) + ) + (data(sumry) * mp * visual(Lines)); figure=(; resolution=(800, 400)), ) end @@ -272,14 +279,20 @@ A data plot (@fig-dyestuff2data) #| label: fig-dyestuff2data #| code-fold: true let - sumry = sort!(combine(groupby(dyestuff2, :batch), :yield => mean => :yield), :yield) + sumry = sort!( + combine(groupby(dyestuff2, :batch), :yield => mean => :yield), + :yield, + ) mp = mapping( :yield => "Simulated yield", :batch => sorter(sumry.batch) => "Batch", ) draw( - (data(dyestuff2) * mp * visual(Scatter; marker='○', markersize=12)) + - (data(sumry) * mp * visual(Lines)); + ( + data(dyestuff2) * + mp * + visual(Scatter; marker='○', markersize=12) + ) + (data(sumry) * mp * visual(Lines)); figure=(; resolution=(800, 400)), ) end @@ -304,9 +317,8 @@ We will explain the structure of the formula after we have considered an 
example We fit a model to the data allowing for an overall level of the `yield` and for an additive random effect for each level of `batch`. ```{julia} -dsm01 = let - form = @formula(yield ~ 1 + (1 | batch)) - fit(MixedModel, form, dyestuff) +dsm01 = let f = @formula(yield ~ 1 + (1 | batch)) + fit(MixedModel, f, dyestuff) end ``` @@ -379,9 +391,8 @@ An alternative measure of precision of the estimate - a coverage interval based Fitting a similar model to the `dyestuff2` data produces an estimate $\widehat{\sigma}_1=0$. ```{julia} -dsm02 = let - form = @formula(yield ~ 1 + (1 | batch)) - fit(MixedModel, form, dyestuff2) +dsm02 = let f = @formula(yield ~ 1 + (1 | batch)) + fit(MixedModel, f, dyestuff2) end println(dsm02) ``` @@ -559,32 +570,14 @@ For methods that involve simulation, it is best to initialize a random number ge Beginning with Julia v1.7.0 the default random number generator is the `Xoshiro` generator, which we initialize to an arbitrary, but reproducible, value. The object returned by a call to `parametricbootstrap` has a somewhat complex internal structure to allow for the ability to simulate from complex models while still maintaining a comparatively small storage footprint. -To examine the distribution of the parameter estimates we extract a table of all the estimated parameters and convert it to a `DataFrame`. +To examine the distribution of the parameter estimates we extract a `Table` of all the estimated parameters. 
```{julia} #| warning: false -const hide_progress = true # hide the progress bar when sampling +const progress = false # hide the progress bar when sampling Random.seed!(4321234) # random number generator -dsm01samp = parametricbootstrap(10_000, dsm01; hide_progress) -dsm01pars = DataFrame(dsm01samp.allpars) -first(dsm01pars, 7) -``` - -:::{.callout-note} - -### Switch to Table(dsm01samp.tbl) formulation - -::: - -```{julia} -last(dsm01pars, 7) -``` - -Plots of the bootstrap estimates for individual parameters are obtained by extracting subsets of the rows of this dataframe using `subset` methods from the `DataFrames` package or the `@subset` macro from the `DataFramesMacros` package. -For example, - -```{julia} -βdf = @subset(dsm01pars, :type == "β") +dsm01samp = parametricbootstrap(10_000, dsm01; progress) +dsm01stbl = dsm01samp.tbl ``` We begin by examining density plots constructed using the [AlgebraOfGraphics](https://github.com/JuliaGraphics/AlgebraOfGraphics.jl) package. @@ -596,32 +589,24 @@ You can check the details by clicking on the "Code" button in the HTML version o #| fig-cap: Kernel density plot of bootstrap fixed-effects parameter estimates from dsm01 #| label: fig-dsm01_bs_beta_density draw( - data(@subset(dsm01pars, :type == "β")) * - mapping( - :value => "Bootstrap samples of β"; - color=(:names => "Names"), - ) * - AlgebraOfGraphics.density(); - figure=(; resolution=(800, 450)), + data(dsm01stbl) * + mapping(:β1 => "Bootstrap samples of β") * + density(); + figure=(; resolution=(800, 450)) ) ``` -:::{.callout-note collapse="true"} - -### Use :compact=true when interpolating numeric values - -Find a way to use `:compact=true` when interpolating numeric values into the text -::: - ```{julia} #| echo: false -Markdown.parse( - """ - The distribution of the estimates of `β₁` is more-or-less a Gaussian (or "normal") shape, with a mean value of $(@sprintf("%f", mean(βdf.value))) which is close to the estimated `β₁` of $(@sprintf("%f", only(dsm01.β))). 
+let context = (:compact => true) + Markdown.parse( + """ + The distribution of the estimates of `β₁` is more-or-less a Gaussian (or "normal") shape, with a mean value of $(repr(mean(dsm01stbl.β1); context)), which is close to the estimated `β₁` of $(repr(only(dsm01.β); context)). - Similarly the standard deviation of the simulated β values, $(std(βdf.value)) is close to the standard error of the parameter, $(only(stderror(dsm01))). -""", -) + Similarly the standard deviation of the simulated β values, $(repr(std(dsm01stbl.β1); context)), is close to the standard error of the parameter, $(repr(only(stderror(dsm01)); context)). + """, + ) +end ``` In other words, the estimator of the fixed-effects parameter in this case behaves as we would expect. @@ -634,12 +619,12 @@ The situation is different for the estimates of the standard deviation parameter #| fig-cap: Kernel density plot of bootstrap variance-component parameter estimates from model dsm01 #| label: fig-dsm01_bs_sigma_density draw( - data(@subset(dsm01pars, :type == "σ")) * + data(dsm01stbl) * mapping( - :value => "Bootstrap samples of σ"; - color=(:group => "Group"), + [:σ, :σ1] .=> "Bootstrap samples of standard deviations", ) * - AlgebraOfGraphics.density(); + mapping(color=dims(1) => renamer(["σ", "σ₁"])) * + density(); figure=(; resolution=(800, 450)), ) ``` @@ -648,11 +633,13 @@ The estimator for the residual standard deviation, $\sigma$, is approximately no ```{julia} #| echo: false -Markdown.parse( - """ -There is one peak around the "true" value for the simulation, $(only(dsm01.σs.batch)), and another peak at zero. -""", -) +let context = :compact => true + Markdown.parse( + """ + There is one peak around the "true" value for the simulation, $(repr(only(dsm01.σs.batch); context)), and another peak at zero. + """, + ) +end ``` The apparent distribution of the estimates of $\sigma_1$ in @fig-dsm01_bs_sigma_density is being distorted by the method of approximating the density. 
@@ -671,11 +658,11 @@ Use a lower alpha in the colors for multiple histograms so the bars behind anoth #| fig-cap: Histogram of bootstrap variance-components as standard deviations from model dsm01 #| label: fig-dsm01_bs_sigma_hist draw( - data(@subset(dsm01pars, :type == "σ")) * + data(dsm01stbl) * mapping( - :value => "Bootstrap parameter estimates of σ"; - color=(:group => "Group"), + [:σ, :σ1] .=> "Bootstrap samples of standard deviations", ) * + mapping(color=dims(1) => renamer(["σ", "σ₁"])) * AlgebraOfGraphics.histogram(; bins=80); figure=(; resolution=(800, 450)), ) @@ -698,19 +685,47 @@ In many cases standard errors are quoted for estimates of the variance component #| code-fold: true #| fig-cap: Histogram of bootstrap variance-components from model dsm01 #| label: fig-dsm01_bs_sigmasqr_hist -draw( - data(@transform(@subset(dsm01pars, :type == "σ"), abs2(:value))) * - mapping( - :value_abs2 => "Bootstrap sample of estimates of σ²", - color=:group, - ) * - AlgebraOfGraphics.histogram(; bins=200); - figure=(; resolution=(800, 450)), -) +let σs = ["σ", "σ1"] + draw( + data(dsm01stbl) * + mapping( + σs .=> abs2 .=> "Bootstrap samples of variance components", + color=dims(1) => renamer(σs), + ) * + AlgebraOfGraphics.histogram(; bins=200); + figure=(; resolution=(800, 450)), + ) +end ``` The approach of creating coverage intervals from a bootstrap sample of parameter estimates, described in the next section, does accomodate non-normal distributions. +## Quantile-quantile plots and profile-based confidence intervals + +Because the distribution of the bootstrap replications of $\sigma_1$ is a hybrid distribution --- that is, a continuous distribution of positive values with a non-zero probability of exactly zero --- neither a kernel density plot (@fig-dsm01_bs_sigma_density) nor a histogram (@fig-dsm01_bs_sigma_hist) is an ideal depiction of it. 
+ +A *quantile-quantile* or [Q-Q plot](https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot) provides an alternative representation of the distribution in which the "pulse" at zero can be easily discerned. +If we choose to plot the quantiles of the standard normal (or Gaussian) distribution versus the sample quantiles, as in @fig-dsm01_bs_beta_qq, we have a visual assessment of the extent to which the distribution deviates from a normal distribution. +Such a plot is also called a [normal probability plot](https://en.wikipedia.org/wiki/Normal_probability_plot). + +```{julia} +#| code-fold: true +#| fig-cap: Normal probability plot of bootstrap fixed-effects coefficients from model dsm01 +#| label: fig-dsm01_bs_beta_qq +let f = Figure(; size=(1000,400)), + pars = [:β1 => "β", :σ => "σ", :σ1 => "σ₁"], + ppts = inv(512):inv(256):1.0, + z = quantile(Normal(), ppts) + map(enumerate(pars)) do (j, p) + ax = Axis(f[1, j]; xlabel=last(p), aspect=1) + lines!(quantile(getproperty(dsm01stbl, first(p)), ppts), z) + end + f +end +``` + + + ### Confidence intervals on the parameters {#sec-confints} A bootstrap sample of parameter estimates also provides us with a method of creating bootstrap coverage intervals, which can be regarded as confidence intervals on the parameters. @@ -723,7 +738,7 @@ For example, from the sorted parameter values we could return the interval from The `shortestcovint` method returns the shortest of all these potential intervals which will correspond to the interval with the highest empirical density. ```{julia} -DataFrame(shortestcovint(dsm01samp)) +Table(shortestcovint(dsm01samp)) ``` For the `(Intercept)` fixed-effects parameter the interval is similar to an "asymptotic" or Wald interval constructed as the estimate plus or minus two standard errors. 
@@ -750,11 +765,13 @@ DataFrame(dsm01trace.optsum.fitlog) ```{julia} #| echo: false -Markdown.parse( - """ -Here the algorithm converges after 18 function evaluations to a profiled deviance of $(dsm01trace.objective) at θ = $(only(dsm01trace.theta)). -""", -) +let context = :compact => true + Markdown.parse( + """ + Here the algorithm converges after 18 function evaluations to a profiled deviance of $(repr(dsm01trace.objective; context)) at θ = $(repr(only(dsm01trace.theta); context)). + """, + ) +end ``` ## Assessing the random effects {#sec-assessRE} diff --git a/longitudinal.qmd b/longitudinal.qmd index 080389a..a5d922e 100644 --- a/longitudinal.qmd +++ b/longitudinal.qmd @@ -60,7 +60,7 @@ In this chapter we introduce graphical and statistical techniques for the analys ## The elstongrizzle data -Data from a dental study measuring the lengths of the ramus bone (mm) in 20 boys at 8, 8.5, 9, and 9.5 years of age were reported in @elstongrizzle1962 and in @davis2002. +Data from a dental study measuring the lengths of the ramus of the mandible (mm) in 20 boys at 8, 8.5, 9, and 9.5 years of age were reported in @elstongrizzle1962 and in @davis2002. ```{julia} elstongrizzle = dataset(:elstongrizzle) @@ -78,13 +78,13 @@ A common way of plotting such longitudinal data is response versus time on a sin ```{julia} #| code-fold: true -#| fig-cap: Length of ramus bone versus age for a sample of 20 boys. +#| fig-cap: Length of the ramus of the mandible versus age for a sample of 20 boys. #| label: fig-egspaghetti draw( data(egdf) * mapping( :time => "Age (yr)", - :resp => "Ramus bone length (mm)", + :resp => "Length of the ramus of the mandible (mm)", color=:Subj, ) * (visual(Scatter) + visual(Lines)), @@ -97,7 +97,7 @@ A preferred alternative is to plot response versus time with each subject's data ```{julia} #| code-fold: true -#| fig-cap: Length of ramus bone versus age for a sample of 20 boys. 
The panels are ordered rowwise, starting at the bottom left, by increasing bone length at age 8. +#| fig-cap: Length of the ramus of the mandible versus age for a sample of 20 boys. The panels are ordered rowwise, starting at the bottom left, by increasing bone length at age 8. #| label: fig-eglayout RCall.ijulia_setdevice(MIME("image/svg+xml"); width=8, height=7) R""" @@ -109,7 +109,7 @@ print( aspect="xy", index.cond=function(x,y) coef(lm(y ~ x)) %*% c(1,8), xlab="Age (yr)", - ylab="Ramus bone length (mm)", + ylab="Length of the ramus of the mandible (mm)", ), ) """; @@ -565,10 +565,13 @@ bxm03samp = parametricbootstrap( Xoshiro(8642468), 10_000, bxm03; - hide_progress=true, + progress=false, ) -bxm03pars = DataFrame(bxm03samp.allpars) -DataFrame(shortestcovint(bxm03samp)) +bxm03stbl = bxm03samp.tbl +``` + +```{julia} +Table(shortestcovint(bxm03samp)) ``` shows that the coverage intervals for both of the correlation parameters involving the `(Intercept)` extend out to one of the limits of the allowable range [-1, 1] of correlations. 
@@ -580,12 +583,13 @@ A kernel density plot, @fig-bxm03rhodens, of the parametric bootstrap estimates #| fig-cap: Kernel density plots of parametric bootstrap estimates of correlation estimates from model bxm03 #| label: fig-bxm03rhodens draw( - data(@subset(bxm03pars, :type == "ρ")) * + data(bxm03stbl) * mapping( - :value => "Bootstrap replicates of correlation estimates"; - color=(:names => "Variables"), + [:ρ1, :ρ2, :ρ3] .=> "Bootstrap samples of correlations of random effects", ) * + mapping(color=dims(1) => renamer(["ρ₁", "ρ₂", "ρ₃"])) * AlgebraOfGraphics.density(); + figure=(; resolution=(800, 450)), ) ``` @@ -595,20 +599,15 @@ Even on the scale of [Fisher's z transformation](https://en.wikipedia.org/wiki/F #| code-fold: true #| fig-cap: Kernel density plots of Fisher's z transformation of parametric bootstrap estimates of correlation estimates from model bxm03 #| label: fig-bxm03rhodensatanh -let - dat = @transform( - @subset(bxm03pars, :type == "ρ"), - :z = atanh(clamp(:value, -0.99999, 0.99999)) - ) - mp = mapping( - :z => "Fisher's z transformation of correlation estimates"; - color=(:names => "Variables"), - ) - draw( - data(dat) * mp * AlgebraOfGraphics.density(); - figure=(; resolution=(800, 450)), - ) -end +draw( + data(bxm03stbl) * + mapping( + [:ρ1, :ρ2, :ρ3] .=> (x -> atanh(clamp(x, -0.99999, 0.99999))).=> "Fisher's z transformation of correlation bootstrap estimates", + ) * + mapping(color=dims(1) => renamer(["ρ₁", "ρ₂", "ρ₃"])) * + AlgebraOfGraphics.density(); + figure=(; resolution=(800, 450)), +) ``` Because of these high correlations, trying to deal with the degenerate random effects distribution by simply removing the random effects for `time ^ 2` reduces the model too much. 
diff --git a/multiple.qmd b/multiple.qmd index 1303918..b2aeeb7 100644 --- a/multiple.qmd +++ b/multiple.qmd @@ -29,7 +29,7 @@ Attach the packages to be used in this chapter ```{julia} #| code-fold: true #| output: false -const hide_progress = true +const progress = false using AlgebraOfGraphics using CairoMakie @@ -210,7 +210,7 @@ caterpillar!( ) ``` -The prediction intervals on the random effects (@fig-pnm01platepred and @fig-pnm01samplepred) confirm that the conditional distribution of the random effects for `plate` much less variability than does the conditional distribution of the random effects for `sample`. (Note that the horizontal scales on these two plots are different.) +The prediction intervals on the random effects (@fig-pnm01platepred and @fig-pnm01samplepred) confirm that the conditional distribution of the random effects for `plate` exhibits much less variability than does the conditional distribution of the random effects for `sample`. (Note that the horizontal scales on these two plots are different.) However, the conditional distribution of the random effect for a particular sample, say sample `F`, has less variability than the conditional distribution of the random effect for a particular plate, say plate `m`. That is, the lines in @fig-pnm01samplepred are wider than the lines in @fig-pnm01platepred, even after taking the different axis scales into account. This is because the conditional distribution of the random effect for a particular sample depends on 24 responses while the conditional distribution of the random effect for a particular plate depends on only 6 responses. 
@@ -251,14 +251,14 @@ A parametric bootstrap sample of the parameter estimates ```{julia} #| code-fold: true bsrng = Random.seed!(9876789) -pnm01samp = parametricbootstrap(bsrng, 10_000, pnm01; hide_progress) -pnm01pars = DataFrame(pnm01samp.allpars); +pnm01samp = parametricbootstrap(bsrng, 10_000, pnm01; progress) +pnm01stbl = pnm01samp.tbl ``` can be used to create shortest 95% coverage intervals for the parameters in the model. ```{julia} -DataFrame(shortestcovint(pnm01samp)) +Table(shortestcovint(pnm01samp)) ``` As for model `dsm01` the bootstrap parameter estimates of the fixed-effects parameter have approximately a "normal" or Gaussian shape, as shown in the kernel density plot (@fig-pnm01bsbeta) @@ -268,11 +268,8 @@ As for model `dsm01` the bootstrap parameter estimates of the fixed-effects para #| fig-cap: "Parametric bootstrap estimates of fixed-effects parameters in model pnm01" #| label: fig-pnm01bsbeta draw( - data(@subset(pnm01pars, :type == "β")) * - mapping( - :value => "Bootstrap samples of β"; - color=(:names => "Names"), - ) * + data(pnm01stbl) * + mapping(:β1 => "Bootstrap samples of β₁") * AlgebraOfGraphics.density(); figure=(; resolution=(800, 450)), ) @@ -292,11 +289,11 @@ The densities of the variance-components, on the scale of the standard deviation #| label: fig-pnm01bssigma #| code-fold: true draw( - data(@subset(pnm01pars, :type == "σ")) * + data(pnm01stbl) * mapping( - :value => "Bootstrap samples of σ"; - color=(:group => "Group"), + [:σ, :σ1, :σ2] .=> "Bootstrap samples of standard deviations", ) * + mapping(color=dims(1) => renamer(["σ", "σ₁", "σ₂"])) * AlgebraOfGraphics.density(); figure=(; resolution=(800, 450)), ) @@ -458,8 +455,8 @@ Furthermore, kernel density estimates from a parametric bootstrap sample of the ```{julia} Random.seed!(4567654) -psm01samp = parametricbootstrap(10_000, psm01; hide_progress) -psm01pars = DataFrame(psm01samp.allpars); +psm01samp = parametricbootstrap(10_000, psm01; progress) +psm01stbl = 
psm01samp.tbl ``` ```{julia} @@ -467,11 +464,11 @@ psm01pars = DataFrame(psm01samp.allpars); #| label: fig-psm01bssampdens #| code-fold: true draw( - data(@subset(psm01pars, :type == "σ")) * + data(psm01stbl) * mapping( - :value => "Bootstrap samples of σ"; - color=(:group => "Group"), + [:σ, :σ1, :σ2] .=> "Bootstrap samples of standard deviations", ) * + mapping(color=dims(1) => renamer(["σ", "σ₁", "σ₂"])) * AlgebraOfGraphics.density(); figure=(; resolution=(800, 450)), ) @@ -529,7 +526,7 @@ Comparing the coverage intervals for models `psm01` and `psm02` ```{julia} #| code-fold: true -DataFrame(shortestcovint(psm01samp)) +Table(shortestcovint(psm01samp)) ``` ```{julia} @@ -538,9 +535,9 @@ psm02samp = parametricbootstrap( Random.seed!(9753579), 10_000, psm02; - hide_progress, + progress, ) -DataFrame(shortestcovint(psm02samp)) +Table(shortestcovint(psm02samp)) ``` The confidence intervals on $\sigma$ and $\beta_0$ are similar for the two models. diff --git a/profiling.qmd b/profiling.qmd new file mode 100644 index 0000000..31f4dbc --- /dev/null +++ b/profiling.qmd @@ -0,0 +1,262 @@ +--- +jupyter: julia-1.9 +--- + +\newcommand\bbL{{\mathbf{L}}} +\newcommand\bbbeta{{\boldsymbol{\beta}}} +\newcommand\bbtheta{{\boldsymbol{\theta}}} + +# Profile-based Inference {#sec-profiling} + +As described in @sec-modelformulation, in the `MixedModels.jl` package the *maximum likelihood estimates* (mle's) of the parameters of a mixed-effects model are determined by minimizing a *profiled objective* with respect to a reduced parameter vector, $\bbtheta$. +The value of the objective is negative twice the log-likelihood for the model at the observed data, given the values of the parameters. +The objective is *profiled* in the sense that the other parameters in the model, $\bbbeta$ and $\sigma$, are at their conditionally optimal values, given $\bbtheta$. 
+ +In @sec-dyestuffLMM, where we fit a model to the *dyestuff* data (described in @sec-dyestuff), the parameter $\bbtheta$ is one-dimensional and we can easily visualize the function to be optimized. + +First attach the packages to be used, access the data sets, establish the contrasts dictionary, and fit the model. + +```{julia} +#| code-fold: true +using BSplineKit +using CairoMakie +using Distributions +using MixedModels +using MixedModelsMakie +using ProgressMeter + +CairoMakie.activate!(; type="svg"); +ProgressMeter.ijulia_behavior(:clear); +if !@isdefined contrasts + const contrasts = Dict{Symbol,Any}(:batch => Grouping()) +end +dsm01 = let + f = @formula(yield ~ 1 + (1|batch)) + fit(MixedModel, f, MixedModels.dataset(:dyestuff); contrasts) +end +``` + +The `objective!` method for a single argument that is a `LinearMixedModel` returns a function that returns the value of the objective for the model from an argument that can be a scalar, an `AbstractVector`, or a `Tuple`. + +Because the value of the scalar $\theta$ that minimizes the objective is approximately 0.75. + +```{julia} +only(dsm01.optsum.final) +``` + +we choose to plot the profiled objective over the interval `0.0 .. 2.1` in @fig-profiledobjdsm01. + +```{julia} +#| code-fold: true +#| fig-cap: Profiled objective function for model dsm01 as a function of the scalar parameter θ +#| label: fig-profiledobjdsm01 +try + lines( + 0.0 .. 2.1, + objective!(dsm01); + axis=(; xlabel="θ", ylabel="Profiled objective for dsm01"), + ) +finally + objective!(dsm01, dsm01.optsum.final) # restore parameter estimate +end +``` + +## Differences in the profiled objective + +In a likelihood ratio test of the hypothesis $H_0: \theta = 0$ (i.e. the random effects are inert and the model reverts to a linear model) versus $H_a: \theta\ne0$, the test statistic is the difference in the profiled objective at $\theta=0$ to that at the optimum. 
+The distribution of this test statistic is approximately a $\chi^2$ distribution where the degrees of freedom are the number of constraints on the parameters under $H_0$ --- one, in this case. + +Thus a more meaningful scale for the vertical axis is the difference of the profiled objective at $\theta$ to that at $\hat\theta$, as shown in @fig-profiledobjdsm01diff. + +```{julia} +#| code-fold: true +#| fig-cap: Difference of the profiled objective function for model dsm01 from that at the optimal θ. +#| label: fig-profiledobjdsm01diff +try + lines( + 0.0 .. 2.1, + x -> objective!(dsm01, x) - dsm01.optsum.fmin; + axis=(; xlabel="θ", ylabel="Change in the profiled objective for dsm01"), + ) +finally + objective!(dsm01, dsm01.optsum.final) # restore parameter estimate +end +``` + +### Profile-based confidence intervals + +Although not always presented in this way, general confidence intervals and confidence regions are associated with hypothesis tests on the corresponding parameter. +A $(1- \alpha)$ confidence interval (or, more generally, confidence set) on a parameter like $\theta$ in this model is the set of values of $\theta_0$ for which $H_0: \theta = \theta_0$ versus $H_a: \theta \ne \theta_0$ is not rejected at level $\alpha$. + +Thus a 95% profile-based confidence interval on $\theta$ for this model is the set of values for which the difference in the profiled objective from the minimum profiled objective is less than + +```{julia} +quantile(Chisq(1), 0.95) +``` + +Because a $\chi^2$ distribution on 1 degree of freedom is the square of a standard normal distribution, it can be more intuitive to consider the difference in the profiled objective from the minimum objective on the square root scale. +A further enhancement is to use the *signed square root*, which is the square root of the profiled objective but with the sign of $\theta-\hat\theta$. +This *profile ζ* function can be compared to quantiles of the standard normal (or Gaussian) distribution. 
+ +The `profile` method for a `LinearMixedModel` returns a structure with a table of values of the profile ζ for each of the parameters in the model. +In the case of the $\bbtheta$ parameters the profile ζ is either evaluated directly, when $\bbtheta$ is one-dimensional, or by conditional optimization of the general profiled objective with respect to the other components of $\bbtheta$. +For the other parameters the constrained optimization is customized for the type of parameter. + +```{julia} +dspr01 = profile(dsm01) +dspr01.tbl +``` + +The columns of the table are `p`, the parameter being profiled, $\zeta$, the signed square root of the difference between the profiled objective and the minimum objective, and the parameter values at the conditional optimum. + +@fig-thetazetadsm01 shows the profiled objective on the ζ scale (signed square root of the difference from the minimum value) versus the $\theta$ parameter. + +```{julia} +#| code-fold: true +#| fig-cap: Signed square-root of the difference of the profiled objective function for model dsm01 from that at the optimal θ. +#| label: fig-thetazetadsm01 +lines( + 0.0 .. 2.1, + identity ∘ dspr01.fwd[:θ1], + axis=(; xlabel="θ", ylabel="Profile ζ for dsm01"), +) +``` + +The parameter values at the conditional optimum may list redundant parameters, such as $\sigma_1=\sigma\theta_1$, in this case. +The reason for including possibly redundant parameters is that the $\bbtheta$ parameters are constructed to simplify the optimization process, whereas parameters like $\sigma$ and $\sigma_1$ have a direct interpretation in the model as standard deviations (i.e. scale parameters) in the description of the stochastic components of the model. 
+ + +The table can be filtered according to the parameter being profiled, which is stored as a symbol in column `p` of the table + +```{julia} +filter(r -> r.p == :β1, dspr01.tbl) +``` + +Another way of writing the expression on which to filter is as a composition of a partially-applied equality comparison and the `first` function, which extracts the first element of each row. + +```{julia} +#| output: false +filter(==(:β1) ∘ first, dspr01.tbl) +``` + +The function `zetaplot!` in the `MixedModelsMakie` package produces plots of the profile ζ versus the parameter value, either on the ζ scale, @fig-sigmazetadsm01, or on the absolute value scale with confidence intervals added, @fig-sigmaabszetadsm01. + +```{julia} +#| code-fold: true +#| fig-cap: Profile ζ as a function of the dispersion parameters in model dsm01 +#| label: fig-sigmazetadsm01 +let + f2 = Figure(; resolution=(1000,550)) + zetaplot!(f2, dspr01; ptyp='σ') +end +``` + +```{julia} +#| code-fold: true +#| fig-cap: The absolute value of the profile ζ as a function of the dispersion parameters in model dsm01. The horizontal lines are confidence intervals with nominal 50%, 80%, 90%, 95% and 99% coverage. +#| label: fig-sigmaabszetadsm01 +let + f = Figure(; resolution=(1000,550)) + zetaplot!(f, dspr01; absv=true, ptyp='σ') +end +``` + +```{julia} +#| code-fold: true +#| fig-cap: Profile ζ as a function of the fixed-effects parameter in model dsm01 +#| label: fig-betazetadsm01 +let + f3 = Figure(; resolution=(600,550)) + zetaplot!(f3, dspr01; ptyp='β') +end +``` + +Numerical values for the endpoints of the confidence intervals are returned by `confint` methods. +Applied to the model itself, confint returns intervals on only the fixed-effects parameters, and these are based on their estimates, their standard errors and quantiles of the standard normal distribution. +Such intervals are sometimes called the "Wald intervals". 
+ +```{julia} +confint(dsm01) # 95% confidence interval, by default +``` + +The method for a `MixedModelProfile` object returns the profile-based confidence intervals (95% coverage, by default), which more closely reflect the behavior of the estimators. + +```{julia} +confint(dspr01) +``` + +These intervals correspond to the horizontal lines in @fig-sigmaabszetadsm01 that are second from the top. +To get the endpoints of the uppermost horizontal lines, which are the 99% profile-based confidence intervals, use + +```{julia} +confint(dspr01; level=0.99) +``` + +## Forward and reverse splines + +The profile table contains the results of individual optimizations or evaluations of the profiled objective. +A `MixedModelProfile` object also contains cubic interpolation splines, created using the [BSplineKit.jl](https://github.com/jipolanco/BSplineKit.jl) package, +in both the forward (parameter $\rightarrow\zeta$) and reverse ($\zeta\rightarrow$ parameter) directions. + +The slope of the profile ζ function at the estimate is the inverse of the standard error for the parameter. +Thus the slope of the inverse profile ζ function at zero is the standard error. + +```{julia} +((Derivative(1) * dspr01.rev[:β1])(0.0), only(stderror(dsm01))) +``` + +## Interpretation of the ζ plots + +A plot of the profile ζ versus the parameter, like the panels in @fig-sigmazetadsm01, gives information about the distribution of the estimator of the parameter that is similar to what we get from a normal probability plot of a sample from the distribution. + +If this plot is essentially a straight line over the region of interest then the distribution of the estimator of the parameter is very close to a normal distribution. 
+ +## Other examples + +Set up the model evaluation +```{julia} +#| code-fold: true +#| output: false +contrasts[:sample] = contrasts[:plate] = contrasts[:batch] = contrasts[:cask] = Grouping() +include(pkgdir(MixedModels, "test", "modelcache.jl")) +``` +```{julia} +pspr01 = profile(first(models(:pastes))) +println(pspr01.m) +``` + +```{julia} +confint(pspr01) +``` + +```{julia} +#| code-fold: true +#| fig-cap: The absolute value of the profile ζ as a function of the dispersion parameters in model psm01. The horizontal lines are confidence intervals with nominal 50%, 80%, 90%, 95% and 99% coverage. +#| label: fig-sigmaabszetasm01 +zetaplot!(Figure(;resolution=(800,400)), pspr01; ptyp='σ', absv=true) +``` + +```{julia} +pspr02 = profile(last(models(:pastes))) +confint(pspr02) +``` + +```{julia} +#| code-fold: true +#| fig-cap: The absolute value of the profile ζ as a function of the dispersion parameters in model psm02. The horizontal lines are confidence intervals with nominal 50%, 80%, 90%, 95% and 99% coverage. +#| label: fig-profileζpsm02abs +zetaplot!(Figure(; resolution=(800,400)), pspr02; ptyp='σ', absv=true) +``` + + +```{julia} +pnpr01 = profile(first(models(:penicillin))) +confint(pnpr01) +``` + +```{julia} +#| code-fold: true +#| fig-cap: Profile ζ as a function of the standard deviation parameters in model pnm01. +#| label: fig-sigmazetapnpr01 +zetaplot!(Figure(;resolution=(1000,400)), pnpr01; ptyp='σ') +```