paper/CDL-bibliography/cdl.bib (80 additions, 4 deletions)
@@ -1,7 +1,80 @@
 
 
+@article{NortEtal02,
+author = {North, B V and Curtis, D and Sham, P C},
+date-added = {2024-02-21 18:40:44 -0500},
+date-modified = {2024-02-21 18:43:17 -0500},
+doi = {10.1086/341527},
+journal = {American Journal of Human Genetics},
+month = {Aug},
+number = {2},
+pages = {439--441},
+title = {A note on the calculation of empirical P values from Monte Carlo procedures},
+volume = {71},
+year = {2002}}
+
+@book{DaviHink97,
+author = {Davison, A. C. and Hinkley, D. V.},
+date-modified = {2024-02-21 18:39:11 -0500},
+month = {October},
+publisher = {Cambridge University Press},
+series = {Cambridge Series in Statistical and Probabilistic Mathematics},
+title = {Bootstrap Methods and their Application},
+year = {1997}}
+
+@incollection{SnijBosk11,
+author = {T A B Snijders and R Bosker},
+booktitle = {{Multilevel Analysis: An Introduction to Basic and Advanced Multilevel Modeling}},
+chapter = {6},
+date-added = {2024-02-21 18:08:36 -0500},
+date-modified = {2024-02-21 18:29:12 -0500},
+edition = {2nd},
+month = {November},
+pages = {94--108},
+publisher = {Sage Publications},
+title = {More powerful tests for variance parameters},
+year = {2011}}
+
+@article{ScheEtal08b,
+author = {Fabian Scheipl and Sonja Greven and Helmut K{\"u}chenhoff},
+date-added = {2024-02-21 17:29:35 -0500},
+date-modified = {2024-02-21 17:30:19 -0500},
+doi = {10.1016/j.csda.2007.10.022},
+issn = {0167-9473},
+journal = {Computational Statistics \& Data Analysis},
+number = {7},
+pages = {3283--3299},
+title = {Size and power of tests for a zero random effect variance or polynomial regression in additive and linear mixed models},
+volume = {52},
+year = {2008}}
+
+@article{GoldSimo00,
+author = {Nick Goldman and Simon Whelan},
+date-added = {2024-02-21 17:18:14 -0500},
+date-modified = {2024-02-21 17:19:25 -0500},
+doi = {10.1093/oxfordjournals.molbev.a026378},
+issn = {0737-4038},
+journal = {Molecular Biology and Evolution},
+month = {06},
+number = {6},
+pages = {975--978},
+title = {{Statistical Tests of Gamma-Distributed Rate Heterogeneity in Models of Sequence Evolution in Phylogenetics}},
+volume = {17},
+year = {2000}}
+
+@article{HaleHojs14,
+author = {Halekoh, Ulrich and H{\o}jsgaard, S{\o}ren},
+date-added = {2024-02-21 17:05:30 -0500},
+date-modified = {2024-02-21 17:05:45 -0500},
+doi = {10.18637/jss.v059.i09},
+journal = {Journal of Statistical Software},
+number = {9},
+pages = {1--32},
+title = {{A Kenward-Roger Approximation and Parametric Bootstrap Methods for Tests in Linear Mixed Models -- The R Package pbkrtest}},
+volume = {59},
+year = {2014}}
+
 @article{BarrEtal13,
-abstract = {Linear mixed-effects models (LMEMs) have become increasingly prominent in psycholinguistics and related areas. However, many researchers do not seem to appreciate how random effects structures affect the generalizability of an analysis. Here, we argue that researchers using LMEMs for confirmatory hypothesis testing should minimally adhere to the standards that have been in place for many decades. Through theoretical arguments and Monte Carlo simulation, we show that LMEMs generalize best when they include the maximal random effects structure justified by the design. The generalization performance of LMEMs including data-driven random effects structures strongly depends upon modeling criteria and sample size, yielding reasonable results on moderately-sized samples when conservative criteria are used, but with little or no power advantage over maximal models. Finally, random-intercepts-only LMEMs used on within-subjects and/or within-items data from populations where subjects and/or items vary in their sensitivity to experimental manipulations always generalize worse than separate F1 and F2 tests, and in many cases, even worse than F1 alone. Maximal LMEMs should be the `gold standard' for confirmatory hypothesis testing in psycholinguistics and beyond.},
 author = {Dale J. Barr and Roger Levy and Christoph Scheepers and Harry J. Tily},
 date-added = {2024-02-20 10:18:31 -0500},
 date-modified = {2024-02-20 10:18:52 -0500},
@@ -25798,7 +25871,8 @@ @article{GoloTaub99
 publisher = {Society for Neuroscience},
 title = {Head direction cells in rats with hippocampal or overlying neocortical lesions: evidence for impaired angular path integration},
 author = {R G Robertson and E T Rolls and P Georges-Fran{\c{c}}ois and S Panzeri},
@@ -31915,8 +31989,9 @@ @article{Srin99
 volume = {1},
 year = {1999}}
 
-@article{ScheEtal08,
+@article{ScheEtal08a,
 author = {C A Schevon and S K Ng and J Cappell and R R Goodman and G McKhann and A Waziri and A Branner and A Sosunov and C E Schroeder and R G Emerson},
+date-modified = {2024-02-21 17:30:06 -0500},
 journal = {Journal of Clinical Neurophysiology},
 number = {6},
 pages = {321--330},
@@ -35180,7 +35255,8 @@ @article{DawEtal02
 pages = {603--616},
 title = {Opponent interactions between serotonin and dopamine},
@@ ... @@
 Fundamental Forces}}\DIFadd{, }\textit{\DIFadd{Birth of Stars}}\DIFadd{, or general physics knowledge.
 Note that with our coding scheme, identifiers for each }\texttt{\DIFadd{question}} \DIFadd{are
 implicitly nested within levels of }\texttt{\DIFadd{lecture}} \DIFadd{and do not require explicit
-nesting in our model formula.
+nesting in our model formula. We then iteratively removed random effects from
+the maximal model until it successfully converged with a full rank (i.e., non-singular)
+random effects variance-covariance matrix.
 }
 
-%DIF > We then iteratively removed random effects from the maximal model until it
-%DIF > successfully converged with a full rank (i.e., non-singular) random effects
-%DIF > variance-covariance matrix.
-
 %DIF > % JRM NOTE: do we need this next paragraph? Commenting out for now...
 %DIF > %When inspecting the model's random effect estimates revealed multiple terms estimated at the boundary of their parameter space (i.e., variance components of 0 or correlation terms of $\pm 1$), we found that the order in which we eliminated these terms typically did not affect which terms did and did not need to be removed in order for the model to converge to a non-degenerate solution.
 %DIF > When this required eliminating multiple terms whose estimates reached the boundary of their parameter space (i.e., variance components of 0 or correlation terms of $\pm 1$), we found that the order in which we did so typically did not change the set of terms that needed to be removed in order for the model to converge to a non-degenerate solution.
@@ ... @@
 \DIFadd{where ``}\texttt{\DIFadd{accuracy}}\DIFadd{'', ``}\texttt{\DIFadd{participant}}\DIFadd{'', and ``}\texttt{\DIFadd{question}}\DIFadd{'' are as defined above.
+As with our full models, the null models we fit for the ``All questions'' version of the analysis for each quiz contained an additional term, $\mathtt{(1\ \vert\ lecture)}$, where ``}\texttt{\DIFadd{lecture}}\DIFadd{'' is as defined above.
+We then compared each full model to its reduced (null) equivalent using a likelihood-ratio test (LRT).
+Because the typical asymptotic $\chi^2_d$ approximation of the null distribution for the LRT statistic ($\lambda_{LR}$) is anti-conservative for models that differ in their random slope terms~\mbox{%DIFAUXCMD
+\citep{GoldSimo00,ScheEtal08b,SnijBosk11}}\hskip0pt%DIFAUXCMD
+, we computed $p$-values for these tests using a parametric bootstrapping procedure~\mbox{%DIFAUXCMD
+\citep{HaleHojs14}}\hskip0pt%DIFAUXCMD
+.
+For each of 1,000 bootstraps, we used the fitted null model to simulate a sample of observations of equal size to our original sample.
+We then re-fit both the null and full models to this simulated sample and compared them via an LRT.
+This yielded a distribution of $\lambda_{LR}$ statistics we may expect to observe under our null hypothesis.
+Following~\mbox{%DIFAUXCMD
+\citet{DaviHink97,NortEtal02}}\hskip0pt%DIFAUXCMD
+, we computed a corrected $p$-value for our observed $\lambda_{LR}$ as $\frac{r + 1}{n + 1}$, where $r$ is the number of simulated model comparisons that yielded a $\lambda_{LR}$ greater than or equal to our observed value and $n$ is the number of simulations we ran (1,000).
 }
 
-
-
-
-
-
-
-
-
-%DIF > In order to assess the predictive value of the knowledge estimates, we then fit a second set of ``null'' models to the same sets of observations used to fit our full GLMMs. These
-%DIF >
-%DIF >
-%DIF > used the same sets of observations used to fit these GLMMs to fit a second set of ``null'' models.
-%DIF >
-%DIF >
-%DIF >
-%DIF > Next, in order to assess the predictive value of the knowledge estimates
-%DIF >
-%DIF >
-%DIF >
-%DIF > In order to assess the predictive value of the knowledge estimates we used to fit each GLMM, we then fit a \textit{second} model to the same data as each of the 15
-%DIF >
-%DIF > In order to assess whether the knowledge estimates we used to fit each GLMM could reliably predict participants' success on held-out questions, we then fit a second GLMM to the observations
paper/main.tex (12 additions, 90 deletions)
@@ -1502,11 +1502,9 @@ \subsubsection*{Generalized linear mixed models}\label{subsec:glmm}
 Fundamental Forces}, \textit{Birth of Stars}, or general physics knowledge.
 Note that with our coding scheme, identifiers for each \texttt{question} are
 implicitly nested within levels of \texttt{lecture} and do not require explicit
-nesting in our model formula.
-
-% We then iteratively removed random effects from the maximal model until it
-% successfully converged with a full rank (i.e., non-singular) random effects
-% variance-covariance matrix.
+nesting in our model formula. We then iteratively removed random effects from
+the maximal model until it successfully converged with a full rank (i.e., non-singular)
+random effects variance-covariance matrix.
 
 
 %% JRM NOTE: do we need this next paragraph? Commenting out for now...
 % %When inspecting the model's random effect estimates revealed multiple terms estimated at the boundary of their parameter space (i.e., variance components of 0 or correlation terms of $\pm 1$), we found that the order in which we eliminated these terms typically did not affect which terms did and did not need to be removed in order for the model to converge to a non-degenerate solution.
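The pruning step restored by this hunk is the standard remedy for singular fits of maximal mixed models, but the diff contains no fitting code. As a rough illustration of the loop it describes, here is a minimal Python sketch using statsmodels' MixedLM; the simulated data, the candidate random-effects structures, and the eigenvalue tolerance are illustrative assumptions, not the paper's actual models or tooling (which this PR does not show).

```python
# Illustrative sketch only -- not the paper's fitting code. Fit the most
# complex candidate random-effects structure first; if the estimated
# random-effects covariance matrix is rank-deficient (singular), fall back
# to the next simpler structure.
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
n_groups, n_per = 30, 20
g = np.repeat(np.arange(n_groups), n_per)
x = rng.normal(size=n_groups * n_per)
# Simulate random intercepts but *no* random slopes, so the maximal
# (intercept + slope) model should come out singular.
y = 2.0 + 0.5 * x + rng.normal(size=n_groups)[g] + rng.normal(size=n_groups * n_per)
data = pd.DataFrame({"y": y, "x": x, "g": g})

# Candidate random-effects structures, ordered from maximal to minimal.
candidates = ["~x", "~1"]

for re_formula in candidates:
    fit = smf.mixedlm("y ~ x", data, groups=data["g"], re_formula=re_formula).fit()
    eigvals = np.linalg.eigvalsh(np.asarray(fit.cov_re))
    if eigvals.min() > 1e-6:  # full-rank random-effects covariance: keep it
        print(f"keeping re_formula={re_formula!r}; eigenvalues: {eigvals}")
        break
    print(f"singular fit with re_formula={re_formula!r}; removing a term...")
```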
@@ -1531,96 +1529,20 @@ \subsubsection*{Generalized linear mixed models}\label{subsec:glmm}
 
 To assess the predictive value of our knowledge estimates, we compared each
 GLMM's ability to discriminate between correctly and incorrectly answered
-questions to that of an analogous model that did not consider estimated
+questions to that of an analogous model that did \textit{not} consider estimated
 knowledge. Specifically, we used the same sets of observations with which we
 fit each ``full'' model to fit a second ``null'' model, with the formula:
-%In order to assess the predictive value of the knowledge estimates, we then fit a second set of ``null'' models to the same sets of observations used to fit our full GLMMs. These
-%
-%
-%used the same sets of observations used to fit these GLMMs to fit a second set of ``null'' models.
-%
-%
-%
-%Next, in order to assess the predictive value of the knowledge estimates
-%
-%
-%
-%In order to assess the predictive value of the knowledge estimates we used to fit each GLMM, we then fit a \textit{second} model to the same data as each of the 15
-%
-%In order to assess whether the knowledge estimates we used to fit each GLMM could reliably predict participants' success on held-out questions, we then fit a second GLMM to the observations
 where ``\texttt{accuracy}'', ``\texttt{participant}'', and ``\texttt{question}'' are as defined above.
+As with our full models, the null models we fit for the ``All questions'' version of the analysis for each quiz contained an additional term, $\mathtt{(1\ \vert\ lecture)}$, where ``\texttt{lecture}'' is as defined above.
+We then compared each full model to its reduced (null) equivalent using a likelihood-ratio test (LRT).
+Because the typical asymptotic $\chi^2_d$ approximation of the null distribution for the LRT statistic ($\lambda_{LR}$) is anti-conservative for models that differ in their random slope terms~\citep{GoldSimo00,ScheEtal08b,SnijBosk11}, we computed $p$-values for these tests using a parametric bootstrapping procedure~\citep{HaleHojs14}.
+For each of 1,000 bootstraps, we used the fitted null model to simulate a sample of observations of equal size to our original sample.
+We then re-fit both the null and full models to this simulated sample and compared them via an LRT.
+This yielded a distribution of $\lambda_{LR}$ statistics we may expect to observe under our null hypothesis.
+Following~\citet{DaviHink97,NortEtal02}, we computed a corrected $p$-value for our observed $\lambda_{LR}$ as $\frac{r + 1}{n + 1}$, where $r$ is the number of simulated model comparisons that yielded a $\lambda_{LR}$ greater than or equal to our observed value and $n$ is the number of simulations we ran (1,000).
 
 \subsubsection*{Estimating the ``smoothness'' of knowledge}\label{subsec:smoothness}
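The bootstrap paragraph added above specifies the procedure fully; the paper cites the R package pbkrtest (Halekoh & Højsgaard, 2014) for its implementation, and no analysis code appears in this PR. As a self-contained illustration of the same logic, here is a minimal Python sketch that swaps in ordinary Gaussian linear models for the paper's GLMMs; the synthetic data, sample sizes, and model formulas are illustrative assumptions, and only the procedure itself (simulate from the fitted null, refit both models, compare the observed statistic to the simulated null distribution) mirrors the text.

```python
# Parametric bootstrap LRT sketch with Gaussian linear models standing in
# for the paper's GLMMs. Illustrative only.
import numpy as np

rng = np.random.default_rng(1)
n = 200
x = rng.normal(size=n)
y = 1.0 + 0.2 * x + rng.normal(size=n)  # modest true effect, for the demo

def fit_gaussian(X, y):
    """OLS fit; returns (max log-likelihood, fitted values, residual sd)."""
    beta, *_ = np.linalg.lstsq(X, y, rcond=None)
    resid = y - X @ beta
    sigma2 = resid @ resid / len(y)  # MLE of the error variance
    ll = -0.5 * len(y) * (np.log(2 * np.pi * sigma2) + 1)
    return ll, X @ beta, np.sqrt(sigma2)

X_null = np.ones((n, 1))                   # intercept-only "null" model
X_full = np.column_stack([np.ones(n), x])  # "full" model adds the predictor

def lrt_stat(y):
    ll0, *_ = fit_gaussian(X_null, y)
    ll1, *_ = fit_gaussian(X_full, y)
    return 2 * (ll1 - ll0)

observed = lrt_stat(y)

# Parametric bootstrap: simulate samples of the original size from the
# fitted null model, then refit both models to each simulated sample.
_, fitted0, sigma0 = fit_gaussian(X_null, y)
n_boot = 1000
boot = np.array([lrt_stat(fitted0 + rng.normal(scale=sigma0, size=n))
                 for _ in range(n_boot)])

# Corrected p-value of Davison & Hinkley (1997) / North et al. (2002).
r = np.sum(boot >= observed)
p = (r + 1) / (n_boot + 1)
print(f"observed LRT = {observed:.3f}, bootstrap p = {p:.4f}")
```

The $\frac{r+1}{n+1}$ form, rather than $r/n$, guarantees a nonzero $p$-value and accounts for the observed statistic itself being one draw from the null distribution when the null is true, which is the point of the North et al. (2002) note cited in the new bibliography entries above.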