@@ -192,7 +192,7 @@ \subsubsection{Investigating the contributions of function words, content
192192 \includegraphics [width=\textwidth ]{figs/loss_all_authors.pdf}
193193
194194
195- \caption {\textbf {Cross-entropy loss across models and authors. }\textbf {A. }
195+ \caption {\textbf {Cross-entropy loss across models and authors. } \textbf {A. }
196196Average cross-entropy loss on \textit {Train }ing data and held-out test data
197197from each author, plotted as a function of the number of training epochs. Each
198198color denotes a model trained on a single author's work. Error ribbons denote
@@ -235,20 +235,20 @@ \subsection{Predictive comparison testing of eight classic authors}
235235 \includegraphics [width=\textwidth ]{figs/t_stats.pdf}
236236
237237
238- \caption {\textbf {Same vs. other author comparisons, by
239- model. } \textbf { A. } Each curve denotes, as a function of the
240- number of training epochs, the the $ t$ -statistic from a $ t$ -test
241- comparing the distribution of losses (across random seeds)
242- assigned to held-out texts from the given author (color) versus
243- held-out texts from all other authors. \textbf { B. } The average
244- $ t$ -statistic across all eight authors, as a function of the
245- number of training epochs . Error ribbons denote
246- bootstrap-estimated 95 \% confidence intervals across authors. See Supplementary
247- Materials for analogous plots using models trained on only content words (Supp.
248- Fig.~\ttestsContent ), only function words (Supp.
249- Fig.~\ttestsFunction ), and only parts of speech (Supp.
250- Fig.~ \ttestsPOS ).}
251- \label {fig:t-stats }
238+ \caption {\textbf {Same vs. other author comparisons, by model. } \textbf { A. } Each
239+ curve denotes, as a function of the number of training epochs, the
240+ $ t$ -statistic from a $ t$ -test comparing the distribution of losses (across
241+ random seeds) assigned to held-out texts from the given author (color) versus
242+ held-out texts from all other authors. \textbf { B. } The average $ t $ -statistic
243+ across all eight authors, as a function of the number of training epochs. The
244+ black curves in both panels indicate the average $ t$ -value corresponding to $ p
245+ = 0.001 $ , for each epoch . Error ribbons denote bootstrap-estimated 95 \%
246+ confidence intervals across authors. See Supplementary Materials for analogous
247+ plots using models trained on only content words (Supp. Fig.~ \ttestsContent ),
248+ only function words (Supp. Fig.~\ttestsFunction ), and only parts of speech
249+ (Supp. Fig.~\ttestsPOS ).}
250+
251+ \label {fig:t-stats }
252252\end {figure* }
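The comparison plotted in Fig.~\ref{fig:t-stats}A is a two-sample $t$-test on per-seed losses. As a minimal illustrative sketch (assuming Welch's unequal-variance form and notation introduced here for illustration; the caption does not specify the variant or the sign convention), the statistic for a given author and epoch would be
\[
  t \;=\; \frac{\bar{\ell}_{\mathrm{same}} - \bar{\ell}_{\mathrm{other}}}
               {\sqrt{\dfrac{s^{2}_{\mathrm{same}}}{n_{\mathrm{same}}} + \dfrac{s^{2}_{\mathrm{other}}}{n_{\mathrm{other}}}}},
\]
where $\bar{\ell}$, $s^{2}$, and $n$ denote the mean, sample variance, and count of held-out loss values (one per random seed) for texts by the same author versus texts by all other authors; the sign of $t$ simply reflects which group is subtracted.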
253253
254254We also wondered how many training epochs were required for the models to
@@ -623,9 +623,10 @@ \subsection{Concluding remarks}
623623
624624\section* {Acknowledgments }
625625
626- We acknowledge helpful discussions with Jacob Bacus, Hung-Tu Chen,
627- and Paxton Fitzpatrick. This research was supported in part by
628- National Science Foundation Grant 2145172 to JRM.
626+ We acknowledge helpful discussions with Jacob Bacus, Hung-Tu Chen, and Paxton
627+ Fitzpatrick. This research was supported in part by National Science Foundation
628+ Grant 2145172 to JRM and by a GPU cluster generously donated by the estate of
629+ Daniel J. Milstein.
629630
630631\section* {Data and code availability }
631632