markovmodel
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎manuscript/figures/figure_2.pdf‎
647 Bytes b/‎manuscript/figures/figure_2.pdf‎
647 Bytes
diff --git a/‎manuscript/figures/figure_3.pdf‎
-12.7 KB b/‎manuscript/figures/figure_3.pdf‎
-12.7 KB
diff --git a/‎manuscript/figures/figure_5.pdf‎
22.7 KB b/‎manuscript/figures/figure_5.pdf‎
22.7 KB
diff --git a/‎manuscript/literature.bib‎
Lines changed: 607 additions & 952 deletions b/‎manuscript/literature.bib‎
Lines changed: 607 additions & 952 deletions
diff --git a/‎manuscript/manuscript.tex‎
Lines changed: 20 additions & 13 deletions b/‎manuscript/manuscript.tex‎
Lines changed: 20 additions & 13 deletions
diff --git a/‎notebooks/00-pentapeptide-showcase.ipynb‎
Lines changed: 89 additions & 62 deletions b/‎notebooks/00-pentapeptide-showcase.ipynb‎
Lines changed: 89 additions & 62 deletions
@@ -21,7 +21,7 @@ We keep the details minimal throughout the showcase but point to the more specia
 In detail, the remaining eight notebooks revisit all aspects shown in the showcase, provide additional details and variants, and contain exercises (and solutions) to self-check your learning progress:
 
 1. Data-I/O and featurization [➜ 📓](notebooks/01-data-io-and-featurization.ipynb)
-2. Dimension reduction and discretization [➜ 📓](notebooks/02-dimension-reduction-and-discretisation.ipynb)
+2. Dimension reduction and discretization [➜ 📓](notebooks/02-dimension-reduction-and-discretization.ipynb)
 3. MSM estimation and validation [➜ 📓](notebooks/03-msm-estimation-and-validation.ipynb)
 4. MSM analysis [➜ 📓](notebooks/04-msm-analysis.ipynb)
 5. PCCA and TPT analysis [➜ 📓](notebooks/05-pcca-tpt.ipynb)
 
@@ -110,14 +110,15 @@ \subsection{Software/system requirements}
 \begin{itemize}
 \item \textbf{PyEMMA} -- MSM/HMM estimation, validation, analysis, and visualization, and its dependencies~\cite{pyemma}
 \item mdshare -- A downloader for MD data from a public server
-\item notebook -- The Jupyter notebook tool used for running the tutorials~\cite{jupyter}, along with extension packages jupyter\_contrib\_nbextensions and nbexamples
+\item notebook -- The Jupyter~\cite{jupyter} notebook tool used for running the tutorials, along with extension packages jupyter\_contrib\_nbextensions and nbexamples
+\item matplotlib -- A plotting library~\cite{matplotlib}
 \item nglview -- Widget for active viewing of molecular structures in Jupyter environments~\cite{nglview}
 \end{itemize}
 
 The tutorial software is currently supported for Python versions $3.5$ and $3.6$ on the operating systems Linux, OSX, and Windows.
 
 Should the user prefer not to use Anaconda, a manual installation via the pip installer is possible.
-Alternatively, one can use the Binder service to view and run the tutorials online in any browser.
+Alternatively, one can use the Binder service (\url{https://mybinder.org}) to view and run the tutorials online in any browser.
 
 \section{Content and links}
 
@@ -128,7 +129,7 @@ \section{Content and links}
 
 \subsection{The PyEMMA workflow}
 
-In short, the workflow for a full analysis of an MD dataset might consist of,
+In short, the workflow (Fig.~\ref{fig:workflowchart}) for a full analysis of an MD dataset might consist of,
 \begin{itemize}
 	\item extracting molecular features from the raw data (01),
 	\item transforming those features into a suitable, low dimensional subspace (02),
@@ -147,7 +148,7 @@ \subsection{Feature selection}
 
 \begin{figure}
 \includegraphics{figure_2}
-\caption{Exemplary analysis of the conformational dynamics of a pentapeptide backbone: (a)~The Trp-Leu-Ala-Leu-Leu pentapeptide in licorice representation~\cite{vmd}.
+\caption{Example analysis of the conformational dynamics of a pentapeptide backbone: (a)~The Trp-Leu-Ala-Leu-Leu pentapeptide in licorice representation~\cite{vmd}.
 (b)~The VAMP-2 score indicates which of the tested featurizations contains the highest kinetic variance.
 (c)~The sample density projected onto the first two time-lagged independent components (ICs) at lag time $\tau=0.5$ ns shows multiple density maxima and
 (d)~the time series of the first two ICs show rare transition events.
@@ -203,12 +204,11 @@ \subsection{Analyzing the MSM}
 
 \begin{figure}
 \includegraphics{figure_3}
-\caption{Exemplary analysis of the conformational dynamics of a pentapeptide backbone:
-(a) The reweighted free energy surface projected onto the first two independent components exhibits five minima which
-(b) PCCA++ identifies as five metastable states.
-(c) The second right eigenvector shows that the slowest process shifts probability between the least probable state ($\mathcal{S}_1$) and the other states, in particular states ($\mathcal{S}_4$, $\mathcal{S}_5$), whereas
-(d) the committor $\mathcal{S}_2\to\mathcal{S}_4$ indicates that states $\mathcal{S}_{(1,3,5)}$ act as a transition region between states $\mathcal{S}_2$ and $\mathcal{S}_4$.
-(e) The Trp-1 SASA autocorrelation function yields a weak signal (top) which, however, can be enhanced if the system is prepared in the nonequilibrium condition $\mathcal{S}_1$ (bottom).}
+\caption{Example analysis of the conformational dynamics of a pentapeptide backbone:
+(a)~The reweighted free energy surface projected onto the first two independent components exhibits five minima which
+(b)~PCCA++ identifies as five metastable states.
+(c)~The second right eigenvector shows that the slowest process shifts probability between the least probable state ($\mathcal{S}_1$) and the other states, in particular states ($\mathcal{S}_4$, $\mathcal{S}_5$), whereas
+(d)~the committor $\mathcal{S}_2\to\mathcal{S}_4$ indicates that states $\mathcal{S}_{(1,3,5)}$ act as a transition region between states $\mathcal{S}_2$ and $\mathcal{S}_4$.}
 \label{fig:msm-analysis}
 \end{figure}
 
@@ -257,25 +257,32 @@ \subsection{Analyzing the MSM}
 
 \begin{figure}
 \includegraphics{figure_4}
-\caption{Visualization of the transition paths from $\mathcal{S}_2$ to $\mathcal{S}_4$:
+\caption{Example analysis of the conformational dynamics of a pentapeptide backbone:
+visualization of the transition paths from $\mathcal{S}_2$ to $\mathcal{S}_4$.
 Metastable states $\mathcal{S}_{(1-5)}$ are represented by an ensemble of representative structures and are arranged along the horizontal axis according to their committor probabilities. The three main transition pathways starting from $\mathcal{S}_2$ and ending in $\mathcal{S}_4$ are depicted by gray arrows with thickness proportional to the transition flux. The dominant pathway proceeds through $\mathcal{S}_5$.}
 \label{fig:tpt-network}
 \end{figure}
 
 \subsection{Connecting the MSM with experimental data}
 
+\begin{figure}
+\includegraphics{figure_5}
+\caption{Example analysis of the conformational dynamics of a pentapeptide backbone: (a)~the Trp-1 SASA autocorrelation function yields a weak signal which, however, (b)~can be enhanced if the system is prepared in the nonequilibrium condition $\mathcal{S}_1$. The solid/orange lines denote the maximum likelihood MSM result; the dashed/blue lines and the shaded areas indicate sample means and $95\%$ confidence intervals computed with a Bayesian sampling procedure.}
+\label{fig:msm-exp-obs}
+\end{figure}
+
 MSMs can also be analyzed in the context of experimental observables. Connecting MSM analysis to experimental data can both serve as an accuracy test of our MSM as well as provide a mechanistic interpretation of observed experimental signals.
 Since we have both the stationary and dynamic properties of the molecular system encoded in the MSM transition probability matrix, we can compute observables that involve both stationary ensemble averages as well as correlation functions.
 
 As an example, here we look at the fluorescence correlation of Trp-1, since this terminal tryptophan is a realistic experimental observable for our pentapeptide system.
 In order to compute the fluorescence correlation functions we require a microscopic, instantaneous value of the tryptophan fluorescence for each of the original $75$ MSM microstates.
 To approximate the fluorescence signal in our pentapeptide system, we use the mdtraj library~\cite{mdtraj} to compute the solvent accessible surface area (SASA) of Trp-1.
-Now that we have an approximation of the fluorescence in each of our MSM states, we can use PyEMMA to compute the fluorescence autocorrelation function (ACF) from our MSM (\ref{fig:msm-analysis}e, upper).
+Now that we have an approximation of the fluorescence in each of our MSM states, we can use PyEMMA to compute the fluorescence autocorrelation function (ACF) from our MSM (Fig.~\ref{fig:msm-exp-obs}a).
 Note how the computed ACF has a very small response (i.e., signal amplitude).
 
 Using PyEMMA, we can simulate the relaxation of an observable if we had prepared our molecular system in a nonequilibrium initial condition.
 The experimental counterpart of such a prediction could be a temperature or pressure jump experiment or a stopped flow assay.
-To illustrate such an experiment, we initialize our molecular ensemble as the metastable distribution of $\mathcal{S}_1$ and follow the predicted fluorescence signal as it relaxes to equilibrium (\ref{fig:msm-analysis}e, lower).
+To illustrate such an experiment, we initialize our molecular ensemble as the metastable distribution of $\mathcal{S}_1$ and follow the predicted fluorescence signal as it relaxes to equilibrium (Fig.~\ref{fig:msm-exp-obs}b).
 We see that the predicted relaxation signal has a much larger amplitude for the nonequilibrium initialization, making it more likely to be experimentally measurable.
 
 \subsection{Summary}
 
@@ -1226,15 +1226,27 @@
     "ax_mol.set_axis_off()\n",
     "ax_mol.imshow(plt.imread('static/pentapeptide-structure.png'))\n",
     "\n",
-    "ax_feat = fig.add_subplot(gs[2000:3150, 400:1800])\n",
-    "ax_feat.bar(\n",
-    "    vamp_bars_plot['labels'],\n",
-    "    vamp_bars_plot['scores'],\n",
-    "    yerr=vamp_bars_plot['errors'],\n",
-    "    color=['C0', 'C1', 'C2'])\n",
-    "ax_feat.tick_params(axis='x', labelrotation=20)\n",
-    "ax_feat.set_ylabel('VAMP2 score')\n",
-    "ax_feat.set_title(r'lag time $\\tau$ = {:.1f} ns'.format(vamp_bars_plot['lag'] * 0.1))\n",
+    "ax_feat_label = fig.add_subplot(gs[2000:3150, 180:1800])\n",
+    "ax_feat_upper = fig.add_subplot(gs[2000:2720, 400:1800])\n",
+    "ax_feat_lower = fig.add_subplot(gs[2790:3150, 400:1800])\n",
+    "for ax in (ax_feat_upper, ax_feat_lower):\n",
+    "    ax.bar(\n",
+    "        vamp_bars_plot['labels'],\n",
+    "        vamp_bars_plot['scores'],\n",
+    "        yerr=vamp_bars_plot['errors'],\n",
+    "        color=['C0', 'C1', 'C2'])\n",
+    "for key in ('top', 'bottom', 'left', 'right'):\n",
+    "    ax_feat_label.spines[key].set_visible(False)\n",
+    "ax_feat_label.set_ylabel('VAMP2 score')\n",
+    "ax_feat_label.set_xticks([])\n",
+    "ax_feat_label.set_yticks([])\n",
+    "ax_feat_upper.set_title(r'lag time $\\tau$ = {:.1f} ns'.format(vamp_bars_plot['lag'] * 0.1))\n",
+    "ax_feat_upper.spines['bottom'].set_visible(False)\n",
+    "ax_feat_upper.set_xticks([])\n",
+    "ax_feat_upper.set_ylim(2.8, 3.6)\n",
+    "ax_feat_lower.spines['top'].set_visible(False)\n",
+    "ax_feat_lower.tick_params(axis='x', labelrotation=20)\n",
+    "ax_feat_lower.set_ylim(0.0, 0.4)\n",
     "\n",
     "ax_density = fig.add_subplot(gs[2000:3150, 2200:3350])\n",
     "_, _, misc = pyemma.plots.plot_density(\n",
@@ -1300,7 +1312,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig = plt.figure(figsize=(3.47, 5.85))\n",
+    "fig = plt.figure(figsize=(3.47, 3.95))\n",
     "gw = int(np.floor(0.5 + 1000 * fig.get_figwidth()))\n",
     "gh = int(np.floor(0.5 + 1000 * fig.get_figheight()))\n",
     "gs = plt.GridSpec(gh, gw)\n",
@@ -1309,10 +1321,9 @@
     "ax_box = fig.add_subplot(gs[:, :])\n",
     "ax_box.set_axis_off()\n",
     "ax_box.text(0.03, 0.97, '(a)', size=10)\n",
-    "ax_box.text(0.55, 0.97, '(b)', size=10)\n",
-    "ax_box.text(0.03, 0.66, '(c)', size=10)\n",
-    "ax_box.text(0.55, 0.66, '(d)', size=10)\n",
-    "ax_box.text(0.03, 0.30, '(e)', size=10)\n",
+    "ax_box.text(0.53, 0.97, '(b)', size=10)\n",
+    "ax_box.text(0.03, 0.50, '(c)', size=10)\n",
+    "ax_box.text(0.53, 0.50, '(d)', size=10)\n",
     "\n",
     "ax_fe = fig.add_subplot(gs[400:1750, 400:1750])\n",
     "_, _, misc = pyemma.plots.plot_free_energy(\n",
@@ -1343,6 +1354,11 @@
     "                             for i in range(nstates)])\n",
     "ax_state.set_xticklabels([])\n",
     "ax_state.set_yticklabels([])\n",
+    "ax_state.text(0.70, 6.30, '$\\mathcal{S}_1$', size=10)\n",
+    "ax_state.text(3.00, 4.00, '$\\mathcal{S}_2$', size=10)\n",
+    "ax_state.text(2.50, 2.00, '$\\mathcal{S}_3$', size=10)\n",
+    "ax_state.text(5.20, -0.50, '$\\mathcal{S}_4$', size=10)\n",
+    "ax_state.text(-0.20, 0.00, '$\\mathcal{S}_5$', size=10)\n",
     "\n",
     "evec_idx = 1\n",
     "ax_eig = fig.add_subplot(gs[2300:3650, 400:1750])\n",
@@ -1358,7 +1374,6 @@
     "misc['cbar'].set_ticks(np.linspace(*misc['cbar'].get_clim(), 3))\n",
     "misc['cbar'].ax.xaxis.set_ticks_position('top')\n",
     "misc['cbar'].ax.xaxis.set_label_position('top')\n",
-    "ax_eig.set_xticklabels([])\n",
     "ax_eig.set_xlabel('IC 1')\n",
     "ax_eig.set_ylabel('IC 2')\n",
     "\n",
@@ -1380,46 +1395,6 @@
     "ax_flux.set_xlabel('IC 1')\n",
     "ax_flux.set_yticklabels([])\n",
     "\n",
-    "ax_acf = fig.add_subplot(gs[4100:4825, 400:3350])\n",
-    "ax_acf.set_title('Trp-1 SASA')\n",
-    "ax_acf.plot(eq_time_ml, eq_acf_ml, '-', color='C1', label='ML MSM')\n",
-    "ax_acf.plot(\n",
-    "    eq_time_bayes,\n",
-    "    eq_acf_bayes,\n",
-    "    '--',\n",
-    "    color='C0',\n",
-    "    label='Bayesian MSM')\n",
-    "ax_acf.fill_between(\n",
-    "    eq_time_bayes,\n",
-    "    eq_acf_bayes_ci_l[1],\n",
-    "    eq_acf_bayes_ci_u[1],\n",
-    "    facecolor='C0',\n",
-    "    alpha=0.3)\n",
-    "ax_acf.semilogx()\n",
-    "ax_acf.set_xlim((eq_time_ml[1], eq_time_ml[-1]))\n",
-    "ax_acf.set_xticks([])\n",
-    "ax_acf.set_ylabel(r'ACF / nm$^4$')\n",
-    "ax_acf.legend()\n",
-    "\n",
-    "ax_rlx = fig.add_subplot(gs[4825:5550, 400:3350])\n",
-    "ax_rlx.plot(eq_time_ml, eq_relax_ml, '-', color='C1', label='ML MSM')\n",
-    "ax_rlx.plot(\n",
-    "    eq_time_bayes,\n",
-    "    eq_relax_bayes,\n",
-    "    '--',\n",
-    "    color='C0',\n",
-    "    label='Bayesian MSM')\n",
-    "ax_rlx.fill_between(\n",
-    "    eq_time_bayes,\n",
-    "    eq_relax_bayes_CI_l[1],\n",
-    "    eq_relax_bayes_CI_u[1],\n",
-    "    facecolor='C0',\n",
-    "    alpha=0.3)\n",
-    "ax_rlx.semilogx()\n",
-    "ax_rlx.set_ylabel(r'Average / nm$^2$')\n",
-    "ax_rlx.set_xlim((eq_time_ml[1], eq_time_ml[-1]))\n",
-    "ax_rlx.set_xlabel(r'time / ns')\n",
-    "\n",
     "fig.savefig('data/figure_3.pdf', dpi=300)"
    ]
   },
@@ -1469,6 +1444,65 @@
     "fig.savefig('data/figure_4.pdf', dpi=300)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig = plt.figure(figsize=(3.47, 2.5))\n",
+    "gw = int(np.floor(0.5 + 1000 * fig.get_figwidth()))\n",
+    "gh = int(np.floor(0.5 + 1000 * fig.get_figheight()))\n",
+    "gs = plt.GridSpec(gh, gw)\n",
+    "gs.update(hspace=0.0, wspace=0.0, left=0.0, right=1.0, bottom=0.0, top=1.0)\n",
+    "\n",
+    "ax_box = fig.add_subplot(gs[:, :])\n",
+    "ax_box.set_axis_off()\n",
+    "ax_box.text(0.01, 0.95, '(a)', size=10)\n",
+    "ax_box.text(0.01, 0.50, '(b)', size=10)\n",
+    "\n",
+    "ax_acf = fig.add_subplot(gs[50:1075, 500:3400])\n",
+    "ax_acf.plot(eq_time_ml, eq_acf_ml, '-', color='C1', label='ML MSM')\n",
+    "ax_acf.plot(\n",
+    "    eq_time_bayes,\n",
+    "    eq_acf_bayes,\n",
+    "    '--',\n",
+    "    color='C0',\n",
+    "    label='Bayesian MSM')\n",
+    "ax_acf.fill_between(\n",
+    "    eq_time_bayes,\n",
+    "    eq_acf_bayes_ci_l[1],\n",
+    "    eq_acf_bayes_ci_u[1],\n",
+    "    facecolor='C0',\n",
+    "    alpha=0.3)\n",
+    "ax_acf.semilogx()\n",
+    "ax_acf.set_xlim((eq_time_ml[1], eq_time_ml[-1]))\n",
+    "ax_acf.set_xticks([])\n",
+    "ax_acf.set_ylabel(r'ACF / nm$^4$')\n",
+    "ax_acf.legend()\n",
+    "\n",
+    "ax_rlx = fig.add_subplot(gs[1125:2150, 500:3400])\n",
+    "ax_rlx.plot(eq_time_ml, eq_relax_ml, '-', color='C1', label='ML MSM')\n",
+    "ax_rlx.plot(\n",
+    "    eq_time_bayes,\n",
+    "    eq_relax_bayes,\n",
+    "    '--',\n",
+    "    color='C0',\n",
+    "    label='Bayesian MSM')\n",
+    "ax_rlx.fill_between(\n",
+    "    eq_time_bayes,\n",
+    "    eq_relax_bayes_CI_l[1],\n",
+    "    eq_relax_bayes_CI_u[1],\n",
+    "    facecolor='C0',\n",
+    "    alpha=0.3)\n",
+    "ax_rlx.semilogx()\n",
+    "ax_rlx.set_ylabel(r'Average / nm$^2$')\n",
+    "ax_rlx.set_xlim((eq_time_ml[1], eq_time_ml[-1]))\n",
+    "ax_rlx.set_xlabel(r'time / ns')\n",
+    "\n",
+    "fig.savefig('data/figure_5.pdf', dpi=300)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1521,13 +1555,6 @@
     "<a id=\"cite-hmm-baum-welch-alg\"/><sup><a href=#ref-14>[^]</a></sup>Leonard E. Baum and Ted Petrie and George Soules and Norman Weiss. 1970. _A Maximization Technique Occurring in the Statistical Analysis of Probabilistic Functions of Markov Chains_. [URL](http://www.jstor.org/stable/2239727)\n",
     "\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {