From 3b3f1c99776f7a914257fa078e48ee137f0bf5ab Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Tue, 21 Oct 2025 17:19:58 -0400
Subject: [PATCH 01/17] Put the general layout of the additional section to be
 transplanted into analyze-rxte-spectra.md. For issue #111

---
 .../rxte/analyze-rxte-spectra.md              | 26 +++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 5a244788..99930880 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -8,7 +8,7 @@ authors:
   affiliations: ['University of Maryland, Baltimore County', 'HEASARC, NASA Goddard']
   orcid: 0000-0001-9658-1396
   website: https://davidt3.github.io/
-date: '2025-10-15'
+date: '2025-10-21'
 jupytext:
   text_representation:
     extension: .md
@@ -98,6 +98,7 @@ from tqdm import tqdm
 ```{code-cell} python
 :tags: [hide-input]
 
+SRC_NAME = "Eta Car"
 
 ```
 
@@ -522,6 +523,27 @@ for label in ax_arr[1].get_xticklabels(which="major"):
 plt.show()
 ```
 
+## 5. Applying simple unsupervised machine learning to the spectra
+
+
+### Scale and normalize the spectra
+
+
+### Dimensionality reduction with Principal Component Analysis (PCA)
+
+
+### Dimensionality reduction with T-distributed Stochastic Neighbor Embedding (t-SNE)
+
+
+### Dimensionality reduction with Uniform Manifold Approximation and Projection (UMAP)
+
+
+### Automated clustering of like spectra with Hierarchical Density-Based Spatial Clustering of Applications with Noise (HDBSCAN)
+
+
+### Exploring the results of spectral clustering
+
+
 ***
 
 
@@ -532,7 +554,7 @@ Author: Tess Jaffe, HEASARC Chief Archive Scientist.
 
 Author: David J Turner, HEASARC Staff Scientist.
 
-Updated On: 2025-10-15
+Updated On: 2025-10-21
 
 +++
 

From 25bc81a6bdc761254f732134b266ec8b882d0937 Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Tue, 21 Oct 2025 17:38:12 -0400
Subject: [PATCH 02/17] Copied in some of Tess' original code cells from the
 RXTE spectra ML notebook. For issue #111

---
 .../rxte/analyze-rxte-spectra.md              | 66 ++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 99930880..7be5fede 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -76,7 +76,12 @@ from astropy.units import Quantity
 from astroquery.heasarc import Heasarc
 from matplotlib.ticker import FuncFormatter
 from s3fs import S3FileSystem
+from sklearn.cluster import HDBSCAN
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
+from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
+from umap import UMAP
 
 %matplotlib inline
 ```
@@ -99,7 +104,6 @@ from tqdm import tqdm
 :tags: [hide-input]
 
 SRC_NAME = "Eta Car"
-
 ```
 
 ### Configuration
@@ -525,9 +529,69 @@ plt.show()
 
 ## 5. Applying simple unsupervised machine learning to the spectra
 
+In the following, we will use different models from the `sklearn` module.
+
+As a general rule, ML models work best when they are normalized, so the shape is important, not just the magnitude. We use [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html).
+
+Note that after applying the scaler, we switch to plots in a linear scale.
 
 ### Scale and normalize the spectra
 
+```{code-cell} python
+scaled_specs = []
+for i in tqdm(range(specs.shape[0])):
+    s = StandardScaler()
+    scaled_specs.append(s.fit_transform(specs[i].reshape(-1, 1)).T[0])
+scaled_specs = np.array(scaled_specs)
+```
+
+Visualize the scaled and unscaled spectra for comparison
+
+```{code-cell} python
+plt.figure(figsize=(10, 6))
+plt.plot(xvals.T, scaled_specs.T, linewidth=0.3)
+plt.xlabel("Energy (keV)")
+plt.ylabel("Scaled Normalized Count Rate (C/s)")
+plt.title("Scaled Eta Carinae RXTE Spectra (lin-lin)")
+
+plt.figure(figsize=(10, 6))
+plt.plot(xvals.T, specs.T, linewidth=0.3)
+plt.xlabel("Energy (keV)")
+plt.ylabel("Normalized Count Rate (C/s)")
+plt.title("Unscaled Eta Carinae RXTE Spectra (lin-lin)");
+```
+
+Note that the scaled spectra all have a similar shape AND magnitude, whereas the unscaled spectra have a similar shape but not mangitude.
+
+Scaling has the effect of making big features smaller, but small features bigger. So, let's cut off the spectra at 9 keV in order to avoid noise driving the analysis, then rescale.
+
+```{code-cell} python
+specs = specs[:, : xref[xref <= 9.0001].shape[0]]
+xref = xref[: xref[xref <= 9.0001].shape[0]]
+
+scaled_specs = []
+for i in tqdm(range(specs.shape[0])):
+    s = StandardScaler()
+    scaled_specs.append(s.fit_transform(specs[i].reshape(-1, 1)).T[0])
+scaled_specs = np.array(scaled_specs)
+```
+
+Plot the scaled and unscaled spectra for comparison again.
+
+```{code-cell} python
+xvals = np.tile(xref, (specs.shape[0], 1))
+plt.figure(figsize=(10, 6))
+plt.plot(xvals.T, scaled_specs.T, linewidth=0.4)
+plt.xlabel("Energy (keV)")
+plt.ylabel("Scaled Normalized Count Rate (C/s)")
+plt.title("Scaled Eta Carinae RXTE Spectra (lin-lin)")
+
+plt.figure(figsize=(10, 6))
+plt.plot(xvals.T, specs.T, linewidth=0.4)
+plt.xlabel("Energy (keV)")
+plt.ylabel("Normalized Count Rate (C/s)")
+plt.title("Unscaled Eta Carinae RXTE Spectra (lin-lin)");
+```
 
 ### Dimensionality reduction with Principal Component Analysis (PCA)
 

From 1f1bff907c01f1289232b7d9598f380cab0d1ceb Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 12:14:22 -0400
Subject: [PATCH 03/17] Played around with the ML sections on Fornax, and have
 added working code back in to the notebook in the ML sections. It is
 compatible with how information is already loaded in, and includes some
 optimizations/improvements. Still no commentary, and the plotting code will
 be improved more. For issue #111

---
 .../rxte/analyze-rxte-spectra.md              | 163 ++++++++++++++----
 1 file changed, 127 insertions(+), 36 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 7be5fede..8f8ed9fc 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -74,9 +74,10 @@ from astropy.coordinates import SkyCoord
 from astropy.time import Time, TimeDelta
 from astropy.units import Quantity
 from astroquery.heasarc import Heasarc
+from cycler import cycler
 from matplotlib.ticker import FuncFormatter
 from s3fs import S3FileSystem
-from sklearn.cluster import HDBSCAN
+from sklearn.cluster import DBSCAN
 from sklearn.decomposition import PCA
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import StandardScaler
@@ -535,78 +536,168 @@ As a general rule, ML models work best when they are normalized, so the shape is
 
 Note that after applying the scaler, we switch to plots in a linear scale.
 
-### Scale and normalize the spectra
+### Preparing
+
+Setup some energy grid that the spectra will interpolate over.
+start at 2 keV due to low-resolution noise below that energy - specific to RXTE
+stop at 12 keV due to no visible activity from Eta Carinae above that energy
 
 ```{code-cell} python
-scaled_specs = []
-for i in tqdm(range(specs.shape[0])):
-    s = StandardScaler()
-    scaled_specs.append(s.fit_transform(specs[i].reshape(-1, 1)).T[0])
-scaled_specs = np.array(scaled_specs)
+interp_en_vals = np.arange(2.0, 12.0, 0.1)
+
+interp_spec_vals = []
+for spec_info in spec_plot_data:
+    interp_spec_vals.append(np.interp(interp_en_vals, spec_info[0], spec_info[2]))
+
+interp_spec_vals = np.array(interp_spec_vals)
 ```
 
-Visualize the scaled and unscaled spectra for comparison
+### Scale and normalize the spectra
+
+```{code-cell} python
+scaled_interp_spec_vals = StandardScaler().fit_transform(interp_spec_vals)
+```
 
 ```{code-cell} python
 plt.figure(figsize=(10, 6))
-plt.plot(xvals.T, scaled_specs.T, linewidth=0.3)
-plt.xlabel("Energy (keV)")
-plt.ylabel("Scaled Normalized Count Rate (C/s)")
-plt.title("Scaled Eta Carinae RXTE Spectra (lin-lin)")
+plt.plot(interp_en_vals, interp_spec_vals.T, linewidth=0.3)
+
+plt.show()
+
 
 plt.figure(figsize=(10, 6))
-plt.plot(xvals.T, specs.T, linewidth=0.3)
-plt.xlabel("Energy (keV)")
-plt.ylabel("Normalized Count Rate (C/s)")
-plt.title("Unscaled Eta Carinae RXTE Spectra (lin-lin)");
+plt.plot(interp_en_vals, scaled_interp_spec_vals.T, linewidth=0.3)
+
+plt.show()
 ```
 
 Note that the scaled spectra all have a similar shape AND magnitude, whereas the unscaled spectra have a similar shape but not mangitude.
 
 Scaling has the effect of making big features smaller, but small features bigger. So, let's cut off the spectra at 9 keV in order to avoid noise driving the analysis, then rescale.
 
+
+### Dimensionality reduction with Principal Component Analysis (PCA)
+
+```{code-cell} python
+# For comparison, compute PCA
+pca = PCA(n_components=2)
+scaled_specs_pca = pca.fit_transform(scaled_interp_spec_vals)
+plt.figure(figsize=(8, 8))
+plt.scatter(scaled_specs_pca[:, 0], scaled_specs_pca[:, 1])
+plt.title("PCA-reduced Eta Carinae RXTE Spectra")
+plt.axis("off")
+```
+
+### Dimensionality reduction with T-distributed Stochastic Neighbor Embedding (t-SNE)
+
+```{code-cell} python
+tsne = TSNE(n_components=2)
+scaled_specs_tsne = tsne.fit_transform(scaled_interp_spec_vals)
+plt.figure(figsize=(8, 8))
+plt.scatter(scaled_specs_tsne[:, 0], scaled_specs_tsne[:, 1])
+plt.title("TSNE-reduced Eta Carinae RXTE Spectra")
+plt.axis("off")
+```
+
+### Dimensionality reduction with Uniform Manifold Approximation and Projection (UMAP)
+
 ```{code-cell} python
-specs = specs[:, : xref[xref <= 9.0001].shape[0]]
-xref = xref[: xref[xref <= 9.0001].shape[0]]
+um = UMAP(random_state=1)
+scaled_specs_umap = um.fit_transform(scaled_interp_spec_vals)
+plt.figure(figsize=(8, 8))
+plt.scatter(scaled_specs_umap[:, 0], scaled_specs_umap[:, 1])
+plt.title("UMAP-reduced Eta Carinae RXTE Spectra")
+plt.axis("off")
+```
 
-scaled_specs = []
-for i in tqdm(range(specs.shape[0])):
-    s = StandardScaler()
-    scaled_specs.append(s.fit_transform(specs[i].reshape(-1, 1)).T[0])
-scaled_specs = np.array(scaled_specs)
+### Automated clustering of like spectra with Density-Based Spatial Clustering of Applications with Noise (DBSCAN)
+
+```{code-cell} python
+dbs = DBSCAN(eps=0.6, min_samples=2)
+clusters = dbs.fit(scaled_specs_umap)
+labels = np.unique(clusters.labels_)
+plt.figure(figsize=(8, 8))
+for i in range(len(np.unique(labels[labels >= 0]))):
+    plt.scatter(
+        scaled_specs_umap[clusters.labels_ == i, 0],
+        scaled_specs_umap[clusters.labels_ == i, 1],
+        label="Cluster " + str(i),
+    )
+plt.legend()
+plt.title("Clustered UMAP-reduced Eta Carinae RXTE Spectra")
+plt.axis("off")
 ```
 
-Plot the scaled and unscaled spectra for comparison again.
+### Exploring the results of spectral clustering
 
 ```{code-cell} python
-xvals = np.tile(xref, (specs.shape[0], 1))
+# Plot the scaled spectra mean
 plt.figure(figsize=(10, 6))
-plt.plot(xvals.T, scaled_specs.T, linewidth=0.4)
+for i in range(len(np.unique(labels[labels >= 0]))):
+    plt.plot(
+        interp_en_vals,
+        scaled_interp_spec_vals[clusters.labels_ == i].mean(axis=0),
+        label="Cluster " + str(i),
+    )
+plt.legend()
 plt.xlabel("Energy (keV)")
 plt.ylabel("Scaled Normalized Count Rate (C/s)")
-plt.title("Scaled Eta Carinae RXTE Spectra (lin-lin)")
+plt.title("Scaled Eta Carinae RXTE Spectra Cluster Mean (lin-lin)")
+plt.show()
 
+# Plot the unscaled spectra mean
 plt.figure(figsize=(10, 6))
-plt.plot(xvals.T, specs.T, linewidth=0.4)
+for i in range(len(np.unique(labels[labels >= 0]))):
+    plt.plot(
+        interp_en_vals,
+        interp_spec_vals[clusters.labels_ == i].mean(axis=0),
+        label="Cluster " + str(i),
+    )
+plt.legend()
 plt.xlabel("Energy (keV)")
 plt.ylabel("Normalized Count Rate (C/s)")
-plt.title("Unscaled Eta Carinae RXTE Spectra (lin-lin)");
+plt.title("Unscaled Eta Carinae RXTE Spectra Cluster Mean (lin-lin)")
+plt.show()
 ```
 
-### Dimensionality reduction with Principal Component Analysis (PCA)
-
-
-### Dimensionality reduction with T-distributed Stochastic Neighbor Embedding (t-SNE)
+```{code-cell} python
+marker_cycler = cycler(marker=["x", "d", "+", "p", ".", "2", "*", "H", "X", "v"])
+default_color_cycler = plt.rcParams["axes.prop_cycle"]
+new_cycler = marker_cycler + default_color_cycler
 
+fig = plt.figure(figsize=(13, 4))
 
-### Dimensionality reduction with Uniform Manifold Approximation and Projection (UMAP)
+plt.gca().set_prop_cycle(new_cycler)
 
+plt.minorticks_on()
+plt.tick_params(which="both", direction="in", top=True, right=True)
 
-### Automated clustering of like spectra with Hierarchical Density-Based Spatial Clustering of Applications with Noise (HDBSCAN)
+for clust_id in np.unique(clusters.labels_):
+    cur_mask = clusters.labels_ == clust_id
+
+    plt.errorbar(
+        np.array(obs_start)[cur_mask],
+        pho_inds[cur_mask, 0],
+        yerr=pho_inds[cur_mask, 1],
+        capsize=2,
+        lw=0.7,
+        alpha=0.8,
+        label=f"Cluster {clust_id}",
+        linestyle="None",
+    )
 
+plt.ylabel("Photon Index", fontsize=15)
+plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%Hh-%Mm %d-%b-%Y"))
+plt.xlabel("Time", fontsize=15)
 
-### Exploring the results of spectral clustering
+for label in plt.gca().get_xticklabels(which="major"):
+    label.set(
+        y=label.get_position()[1] - 0.01, rotation=40, horizontalalignment="right"
+    )
 
+plt.legend()
+plt.show()
+```
 
 ***
 

From f9683b698f21cdf9ae99b72f9c42729175da3337 Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 13:44:55 -0400
Subject: [PATCH 04/17] Improved the plots in the ML section of the RXTE
 spectrum notebook. For issue #111

---
 .../rxte/analyze-rxte-spectra.md              | 193 ++++++++++++------
 1 file changed, 131 insertions(+), 62 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 8f8ed9fc..cfe1ad70 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -8,7 +8,7 @@ authors:
   affiliations: ['University of Maryland, Baltimore County', 'HEASARC, NASA Goddard']
   orcid: 0000-0001-9658-1396
   website: https://davidt3.github.io/
-date: '2025-10-21'
+date: '2025-10-22'
 jupytext:
   text_representation:
     extension: .md
@@ -311,10 +311,9 @@ cwd = os.getcwd()
 
 ### Reading and fitting the spectra
 
-
 This code will read in the spectra and fit a simple power-law model with default start values (we do not necessarily
 recommend this model for this type of source, nor leaving parameters set to default values). It also extracts the
-spectrum data points, fitted model data points and the fitted model parameters, for plotting purposes.
+spectrum data points, fitted model data points for plotting, and the fitted model parameters.
 
 Note that we move into the directory where the spectra are stored. This is because the main source spectra files
 have relative paths to the background and response files in their headers, and if we didn't move into the
@@ -384,7 +383,7 @@ for x, xerr, y, yerr in spec_plot_data:
 plt.xscale("log")
 plt.yscale("log")
 
-plt.xlabel("Energy (keV)", fontsize=15)
+plt.xlabel("Energy [keV]", fontsize=15)
 plt.ylabel(r"Counts cm$^{-2}$ s$^{-1}$ keV$^{-1}$", fontsize=15)
 
 plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
@@ -408,7 +407,7 @@ for fit_ind, fit in enumerate(fit_plot_data):
 plt.xscale("log")
 plt.yscale("log")
 
-plt.xlabel("Energy (keV)", fontsize=15)
+plt.xlabel("Energy [keV]", fontsize=15)
 plt.ylabel(r"Counts cm$^{-2}$ s$^{-1}$ keV$^{-1}$", fontsize=15)
 
 plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
@@ -422,8 +421,8 @@ plt.show()
 
 As we have fit models to all these spectra, and retrieved their parameter's values, we should take a look at them!
 
-Exactly what you do at this point will depend entirely upon your science case, and the type of object you've been
-analysing, but any analysis will benefit from an initial examination of the fitted parameter values (particularly if
+Exactly what you do at this point will depend entirely upon your science case and the type of object you've been
+analyzing. However, any analysis will benefit from an initial examination of the fitted parameter values (particularly if
 you have fit hundreds of spectra, as we have).
 
 ### Fitted model parameter distributions
@@ -559,14 +558,27 @@ scaled_interp_spec_vals = StandardScaler().fit_transform(interp_spec_vals)
 ```
 
 ```{code-cell} python
-plt.figure(figsize=(10, 6))
-plt.plot(interp_en_vals, interp_spec_vals.T, linewidth=0.3)
+fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
+fig.subplots_adjust(hspace=0.0)
 
-plt.show()
+for ax_inds, ax in np.ndenumerate(ax_arr):
+    ax.minorticks_on()
+    ax.tick_params(which="both", direction="in", top=True, right=True)
+
+ax_arr[0].plot(interp_en_vals, interp_spec_vals.T, lw=0.4)
+
+ax_arr[0].xaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
+ax_arr[0].yaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
 
+ax_arr[0].set_ylabel(r"Spectrum [ct cm$^{-2}$ s$^{-1}$ keV$^{-1}$]", fontsize=15)
 
-plt.figure(figsize=(10, 6))
-plt.plot(interp_en_vals, scaled_interp_spec_vals.T, linewidth=0.3)
+# Now for the scaled interpolated spectra
+ax_arr[1].plot(interp_en_vals, scaled_interp_spec_vals.T, lw=0.4)
+
+ax_arr[1].xaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
+
+ax_arr[1].set_ylabel(r"Scaled Spectrum", fontsize=15)
+ax_arr[1].set_xlabel("Energy [keV]", fontsize=15)
 
 plt.show()
 ```
@@ -575,39 +587,84 @@ Note that the scaled spectra all have a similar shape AND magnitude, whereas the
 
 Scaling has the effect of making big features smaller, but small features bigger. So, let's cut off the spectra at 9 keV in order to avoid noise driving the analysis, then rescale.
 
+### Reducing the dimensionality of the set of spectra
 
-### Dimensionality reduction with Principal Component Analysis (PCA)
+#### Principal Component Analysis (PCA)
 
 ```{code-cell} python
-# For comparison, compute PCA
 pca = PCA(n_components=2)
+
 scaled_specs_pca = pca.fit_transform(scaled_interp_spec_vals)
-plt.figure(figsize=(8, 8))
-plt.scatter(scaled_specs_pca[:, 0], scaled_specs_pca[:, 1])
-plt.title("PCA-reduced Eta Carinae RXTE Spectra")
-plt.axis("off")
 ```
 
-### Dimensionality reduction with T-distributed Stochastic Neighbor Embedding (t-SNE)
+#### T-distributed Stochastic Neighbor Embedding (t-SNE)
 
 ```{code-cell} python
 tsne = TSNE(n_components=2)
 scaled_specs_tsne = tsne.fit_transform(scaled_interp_spec_vals)
-plt.figure(figsize=(8, 8))
-plt.scatter(scaled_specs_tsne[:, 0], scaled_specs_tsne[:, 1])
-plt.title("TSNE-reduced Eta Carinae RXTE Spectra")
-plt.axis("off")
 ```
 
-### Dimensionality reduction with Uniform Manifold Approximation and Projection (UMAP)
+#### Uniform Manifold Approximation and Projection (UMAP)
 
 ```{code-cell} python
-um = UMAP(random_state=1)
+um = UMAP(random_state=1, n_jobs=1)
 scaled_specs_umap = um.fit_transform(scaled_interp_spec_vals)
-plt.figure(figsize=(8, 8))
-plt.scatter(scaled_specs_umap[:, 0], scaled_specs_umap[:, 1])
-plt.title("UMAP-reduced Eta Carinae RXTE Spectra")
-plt.axis("off")
+```
+
+#### Comparing the results of the different dimensionality reduction methods
+
+```{code-cell} python
+fig, ax_arr = plt.subplots(2, 2, figsize=(8, 8))
+fig.subplots_adjust(hspace=0.0, wspace=0.0)
+
+for ax_inds, ax in np.ndenumerate(ax_arr):
+    ax.minorticks_on()
+    ax.tick_params(which="both", direction="in", top=True, right=True)
+
+# PCA plot
+ax_arr[0, 0].scatter(
+    scaled_specs_pca[:, 0],
+    scaled_specs_pca[:, 1],
+    alpha=0.6,
+    color="firebrick",
+    label="PCA",
+)
+ax_arr[0, 0].set_xticklabels([])
+ax_arr[0, 0].set_yticklabels([])
+ax_arr[0, 0].legend(fontsize=14)
+
+# t-SNE plot
+ax_arr[0, 1].scatter(
+    scaled_specs_tsne[:, 0],
+    scaled_specs_tsne[:, 1],
+    alpha=0.6,
+    color="tab:cyan",
+    marker="v",
+    label="t-SNE",
+)
+ax_arr[0, 1].set_xticklabels([])
+ax_arr[0, 1].set_yticklabels([])
+ax_arr[0, 1].legend(fontsize=14)
+
+# UMAP plot
+ax_arr[1, 0].scatter(
+    scaled_specs_umap[:, 0],
+    scaled_specs_umap[:, 1],
+    alpha=0.6,
+    color="goldenrod",
+    marker="p",
+    label="UMAP",
+)
+ax_arr[1, 0].set_xticklabels([])
+ax_arr[1, 0].set_yticklabels([])
+ax_arr[1, 0].legend(fontsize=14)
+
+# Make the fourth subplot invisible
+ax_arr[1, 1].set_visible(False)
+
+plt.suptitle("Comparison of dimensionality reduction", fontsize=16, y=0.92)
+
+plt.show()
 ```
 
 ### Automated clustering of like spectra with Density-Based Spatial Clustering of Applications with Noise (DBSCAN)
@@ -615,48 +672,60 @@ plt.axis("off")
 ```{code-cell} python
 dbs = DBSCAN(eps=0.6, min_samples=2)
 clusters = dbs.fit(scaled_specs_umap)
-labels = np.unique(clusters.labels_)
+
+clust_labels = np.unique(clusters.labels_)
+```
+
+```{code-cell} python
 plt.figure(figsize=(8, 8))
-for i in range(len(np.unique(labels[labels >= 0]))):
+
+plt.minorticks_on()
+plt.tick_params(which="both", direction="in", top=True, right=True)
+
+for clust_id in clust_labels:
     plt.scatter(
-        scaled_specs_umap[clusters.labels_ == i, 0],
-        scaled_specs_umap[clusters.labels_ == i, 1],
-        label="Cluster " + str(i),
+        scaled_specs_umap[clusters.labels_ == clust_id, 0],
+        scaled_specs_umap[clusters.labels_ == clust_id, 1],
+        label=f"Cluster {clust_id}",
     )
-plt.legend()
-plt.title("Clustered UMAP-reduced Eta Carinae RXTE Spectra")
-plt.axis("off")
+plt.title("DBSCAN clustered UMAP-reduced spectra", fontsize=16)
+plt.legend(fontsize=14)
+
+plt.tight_layout()
+plt.show()
 ```
 
 ### Exploring the results of spectral clustering
 
 ```{code-cell} python
-# Plot the scaled spectra mean
-plt.figure(figsize=(10, 6))
-for i in range(len(np.unique(labels[labels >= 0]))):
-    plt.plot(
-        interp_en_vals,
-        scaled_interp_spec_vals[clusters.labels_ == i].mean(axis=0),
-        label="Cluster " + str(i),
-    )
-plt.legend()
-plt.xlabel("Energy (keV)")
-plt.ylabel("Scaled Normalized Count Rate (C/s)")
-plt.title("Scaled Eta Carinae RXTE Spectra Cluster Mean (lin-lin)")
-plt.show()
+fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
+fig.subplots_adjust(hspace=0.0)
+
+for ax_inds, ax in np.ndenumerate(ax_arr):
+    ax.minorticks_on()
+    ax.tick_params(which="both", direction="in", top=True, right=True)
+
+for clust_id in np.unique(clusters.labels_):
+    mean_spec = interp_spec_vals[clusters.labels_ == clust_id].mean(axis=0)
+    ax_arr[0].plot(interp_en_vals, mean_spec.T, label=f"Cluster {clust_id}")
 
-# Plot the unscaled spectra mean
-plt.figure(figsize=(10, 6))
-for i in range(len(np.unique(labels[labels >= 0]))):
-    plt.plot(
-        interp_en_vals,
-        interp_spec_vals[clusters.labels_ == i].mean(axis=0),
-        label="Cluster " + str(i),
+ax_arr[0].xaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
+ax_arr[0].yaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
+
+ax_arr[0].set_ylabel(r"Spectrum [ct cm$^{-2}$ s$^{-1}$ keV$^{-1}$]", fontsize=15)
+ax_arr[0].legend(fontsize=14)
+
+for clust_id in np.unique(clusters.labels_):
+    mean_scaled_spec = scaled_interp_spec_vals[clusters.labels_ == clust_id].mean(
+        axis=0
     )
-plt.legend()
-plt.xlabel("Energy (keV)")
-plt.ylabel("Normalized Count Rate (C/s)")
-plt.title("Unscaled Eta Carinae RXTE Spectra Cluster Mean (lin-lin)")
+    ax_arr[1].plot(interp_en_vals, mean_scaled_spec.T, label=f"Cluster {clust_id}")
+
+ax_arr[1].xaxis.set_major_formatter(FuncFormatter(lambda inp, _: "{:g}".format(inp)))
+
+ax_arr[1].set_ylabel(r"Scaled Spectrum", fontsize=15)
+ax_arr[1].set_xlabel("Energy [keV]", fontsize=15)
+
 plt.show()
 ```
 
@@ -709,7 +778,7 @@ Author: Tess Jaffe, HEASARC Chief Archive Scientist.
 
 Author: David J Turner, HEASARC Staff Scientist.
 
-Updated On: 2025-10-21
+Updated On: 2025-10-22
 
 +++
 

From aa1ea68fefb0ac567bec4f1e611e7572d79a6e6f Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 14:59:49 -0400
Subject: [PATCH 05/17] Added a lot of commentary to the machine learning
 section of the RXTE notebook. For issue #111

---
 .../rxte/analyze-rxte-spectra.md              | 106 ++++++++++++++++--
 1 file changed, 96 insertions(+), 10 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index cfe1ad70..58271115 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -529,34 +529,84 @@ plt.show()
 
 ## 5. Applying simple unsupervised machine learning to the spectra
 
-In the following, we will use different models from the `sklearn` module.
+From our previous analysis, fitting a simple power-law model to the spectra and plotting the parameters against
+the time of their observation, we can see that some quite interesting spectral changes occur over the course of
+RXTE's survey of this object.
 
-As a general rule, ML models work best when they are normalized, so the shape is important, not just the magnitude. We use [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html).
+We might now want to know whether we can identify those same behaviors in a model independent way, to ensure that
+it isn't just a strange emergent property of spectral model choice.
 
-Note that after applying the scaler, we switch to plots in a linear scale.
+Additionally, if the spectral changes we observed through the fitted model parameters **are** representative of real
+behavior, then are we seeing a single transient event where the emission returns to 'normal' after the most
+significant changes, or is it entering a new 'phase' of its emission life cycle?
+
+We are going to use some very simple machine learning techniques to explore these questions by:
+- Reducing the spectra (with around one hundred energy bins and corresponding spectral values) to two dimensions.
+- Using a clustering technique to group similar spectra together.
+- Examining which spectra have been found to be the most similar.
 
 ### Preparing
 
-Setup some energy grid that the spectra will interpolate over.
-start at 2 keV due to low-resolution noise below that energy - specific to RXTE
-stop at 12 keV due to no visible activity from Eta Carinae above that energy
+Simply shoving the RXTE spectra that we already loaded in through some machine learning techniques is not likely to
+produce useful results.
+
+Machine learning techniques that reduce dataset dimensionality often benefit from re-scaling the datasets so that all
+each feature (the spectral value for a particular energy bin, in this case) exist within the same general
+range (-1 to 1, for example). This is because the distance between points is often used as some form of metric in
+these techniques, and we wish to give every feature the same weight in those calculations.
+
+It will hopefully mean that, for instance, the overall normalization isn't the one dominant factor
+in grouping the spectra together, and instead other features (particularly the shape of a spectrum) will be
+given weight as well.
+
+#### Interpolating the spectra onto a common energy grid
+
+Our first step is to place all the spectra onto a common energy grid. Due to changing calibration and instrument
+response, we cannot guarantee that the energy bins of all our spectra are identical.
+
+Looking at the first few energy bins of two different spectra, as an example:
+```{code-cell} python
+print(spec_plot_data[0][0][:5])
+print(spec_plot_data[40][0][:5])
+```
+
+The quickest and easiest way to deal with this is to define a common energy grid, and then interpolate all of our
+spectra onto it.
+
+We choose to begin the grid at 2 keV to avoid low-resolution noise at lower energies, a limitation of RXTE-PCA data, and
+to stop it at 12 keV due to an evident lack of emission from Eta Car above that energy. The grid will have a
+resolution of 0.1 keV.
 
 ```{code-cell} python
+# Defining the specified energy grid
 interp_en_vals = np.arange(2.0, 12.0, 0.1)
 
+# Iterate through all loaded RXTE-PCA spectra
 interp_spec_vals = []
 for spec_info in spec_plot_data:
+    # This runs the interpolation, using the values we extracted from pyXspec earlier.
+    #  spec_info[0] are the energy values, and spec_info[2] the spectral values
     interp_spec_vals.append(np.interp(interp_en_vals, spec_info[0], spec_info[2]))
 
+# Make the interpolated spectra into a numpy array
 interp_spec_vals = np.array(interp_spec_vals)
 ```
 
-### Scale and normalize the spectra
+#### Scale and normalize the spectra
+
+As we've already mentioned, the machine learning techniques we're going to use will work best if the input data
+are scaled. This `StandardScaler` class will move the mean of each feature (spectral value in an energy bin) to zero
+and scale it unit variance.
 
 ```{code-cell} python
 scaled_interp_spec_vals = StandardScaler().fit_transform(interp_spec_vals)
 ```
 
+#### Examining the scaled and normalized spectra
+
+To demonstrate the changes we've made to our dataset, we'll visualize both the interpolated, and the scaled
+interpolated spectra:
+
 ```{code-cell} python
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
 fig.subplots_adjust(hspace=0.0)
@@ -583,11 +633,25 @@ ax_arr[1].set_xlabel("Energy [keV]", fontsize=15)
 plt.show()
 ```
 
-Note that the scaled spectra all have a similar shape AND magnitude, whereas the unscaled spectra have a similar shape but not mangitude.
+### Reducing the dimensionality of the scaled spectral dataset
+
+At this point, we _could_ try to find similar spectra by applying a clustering technique directly to the scaled
+dataset we just created. However, it has been well demonstrated that finding similar data points (clustering them
+together, in other words) is very difficult in high-dimensional data.
+
+This is a result of something called "the curse of dimensionality"
+([see this article for a full explanation](https://towardsdatascience.com/curse-of-dimensionality-a-curse-to-machine-learning-c122ee33bfeb/))
+and it is a common problem in machine learning and data science.
 
-Scaling has the effect of making big features smaller, but small features bigger. So, let's cut off the spectra at 9 keV in order to avoid noise driving the analysis, then rescale.
+One of the ways to combat this issue is to try and reduce the dimensionality of the dataset. The hope is that the
+data point that represents a spectrum (in N dimensions, where N is the number of energy bins in our interpolated
+spectrum) can be projected/reduced to a much smaller number of dimensions without losing the information that will
+help us separate our group the different spectra.
 
-### Reducing the dimensionality of the set of spectra
+We're going to try three common dimensionality reduction techniques:
+- Principal Component Analysis (PCA)
+- T-distributed Stochastic Neighbor Embedding (t-SNE)
+- Uniform Manifold Approximation and Projection (UMAP)
 
 #### Principal Component Analysis (PCA)
 
@@ -613,6 +677,14 @@ scaled_specs_umap = um.fit_transform(scaled_interp_spec_vals)
 
 #### Comparing the results of the different dimensionality reduction methods
 
+In each case, we reduced the scaled spectral dataset to two dimensions, so it is easy to visualize how each
+technique has behaved. We're going to visually assess the separability data point groups (we are starting with the
+assumption that there _will_ be some distinct groupings of spectra).
+
+Just from a quick look, it is fairly obvious that UMAP has done the best job of forming distinct, separable, groupings
+of spectra. **That doesn't necessarily mean that those spectra are somehow physically linked**, but it does seem like
+it will be the best dataset to run our clustering algorithm on.
+
 ```{code-cell} python
 fig, ax_arr = plt.subplots(2, 2, figsize=(8, 8))
 fig.subplots_adjust(hspace=0.0, wspace=0.0)
@@ -669,13 +741,27 @@ plt.show()
 
 ### Automated clustering of like spectra with Density-Based Spatial Clustering of Applications with Noise (DBSCAN)
 
+There are a litany of clustering algorithms implemented in scikit-learn, all with different characteristics,
+strengths, and weaknesses. The scikit-learn
+[website has an interesting comparison](https://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_comparison.html)
+of their performance on different toy datasets, which gives an idea of what sorts of features can be separated
+with each approach.
+
+Some algorithms require that you specify the number of clusters you want to find, which is not particularly easy to do
+while doing this sort of exploratory data analysis. As such, we're going to use 'DBSCAN', which identifies dense
+cores of data points and expands clusters from them. You should read about a variety of clustering techniques, and
+how they work, before deciding on one to use for your own scientific work.
+
 ```{code-cell} python
 dbs = DBSCAN(eps=0.6, min_samples=2)
 clusters = dbs.fit(scaled_specs_umap)
 
+# The labels of the point clusters
 clust_labels = np.unique(clusters.labels_)
+clust_labels
 ```
 
+We will once again visualize the
 ```{code-cell} python
 plt.figure(figsize=(8, 8))
 

From 2adc4be84afc6c130ff17ca3a8d26e5c4c9621eb Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 15:06:50 -0400
Subject: [PATCH 06/17] Added more commentary on the ML part of the RXTE
 notebook, particularly on the mean cluster spectra section. For issue #111

---
 .../rxte/analyze-rxte-spectra.md                 | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 58271115..d3fc1d5a 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -761,7 +761,9 @@ clust_labels = np.unique(clusters.labels_)
 clust_labels
 ```
 
-We will once again visualize the
+We will once again visualize the UMAP-reduced spectral dataset, but this time we'll colour each data point by the
+cluster that DBSCAN says it belongs to. That will give us a good idea of how well the algorithm has performed:
+
 ```{code-cell} python
 plt.figure(figsize=(8, 8))
 
@@ -783,6 +785,16 @@ plt.show()
 
 ### Exploring the results of spectral clustering
 
+Now that we think we've identified distinct groupings of spectra that are similar (in the two-dimensional space
+produced by UMAP at least), we can look to see whether they look distinctly different in their original
+high-dimensional parameter space!
+
+Here we examine both unscaled and scaled versions of the interpolated spectra, but rather than coloring every
+individual spectrum by the cluster that it belongs to, we instead plot the mean spectrum of each cluster.
+
+This approach makes it much easier to interpret the figures, and we can see straight away that most of the
+mean spectra of the clusters are quite distinct from one another:
+
 ```{code-cell} python
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
 fig.subplots_adjust(hspace=0.0)
@@ -815,6 +827,8 @@ ax_arr[1].set_xlabel("Energy [keV]", fontsize=15)
 plt.show()
 ```
 
+
+
 ```{code-cell} python
 marker_cycler = cycler(marker=["x", "d", "+", "p", ".", "2", "*", "H", "X", "v"])
 default_color_cycler = plt.rcParams["axes.prop_cycle"]

From db55893790178a3abde30296f625eba7121acd65 Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 15:16:48 -0400
Subject: [PATCH 07/17] Put the last initial piece of commentary on the ML
 section of the RXTE notebook. For issue #111

---
 .../rxte/analyze-rxte-spectra.md                       | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index d3fc1d5a..8c85e6f2 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -827,7 +827,17 @@ ax_arr[1].set_xlabel("Energy [keV]", fontsize=15)
 plt.show()
 ```
 
+Having done all of this, we can return to the original goal of this section, and look to see whether the significant
+time-dependent behaviors of our fitted model parameters are also identified through this model-independent
+approach. Additionally, if they are identified this way, are the spectra before and after the most significant changes
+different, or did the emission return to 'normal' after the disruption?
 
+We plot the same figure of power-law photon index versus time that we did earlier, but now we can color the data
+points by which point cluster their spectrum is associated with.
+
+From this plot it seems like the significant fluctuations of photon index with time **are not an emergent property
+of our model choice**, and instead represent real differences in the RXTE-PCA spectra of Eta Car at different
+observation times!
 
 ```{code-cell} python
 marker_cycler = cycler(marker=["x", "d", "+", "p", ".", "2", "*", "H", "X", "v"])

From a3a5d49331aadd5f731f9901bc5e61a13cb3d5ff Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 15:48:32 -0400
Subject: [PATCH 08/17] May have finished the addition of ML sections to the
 RXTE notebook. For issue #111

---
 .../rxte/analyze-rxte-spectra.md              | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 8c85e6f2..5a964974 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -653,8 +653,19 @@ We're going to try three common dimensionality reduction techniques:
 - T-distributed Stochastic Neighbor Embedding (t-SNE)
 - Uniform Manifold Approximation and Projection (UMAP)
 
+[This article](https://medium.com/@aastha.code/dimensionality-reduction-pca-t-sne-and-umap-41d499da2df2) provides
+simple summaries of these three techniques and when they can be used. We encourage you to identify resources that
+explain whatever dimensionality reduction technique you may end up using, as it is all too easy to treat these
+techniques as black boxes.
+
 #### Principal Component Analysis (PCA)
 
+PCA is arguably the simplest of the dimensionality reduction techniques that we're trying out in this
+demonstration. The technique works by projecting the high-dimensionality data into a lower-dimension parameter
+space (two dimensions, in this case) whilst maximizing the variance of the projected data.
+
+PCA is best suited to linearly separable data, but isn't particularly suitable for non-linear relationships.
+
 ```{code-cell} python
 pca = PCA(n_components=2)
 
@@ -663,6 +674,14 @@ scaled_specs_pca = pca.fit_transform(scaled_interp_spec_vals)
 
 #### T-distributed Stochastic Neighbor Embedding (t-SNE)
 
+Unlike PCA, t-SNE **is** suited to non-linearly separable data, though it is also much more computationally
+expensive. This technique works by comparing two distributions:
+- Pairwise similarities (defined by some distance metric) of the data points in the original high-dimensional data.
+- The equivalent similarities but in the projected two-dimensional space.
+
+The goal being to minimize the divergence between the two distributions and effectively try to represent the
+same spacings between data points in N-dimensions in a two-dimensional space.
+
 ```{code-cell} python
 tsne = TSNE(n_components=2)
 scaled_specs_tsne = tsne.fit_transform(scaled_interp_spec_vals)
@@ -670,6 +689,13 @@ scaled_specs_tsne = tsne.fit_transform(scaled_interp_spec_vals)
 
 #### Uniform Manifold Approximation and Projection (UMAP)
 
+Finally, the UMAP technique is also well suited to non-linearly separable data. It also does well at preserving
+and representing local and global structures from the original N-dimensional data in the output
+two-dimensional (in our case) space.
+
+UMAP in particular is mathematically complex (compared to the other two techniques we're using) - [a full explanation
+by its creators can be found here.](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html)
+
 ```{code-cell} python
 um = UMAP(random_state=1, n_jobs=1)
 scaled_specs_umap = um.fit_transform(scaled_interp_spec_vals)

From 4f471fa5ace592e34c553962e4b6632fa6e350bd Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 15:59:03 -0400
Subject: [PATCH 09/17] Updated the run time of the RXTE notebook - gotten much
 faster! Backend changes to Xamin must have kicked in I suppose! Puts issue
 #111 into PR

---
 .../mission_specific_analyses/rxte/analyze-rxte-spectra.md   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 5a964974..4c7e8c43 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -33,6 +33,7 @@ By the end of this tutorial, you will:
 - Understand how to retrieve the information necessary to access RXTE spectra stored in the HEASARC S3 bucket.
 - Be capable of downloading and visualizing retrieved spectra.
 - Perform basic spectral fits and explore how spectral properties change with time.
+- Use simple machine learning techniques to perform a model-independent analysis of the spectral data.
 
 
 ## Introduction
@@ -57,7 +58,7 @@ We find all the standard spectra and then load, visualize, and fit them with pyX
 
 ### Runtime
 
-As of 9th October 2025, this notebook takes ~10m to run to completion on Fornax, using the 'small' server with 8GB RAM/ 2 cores.
+As of 22nd October 2025, this notebook takes ~2m to run to completion on Fornax, using the 'small' server with 8GB RAM/ 2 cores.
 
 ## Imports & Environments
 We need the following Python modules:
@@ -124,7 +125,7 @@ else:
 
 ## 1. Finding the data
 
-To identify the relevant RXTE data, we can use [Xamin](https://heasarc.gsfc.nasa.gov/xamin/), the HEASARC web portal, the Virtual Observatory (VO) python client `pyvo`, or **the AstroQuery module** (our choice for this demonstration).
+To identify the relevant RXTE data, we could use [Xamin](https://heasarc.gsfc.nasa.gov/xamin/), the HEASARC web portal, the Virtual Observatory (VO) python client `pyvo`, or **the AstroQuery module** (our choice for this demonstration).
 
 ### Using AstroQuery to find the HEASARC table that lists all of RXTE's observations
 

From d8d91c4c28559d27211a64893d448fa68ca6998d Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 16:12:01 -0400
Subject: [PATCH 10/17] Manually added deps to be installed to the heasoft
 environment on CircleCI (NOT A PERMANENT SOLUTION SEE ISSUE #90)

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index ca08ff75..5c3d56f6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -131,7 +131,7 @@ jobs:
           name: Installing extra dependencies
           # TODO THIS METHOD OF DEFINING DEPS IS NOT GOOD ENOUGH, EVEN FOR A TEMPORARY SOLUTION
           command: |
-            micromamba install -y -c conda-forge -n heasoft astroquery pyvo tqdm aplpy s3fs boto3
+            micromamba install -y -c conda-forge -n heasoft astroquery pyvo tqdm aplpy s3fs boto3 scikit-learn umap-learn
             micromamba install -y -c conda-forge -n sas astroquery pyvo tqdm aplpy s3fs boto3
 
       - run:

From 0fa81a60c78b13a3baf3bf0ab3b2261a58b586b1 Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Wed, 22 Oct 2025 16:52:58 -0400
Subject: [PATCH 11/17] Hopefully made sure that the
 heasoftpy-getting-started.md notebook will fetch its own geomagnetic data.

---
 .../heasoftpy/heasoftpy-getting-started.md    | 45 ++++++++++++-------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/tutorials/useful_high_energy_tools/heasoftpy/heasoftpy-getting-started.md b/tutorials/useful_high_energy_tools/heasoftpy/heasoftpy-getting-started.md
index b74b7fe2..e569ebfc 100644
--- a/tutorials/useful_high_energy_tools/heasoftpy/heasoftpy-getting-started.md
+++ b/tutorials/useful_high_energy_tools/heasoftpy/heasoftpy-getting-started.md
@@ -96,7 +96,12 @@ def worker(in_dir):
     with hsp.utils.local_pfiles_context():
 
         # Call the tasks of interest
-        out = hsp.nicerl2(indir=in_dir, noprompt=True, clobber=True)
+        out = hsp.nicerl2(
+            indir=in_dir,
+            noprompt=True,
+            clobber=True,
+            geomag_path=GEOMAG_PATH,
+        )
 
         # Run any other tasks...
 
@@ -126,6 +131,7 @@ Here we include code that downloads the data for our examples - we don't include
 notebooks as we do not wish it to be the main focus.
 
 (configuration)=
+
 ```{code-cell} python
 :tags: [hide-input]
 
@@ -134,9 +140,11 @@ mp.set_start_method("fork", force=True)
 
 # Here we make sure we have all the data this notebook requires
 if os.path.exists("../../../_data"):
-    nu_data_dir = "../../../_data/NuSTAR/"
-    ni_data_dir = "../../../_data/NICER/"
+    ROOT_DATA_DIR = "../../../_data"
+    nu_data_dir = os.path.join(ROOT_DATA_DIR, "NuSTAR", "")
+    ni_data_dir = os.path.join(ROOT_DATA_DIR, "NICER", "")
 else:
+    ROOT_DATA_DIR = os.getcwd()
     nu_data_dir = "NuSTAR/"
     ni_data_dir = "NICER/"
 
@@ -161,6 +169,13 @@ if any([not os.path.exists(os.path.join(ni_data_dir, oi)) for oi in NI_OBS_IDS])
     # Heasarc.download_data(ni_data_links, location=ni_data_dir)
     Heasarc.download_data(ni_data_links, host="aws", location=ni_data_dir)
     # Heasarc.download_data(ni_data_links, host='sciserver', location=ni_data_dir)
+
+# -------- Get geomagnetic data ---------
+# This ensures that geomagnetic data required for NICER analyses are downloaded
+GEOMAG_PATH = os.path.join(ROOT_DATA_DIR, "geomag")
+os.makedirs(GEOMAG_PATH, exist_ok=True)
+out = hsp.nigeodown(outdir=GEOMAG_PATH, allow_failure=False)
+# ---------------------------------------
 ```
 
 ***
@@ -343,17 +358,10 @@ We do this by creating a helper function `worker` that wraps the task call and a
 
 ```{warning}
 Running the `nicerl2` tool requires that the `GEOMAG_PATH` environment variable be set to a [path that contains
-up-to-date geomagnetic data](https://heasarc.gsfc.nasa.gov/docs/nicer/analysis_threads/geomag/). You can use the
-HEASoft [nigeodown](https://heasarc.gsfc.nasa.gov/docs/software/lheasoft/help/nigeodown.html) tool to download
-that data.
-```
-
-```{code-cell} python
-if "GEOMAG_PATH" not in os.environ:
-    raise FileNotFoundError(
-        "The 'GEOMAG_PATH' environment variable "
-        "must be set to point to geomagnetic data."
-    )
+up-to-date geomagnetic data](https://heasarc.gsfc.nasa.gov/docs/nicer/analysis_threads/geomag/), or that a path is
+passed to the `nicerl2` time. You can use the HEASoft
+[nigeodown](https://heasarc.gsfc.nasa.gov/docs/software/lheasoft/help/nigeodown.html) tool to download
+that data, as we did in the "Global setup: Configuration" section near the beginning of the notebook.
 ```
 
 ```{code-cell} python
@@ -365,12 +373,15 @@ with mp.Pool(nproc) as p:
     result = p.map(worker, obsids)
 
 result
-```
+````
 
 ## About this Notebook
 
-**Author:** Abdu Zoghbi, Staff Scientist.\
-**Updated On:** 2025-09-03
+Author: Abdu Zoghbi, HEASARC Staff Scientist
+
+Author: David Turner, HEASARC Staff Scientist
+
+Updated On: 2025-10-22
 
 +++
 

From 9610ade18cfea193056c24f306a562d902ace11a Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Thu, 23 Oct 2025 09:21:36 -0400
Subject: [PATCH 12/17] Shrunk the DBSCAN-coloured UMAP space figure to 6x6,
 and removed tick labels. In the RXTE spectral exploration notebook. For PR
 #117

---
 .../mission_specific_analyses/rxte/analyze-rxte-spectra.md  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 4c7e8c43..7b11ba9e 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -792,7 +792,7 @@ We will once again visualize the UMAP-reduced spectral dataset, but this time we
 cluster that DBSCAN says it belongs to. That will give us a good idea of how well the algorithm has performed:
 
 ```{code-cell} python
-plt.figure(figsize=(8, 8))
+plt.figure(figsize=(6, 6))
 
 plt.minorticks_on()
 plt.tick_params(which="both", direction="in", top=True, right=True)
@@ -803,6 +803,10 @@ for clust_id in clust_labels:
         scaled_specs_umap[clusters.labels_ == clust_id, 1],
         label=f"Cluster {clust_id}",
     )
+
+plt.gca().set_yticklabels([])
+plt.gca().set_xticklabels([])
+
 plt.title("DBSCAN clustered UMAP-reduced spectra", fontsize=16)
 plt.legend(fontsize=14)
 

From a06bfcfef36371aaa974472a30e75b7fa60191a4 Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Thu, 6 Nov 2025 12:37:17 -0500
Subject: [PATCH 13/17] Added tags to large matplotlib-code-cells, the user
 doesn't want to see them open from the get go. For PR #117

---
 .../rxte/analyze-rxte-spectra.md                 | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 7b11ba9e..14448697 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -8,7 +8,7 @@ authors:
   affiliations: ['University of Maryland, Baltimore County', 'HEASARC, NASA Goddard']
   orcid: 0000-0001-9658-1396
   website: https://davidt3.github.io/
-date: '2025-10-22'
+date: '2025-11-06'
 jupytext:
   text_representation:
     extension: .md
@@ -19,7 +19,7 @@ kernelspec:
   display_name: heasoft
   language: python
   name: heasoft
-title: RXTE Spectral Analysis Example
+title: RXTE spectral analysis example
 ---
 
 # Exploring RXTE spectral observations of Eta Car
@@ -484,6 +484,8 @@ Now we actually plot the Photon Index and Normalization values against the start
 strong indication of time varying X-ray emission from Eta Car:
 
 ```{code-cell} python
+:tags: [hide-input]
+
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(13, 8))
 fig.subplots_adjust(hspace=0.0)
 
@@ -609,6 +611,8 @@ To demonstrate the changes we've made to our dataset, we'll visualize both the i
 interpolated spectra:
 
 ```{code-cell} python
+:tags: [hide-input]
+
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
 fig.subplots_adjust(hspace=0.0)
 
@@ -713,6 +717,8 @@ of spectra. **That doesn't necessarily mean that those spectra are somehow physi
 it will be the best dataset to run our clustering algorithm on.
 
 ```{code-cell} python
+:tags: [hide-input]
+
 fig, ax_arr = plt.subplots(2, 2, figsize=(8, 8))
 fig.subplots_adjust(hspace=0.0, wspace=0.0)
 
@@ -827,6 +833,8 @@ This approach makes it much easier to interpret the figures, and we can see stra
 mean spectra of the clusters are quite distinct from one another:
 
 ```{code-cell} python
+:tags: [hide-input]
+
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
 fig.subplots_adjust(hspace=0.0)
 
@@ -871,6 +879,8 @@ of our model choice**, and instead represent real differences in the RXTE-PCA sp
 observation times!
 
 ```{code-cell} python
+:tags: [hide-input]
+
 marker_cycler = cycler(marker=["x", "d", "+", "p", ".", "2", "*", "H", "X", "v"])
 default_color_cycler = plt.rcParams["axes.prop_cycle"]
 new_cycler = marker_cycler + default_color_cycler
@@ -919,7 +929,7 @@ Author: Tess Jaffe, HEASARC Chief Archive Scientist.
 
 Author: David J Turner, HEASARC Staff Scientist.
 
-Updated On: 2025-10-22
+Updated On: 2025-11-06
 
 +++
 

From 88f8a624bbbaa4181ed1cea5717b3982ff9921e3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 16 Jan 2026 22:15:10 +0000
Subject: [PATCH 14/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .github/workflows/populate-production-notebooks.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/populate-production-notebooks.yml b/.github/workflows/populate-production-notebooks.yml
index c28e1b7f..9b3be836 100644
--- a/.github/workflows/populate-production-notebooks.yml
+++ b/.github/workflows/populate-production-notebooks.yml
@@ -106,16 +106,16 @@ jobs:
           find downloaded_artifacts -name "*.ipynb" -type f -print0 | while IFS= read -r -d '' file; do
             # 1. Strip the leading directory to get the relative path
             rel_path="${file#downloaded_artifacts/}"
-            
+
             # 2. Determine the destination directory
             dest_dir="production-notebooks/$(dirname "$rel_path")"
-            
+
             # 3. Create the directory and move the file
             mkdir -p "$dest_dir"
             mv "$file" "production-notebooks/$rel_path"
           done
 
-          
+
           # Copy changed markdown files from main branch to production
           for nb in $(echo "${{ steps.nb_artifacts_to_fetch.outputs.notebooks_to_update }}" | tr ',' '\n'); do
 

From 5f73d48488ba78e653835e8e4e93862e91bd602d Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Fri, 16 Jan 2026 17:24:02 -0500
Subject: [PATCH 15/17] Added the updated cell metadata to collapse plotting
 and configuration cells (makes sure that the cells are collapsed in the
 markdown/ipynb notebooks as well hopefully). For PR #117

---
 .../rxte/analyze-rxte-spectra.md              | 78 +++++++++++++++----
 1 file changed, 61 insertions(+), 17 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 8fb45a31..77237251 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -8,7 +8,7 @@ authors:
   affiliations: ['University of Maryland, Baltimore County', 'HEASARC, NASA Goddard']
   orcid: 0000-0001-9658-1396
   website: https://davidt3.github.io/
-date: '2025-11-06'
+date: '2026-01-16'
 jupytext:
   text_representation:
     extension: .md
@@ -91,18 +91,14 @@ from umap import UMAP
 
 ### Functions
 
-```{code-cell} python
-:tags: [hide-input]
-
-# This cell will be automatically collapsed when the notebook is rendered, which helps
-#  to hide large and distracting functions while keeping the notebook self-contained
-#  and leaving them easily accessible to the user
-```
-
 ### Constants
 
 ```{code-cell} python
-:tags: [hide-input]
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
 
 SRC_NAME = "Eta Car"
 ```
@@ -112,7 +108,11 @@ SRC_NAME = "Eta Car"
 The only configuration we do is to set up the root directory where we will store downloaded data.
 
 ```{code-cell} python
-:tags: [hide-input]
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
 
 if os.path.exists("../../../_data"):
     ROOT_DATA_DIR = "../../../_data/RXTE/"
@@ -371,6 +371,12 @@ norms = np.array(norms)
 Using the data extracted in the last step, we can plot the spectra and fitted models using matplotlib.
 
 ```{code-cell} python
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
+
 # Now we plot the spectra
 fig = plt.figure(figsize=(8, 6))
 
@@ -396,6 +402,12 @@ plt.show()
 ### Visualizing the fitted models
 
 ```{code-cell} python
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
+
 fig = plt.figure(figsize=(8, 6))
 
 plt.minorticks_on()
@@ -431,6 +443,12 @@ This shows us what the distributions of the Photon Index (related to the power l
 model normalization look like. We can see that the distributions are not particularly symmetric and Gaussian-looking.
 
 ```{code-cell} python
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
+
 fig, ax_arr = plt.subplots(1, 2, sharey="row", figsize=(13, 6))
 fig.subplots_adjust(wspace=0.0)
 
@@ -483,7 +501,11 @@ Now we actually plot the Photon Index and Normalization values against the start
 strong indication of time varying X-ray emission from Eta Car:
 
 ```{code-cell} python
-:tags: [hide-input]
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
 
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(13, 8))
 fig.subplots_adjust(hspace=0.0)
@@ -610,7 +632,11 @@ To demonstrate the changes we've made to our dataset, we'll visualize both the i
 interpolated spectra:
 
 ```{code-cell} python
-:tags: [hide-input]
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
 
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
 fig.subplots_adjust(hspace=0.0)
@@ -716,7 +742,11 @@ of spectra. **That doesn't necessarily mean that those spectra are somehow physi
 it will be the best dataset to run our clustering algorithm on.
 
 ```{code-cell} python
-:tags: [hide-input]
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
 
 fig, ax_arr = plt.subplots(2, 2, figsize=(8, 8))
 fig.subplots_adjust(hspace=0.0, wspace=0.0)
@@ -797,6 +827,12 @@ We will once again visualize the UMAP-reduced spectral dataset, but this time we
 cluster that DBSCAN says it belongs to. That will give us a good idea of how well the algorithm has performed:
 
 ```{code-cell} python
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
+
 plt.figure(figsize=(6, 6))
 
 plt.minorticks_on()
@@ -832,7 +868,11 @@ This approach makes it much easier to interpret the figures, and we can see stra
 mean spectra of the clusters are quite distinct from one another:
 
 ```{code-cell} python
-:tags: [hide-input]
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
 
 fig, ax_arr = plt.subplots(2, 1, sharex="col", figsize=(16, 12))
 fig.subplots_adjust(hspace=0.0)
@@ -878,7 +918,11 @@ of our model choice**, and instead represent real differences in the RXTE-PCA sp
 observation times!
 
 ```{code-cell} python
-:tags: [hide-input]
+---
+tags: [hide-input]
+jupyter:
+  source_hidden: true
+---
 
 marker_cycler = cycler(marker=["x", "d", "+", "p", ".", "2", "*", "H", "X", "v"])
 default_color_cycler = plt.rcParams["axes.prop_cycle"]
@@ -928,7 +972,7 @@ Author: Tess Jaffe, HEASARC Chief Archive Scientist.
 
 Author: David J Turner, HEASARC Staff Scientist.
 
-Updated On: 2025-11-06
+Updated On: 2026-01-16
 
 +++
 

From 1fe9d217397099f30702bf3b56df92b188716edf Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Fri, 16 Jan 2026 17:36:07 -0500
Subject: [PATCH 16/17] Lots of little grammar fixes to analyze-rxte-spectra.md

---
 .../rxte/analyze-rxte-spectra.md              | 52 +++++++++----------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index 77237251..a0f0ad66 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -57,7 +57,7 @@ We find all the standard spectra and then load, visualize, and fit them with pyX
 
 ### Runtime
 
-As of 22nd October 2025, this notebook takes ~2m to run to completion on Fornax, using the 'small' server with 8GB RAM/ 2 cores.
+As of 16th January 2026, this notebook takes ~3 minutes to run to completion on Fornax, using the 'small' server with 8GB RAM/ 2 cores.
 
 ## Imports & Environments
 We need the following Python modules:
@@ -139,7 +139,7 @@ table_name
 
 ### Identifying RXTE observations of Eta Car
 
-Now that we have identified the HEASARC table that contains information on RXTE pointings, we're going to search
+Now that we have identified the HEASARC table that contains summary information about all of RXTE's observations, we're going to search
 it for observations of **Eta Car**.
 
 For convenience, we pull the coordinate of Eta Car from the CDS name resolver functionality built into AstroPy's
@@ -207,7 +207,7 @@ data_links
 ```
 
 ## 2. Acquiring the data
-We now know where the relevant RXTE-PCA spectra are stored in the HEASARC S3 bucket, and will proceed to download
+We now know where the relevant RXTE-PCA spectra are stored in the HEASARC S3 bucket and will proceed to download
 them for local use.
 
 ```{caution}
@@ -234,7 +234,7 @@ Heasarc.download_data(data_links[:3], host="aws", location=ROOT_DATA_DIR)
 ### Downloading only RXTE-PCA spectra
 
 Rather than downloading all files for all our observations, we will now _only_ fetch those that are directly
-relevant to what we want to do in this notebook - this method is a little more involved than using AstroQuery, but
+relevant to what we want to do in this notebook. This method is a little more involved than using AstroQuery, but
 it is more efficient and flexible.
 
 We make use of a Python module called `s3fs`, which allows us to interact with files stored on Amazon's S3
@@ -260,8 +260,8 @@ shows us that PCA spectra and supporting files are named as:
 - **xp{ObsID}_b2.pha** - the background spectrum companion to the source spectrum.
 - **xp{ObsID}.rsp** - the supporting file that defines the response curve (sensitivity over energy range) and redistribution matrix (a mapping of channel to energy) for the RXTE-PCA instrument during the observation.
 
-We set up a file patterns for these three files for each datalink entry, and then use the `expand_path` method of
-our previously-set-up S3 filesystem object to find all the files that match the pattern. This is useful because the
+We set up patterns for these three files for each datalink entry, and then use the `expand_path` method of
+our previously set-up S3 filesystem object to find all the files that match the pattern. This is useful because the
 RXTE datalinks we found might include sections of a particular observation that do not have standard products
 generated, for instance, the slewing periods before/after the telescope was aligned on target.
 
@@ -287,7 +287,7 @@ ret = s3.get(val_file_uris, spec_file_path)
 
 We have acquired the spectra and their supporting files and will perform very basic visualizations and model fitting
 using the Python wrapper to the ubiquitous X-ray spectral fitting code, XSPEC. To learn more advanced uses of
-pyXspec please refer to the [documentation](https://heasarc.gsfc.nasa.gov/docs/software/xspec/python/html/index.html),
+pyXspec, please refer to the [documentation](https://heasarc.gsfc.nasa.gov/docs/software/xspec/python/html/index.html)
 or examine other tutorials in this repository.
 
 We set the ```chatter``` parameter to 0 to reduce the printed text given the large number of files we are reading.
@@ -317,7 +317,7 @@ spectrum data points, fitted model data points for plotting, and the fitted mode
 
 Note that we move into the directory where the spectra are stored. This is because the main source spectra files
 have relative paths to the background and response files in their headers, and if we didn't move into the
-directory XSPEC would not be able to find them.
+working directory, then pyXspec would not be able to find them.
 
 ```{code-cell} python
 # We move into the directory where the spectra are stored
@@ -431,7 +431,7 @@ plt.show()
 
 ## 4. Exploring model fit results
 
-As we have fit models to all these spectra, and retrieved their parameter's values, we should take a look at them!
+As we have fit models to all these spectra and retrieved the parameter's values, we should take a look at them!
 
 Exactly what you do at this point will depend entirely upon your science case and the type of object you've been
 analyzing. However, any analysis will benefit from an initial examination of the fitted parameter values (particularly if
@@ -471,13 +471,13 @@ plt.show()
 ### Do model parameters vary with time?
 
 That might then make us wonder if the reason we're seeing these non-Gaussian distributions is due to Eta Car's
-X-ray emission varying with time over the course of RXTE's campaign? Some kinds of X-ray source are extremely
+X-ray emission varying with time over the course of RXTE's campaign? Some kinds of X-ray sources are extremely
 variable, and we know that Eta Car's X-ray emission is variable in other wavelengths.
 
-As a quick check, we can retrieve the start time of each RXTE observation from the source spectra, and then plot
+As a quick check, we can retrieve the start time of each RXTE observation from the source spectra and then plot
 the model parameter values against the time of their observation. In this case, we extract the modified Julian
-date (MJD) reference time, the time system, and the start time (which is currently relative to the reference time) -
-combining this information lets us convert the start time into a datetime object.
+date (MJD) reference time, the time system, and the start time (which is currently relative to the reference time).
+Combining this information lets us convert the start time into a datetime object.
 
 ```{code-cell} python
 obs_start = []
@@ -555,7 +555,7 @@ plt.show()
 
 From our previous analysis, fitting a simple power-law model to the spectra and plotting the parameters against
 the time of their observation, we can see that some quite interesting spectral changes occur over the course of
-RXTE's survey of this object.
+RXTE's survey visits to Eta Car.
 
 We might now want to know whether we can identify those same behaviors in a model independent way, to ensure that
 it isn't just a strange emergent property of spectral model choice.
@@ -575,7 +575,7 @@ Simply shoving the RXTE spectra that we already loaded in through some machine l
 produce useful results.
 
 Machine learning techniques that reduce dataset dimensionality often benefit from re-scaling the datasets so that all
-each feature (the spectral value for a particular energy bin, in this case) exist within the same general
+features (the spectral value for a particular energy bin, in this case) exist within the same general
 range (-1 to 1, for example). This is because the distance between points is often used as some form of metric in
 these techniques, and we wish to give every feature the same weight in those calculations.
 
@@ -594,11 +594,11 @@ print(spec_plot_data[0][0][:5])
 print(spec_plot_data[40][0][:5])
 ```
 
-The quickest and easiest way to deal with this is to define a common energy grid, and then interpolate all of our
+The quickest and easiest way to deal with this is to define a common energy grid and then interpolate all of our
 spectra onto it.
 
 We choose to begin the grid at 2 keV to avoid low-resolution noise at lower energies, a limitation of RXTE-PCA data, and
-to stop it at 12 keV due to an evident lack of emission from Eta Car above that energy. The grid will have a
+to stop it at 12 keV due to a noticeable lack of emission from Eta Car above that energy. The grid will have a
 resolution of 0.1 keV.
 
 ```{code-cell} python
@@ -628,8 +628,7 @@ scaled_interp_spec_vals = StandardScaler().fit_transform(interp_spec_vals)
 
 #### Examining the scaled and normalized spectra
 
-To demonstrate the changes we've made to our dataset, we'll visualize both the interpolated, and the scaled
-interpolated spectra:
+To demonstrate the changes we've made to our dataset, we'll visualize both the interpolated and the scaled-interpolated spectra:
 
 ```{code-cell} python
 ---
@@ -670,13 +669,12 @@ dataset we just created. However, it has been well demonstrated that finding sim
 together, in other words) is very difficult in high-dimensional data.
 
 This is a result of something called "the curse of dimensionality"
-([see this article for a full explanation](https://towardsdatascience.com/curse-of-dimensionality-a-curse-to-machine-learning-c122ee33bfeb/))
-and it is a common problem in machine learning and data science.
+([see this article for a full explanation](https://towardsdatascience.com/curse-of-dimensionality-a-curse-to-machine-learning-c122ee33bfeb/)), and it is a common problem in machine learning and data science.
 
 One of the ways to combat this issue is to try and reduce the dimensionality of the dataset. The hope is that the
 data point that represents a spectrum (in N dimensions, where N is the number of energy bins in our interpolated
 spectrum) can be projected/reduced to a much smaller number of dimensions without losing the information that will
-help us separate our group the different spectra.
+help us group the different spectra.
 
 We're going to try three common dimensionality reduction techniques:
 - Principal Component Analysis (PCA)
@@ -709,7 +707,7 @@ expensive. This technique works by comparing two distributions:
 - Pairwise similarities (defined by some distance metric) of the data points in the original high-dimensional data.
 - The equivalent similarities but in the projected two-dimensional space.
 
-The goal being to minimize the divergence between the two distributions and effectively try to represent the
+The goal is to minimize the divergence between the two distributions and effectively try to represent the
 same spacings between data points in N-dimensions in a two-dimensional space.
 
 ```{code-cell} python
@@ -811,8 +809,8 @@ with each approach.
 
 Some algorithms require that you specify the number of clusters you want to find, which is not particularly easy to do
 while doing this sort of exploratory data analysis. As such, we're going to use 'DBSCAN', which identifies dense
-cores of data points and expands clusters from them. You should read about a variety of clustering techniques, and
-how they work, before deciding on one to use for your own scientific work.
+cores of data points and expands clusters from them. You should read about a variety of clustering techniques and
+how they work before deciding on one to use for your own scientific work.
 
 ```{code-cell} python
 dbs = DBSCAN(eps=0.6, min_samples=2)
@@ -823,7 +821,7 @@ clust_labels = np.unique(clusters.labels_)
 clust_labels
 ```
 
-We will once again visualize the UMAP-reduced spectral dataset, but this time we'll colour each data point by the
+We will once again visualize the UMAP-reduced spectral dataset, but this time we'll color each data point by the
 cluster that DBSCAN says it belongs to. That will give us a good idea of how well the algorithm has performed:
 
 ```{code-cell} python
@@ -905,7 +903,7 @@ ax_arr[1].set_xlabel("Energy [keV]", fontsize=15)
 plt.show()
 ```
 
-Having done all of this, we can return to the original goal of this section, and look to see whether the significant
+Having done all of this, we can return to the original goal of this section and look to see whether the significant
 time-dependent behaviors of our fitted model parameters are also identified through this model-independent
 approach. Additionally, if they are identified this way, are the spectra before and after the most significant changes
 different, or did the emission return to 'normal' after the disruption?

From 4e709403d0fd53e31023e96b9e69b6d4db198227 Mon Sep 17 00:00:00 2001
From: David Turner <turne540@msu.edu>
Date: Fri, 16 Jan 2026 17:42:23 -0500
Subject: [PATCH 17/17] Replaced import xspec with import xspec as xs

---
 .../rxte/analyze-rxte-spectra.md              | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
index a0f0ad66..53e7c8ff 100644
--- a/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
+++ b/tutorials/mission_specific_analyses/rxte/analyze-rxte-spectra.md
@@ -69,7 +69,7 @@ import astropy.io.fits as fits
 import matplotlib.dates as mdates
 import matplotlib.pyplot as plt
 import numpy as np
-import xspec
+import xspec as xs
 from astropy.coordinates import SkyCoord
 from astropy.time import Time, TimeDelta
 from astropy.units import Quantity
@@ -295,15 +295,15 @@ We set the ```chatter``` parameter to 0 to reduce the printed text given the lar
 ### Configuring PyXspec
 
 ```{code-cell} python
-xspec.Xset.chatter = 0
+xs.Xset.chatter = 0
 
 # Other xspec settings
-xspec.Plot.area = True
-xspec.Plot.xAxis = "keV"
-xspec.Plot.background = True
-xspec.Fit.statMethod = "cstat"
-xspec.Fit.query = "no"
-xspec.Fit.nIterations = 500
+xs.Plot.area = True
+xs.Plot.xAxis = "keV"
+xs.Plot.background = True
+xs.Fit.statMethod = "cstat"
+xs.Fit.query = "no"
+xs.Fit.nIterations = 500
 
 # Store the current working directory
 cwd = os.getcwd()
@@ -336,15 +336,15 @@ src_sp_files = [rel_uri.split("/")[-1] for rel_uri in val_file_uris if "_s2" in
 with tqdm(desc="Loading/fitting RXTE spectra", total=len(src_sp_files)) as onwards:
     for sp_name in src_sp_files:
         # Clear out the previously loaded dataset and model
-        xspec.AllData.clear()
-        xspec.AllModels.clear()
+        xs.AllData.clear()
+        xs.AllModels.clear()
 
         # Loading in the spectrum
-        spec = xspec.Spectrum(sp_name)
+        spec = xs.Spectrum(sp_name)
 
         # Set up a powerlaw and then fit to the current spectrum
-        model = xspec.Model("powerlaw")
-        xspec.Fit.perform()
+        model = xs.Model("powerlaw")
+        xs.Fit.perform()
 
         # Extract the parameter values
         pho_inds.append(model.powerlaw.PhoIndex.values[:2])
@@ -352,11 +352,11 @@ with tqdm(desc="Loading/fitting RXTE spectra", total=len(src_sp_files)) as onwar
 
         # Create an XSPEC plot (not visualizaed here) and then extract the information
         #  required to let us plot it using matplotlib
-        xspec.Plot("data")
+        xs.Plot("data")
         spec_plot_data.append(
-            [xspec.Plot.x(), xspec.Plot.xErr(), xspec.Plot.y(), xspec.Plot.yErr()]
+            [xs.Plot.x(), xs.Plot.xErr(), xs.Plot.y(), xs.Plot.yErr()]
         )
-        fit_plot_data.append(xspec.Plot.model())
+        fit_plot_data.append(xs.Plot.model())
 
         onwards.update(1)