
Commit 57c846b

Fix two docs issues (one with the surviving mass calculation, one with the noise modelling example). Very minor change to the noise modelling plotting to make the labelling more appropriate.
1 parent 338e9da commit 57c846b

File tree

3 files changed: +86, -15 lines changed


docs/source/library_gen/complex_library_generation.ipynb

Lines changed: 0 additions & 2 deletions
@@ -60,7 +60,6 @@
 " calculate_mass_weighted_age,\n",
 " calculate_muv,\n",
 " calculate_sfh_quantile,\n",
-" calculate_surviving_mass,\n",
 " draw_from_hypercube,\n",
 " generate_random_DB_sfh,\n",
 ")"
@@ -479,7 +478,6 @@
 " \"sfh_quant_75\": (calculate_sfh_quantile, 0.75, True), # Calculate SFH quantile at 75%\n",
 " \"UV\": (calculate_colour, \"U\", \"V\", emission_key, True), # Calculate UV colour (rest-frame)\n",
 " \"VJ\": (calculate_colour, \"V\", \"J\", emission_key, True), # Calculate VJ colour (rest-frame)\n",
-" \"log_surviving_mass\": (calculate_surviving_mass, grid), # Calculate surviving mass\n",
 " \"d4000\": (calculate_d4000, emission_key), # Calculate D4000 using the emission model\n",
 " \"beta\": (calculate_beta, emission_key),\n",
 " \"balmer_decrement\": (calculate_balmer_decrement, emission_key),\n",

docs/source/noise_modelling/noise_models.ipynb

Lines changed: 77 additions & 8 deletions
@@ -37,7 +37,7 @@
 "source": [
 "import matplotlib.pyplot as plt\n",
 "import numpy as np\n",
-"from unyt import Jy, nJy\n",
+"from unyt import nJy\n",
 "\n",
 "from synference import DepthUncertaintyModel\n",
 "\n",
@@ -158,7 +158,9 @@
 "source": [
 "This noise model is implemented in the ```AsinhEmpiricalUncertaintyModel``` class. It works similarly to the ```GeneralEmpiricalUncertaintyModel```, but applies the asinh transformation to the fluxes before fitting the noise model. It only accepts input fluxes in Jy units, and whilst the ```apply_noise``` method will accept the ```out_flux_unit``` argument for consistency with other noise models, the output fluxes will always be in asinh magnitudes.\n",
 "\n",
-"For this model, the asinh softening parameter is not set directly, but in multiples of the median standard deviation of the flux uncertainties in the training data. This is set using the ```f_b_factor``` argument when initializing the model. For example, setting ```f_b_factor=1.0``` will set the softening parameter to the median flux uncertainty, while ```f_b_factor=2.0``` will set it to twice the median flux uncertainty. By default, ```f_b_factor=5.0```, so the softening parameter is set to five times the median flux uncertainty (aka the $5\sigma$ detection limit)."
+"For this model, the asinh softening parameter is not set directly, but in multiples of the median standard deviation of the flux uncertainties in the training data. This is set using the ```f_b_factor``` argument when initializing the model. For example, setting ```f_b_factor=1.0``` will set the softening parameter to the median flux uncertainty, while ```f_b_factor=2.0``` will set it to twice the median flux uncertainty. By default, ```f_b_factor=5.0```, so the softening parameter is set to five times the median flux uncertainty (aka the $5\sigma$ detection limit).\n",
+"\n",
+"For this example we set up a more realistic scenario: a catalogue of 50,000 fake sources with a fixed background level, a fractional error, and an overall lognormal flux distribution."
 ]
 },
 {
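The markdown cell above only defines the softening parameter through ```f_b_factor``` times the median flux uncertainty. A rough, standalone sketch of what that means numerically (assuming the standard Lupton et al. 1999 asinh-magnitude convention; the exact form of synference's ```f_jy_to_asinh``` is not shown in this diff):

import numpy as np

def softening_from_errors(flux_errors_jy, f_b_factor=5.0):
    """Softening parameter b as a multiple of the median flux uncertainty (default 5, i.e. roughly the 5-sigma depth)."""
    return f_b_factor * np.median(flux_errors_jy)

def asinh_mag(flux_jy, b_jy, f0_jy=3631.0):
    """Asinh ('Luptitude') magnitude; stays finite for zero and negative fluxes.

    Assumes the Lupton et al. (1999) form with an AB zero-point flux f0_jy;
    the convention used inside synference may differ.
    """
    x = np.asarray(flux_jy) / f0_jy
    b = b_jy / f0_jy
    return -2.5 / np.log(10) * (np.arcsinh(x / (2 * b)) + np.log(b))

# With a 100 uJy median uncertainty and the default f_b_factor=5.0,
# the softening flux sits at the ~5-sigma limit of 0.5 mJy.
b_jy = softening_from_errors(np.full(100, 1e-4))
print(b_jy, asinh_mag([0.0, -1e-4, 1e-3], b_jy))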
@@ -169,19 +171,86 @@
 "outputs": [],
 "source": [
 "from synference import AsinhEmpiricalUncertaintyModel\n",
+"from synference.utils import f_jy_to_asinh\n",
+"\n",
+"# --- 1. Define Realistic Survey Parameters ---\n",
+"N_SOURCES: int = 50_000\n",
+"\n",
+"# Background noise limit (e.g., 100 uJy).\n",
+"# This dominates the error for faint sources.\n",
+"BACKGROUND_NOISE_JY: float = 0.0001\n",
+"\n",
+"# Fractional/calibration error (e.g., 2%).\n",
+"# This dominates the error for bright sources.\n",
+"FRACTIONAL_ERROR: float = 0.02\n",
+"\n",
+"# Log-normal distribution parameters for \"true\" fluxes.\n",
+"# We set the median flux to be near the noise floor.\n",
+"FLUX_MEDIAN_JY: float = 0.00015\n",
+"FLUX_LOG_SIGMA: float = 1.5 # Width of the log-normal distribution\n",
+"\n",
+"# --- 2. Generate \"True\" Fluxes ---\n",
+"# Use a log-normal distribution: many faint sources, few bright ones\n",
+"true_flux_jy = np.random.lognormal(\n",
+"    mean=np.log(FLUX_MEDIAN_JY), sigma=FLUX_LOG_SIGMA, size=N_SOURCES\n",
+")\n",
+"\n",
+"# --- 3. Calculate \"Ideal\" Error for Each True Flux ---\n",
+"# This is the characteristic error model: sqrt(bg_noise^2 + (frac_err * flux)^2)\n",
+"ideal_error_jy = np.sqrt(BACKGROUND_NOISE_JY**2 + (FRACTIONAL_ERROR * true_flux_jy) ** 2)\n",
+"\n",
+"# --- 4. Simulate \"Observed\" Fluxes by Scattering by the Error ---\n",
+"# The observed flux is the true flux plus Gaussian noise\n",
+"observed_flux_jy = true_flux_jy + np.random.normal(loc=0.0, scale=ideal_error_jy, size=N_SOURCES)\n",
 "\n",
-"# Generate mock data\n",
-"fluxes = np.random.uniform(0.1, 100, size=10_000) * Jy\n",
-"errors = np.random.normal(0.0, 5.0, size=10_000) * Jy\n",
+"# The \"observed error\" is the error the pipeline *reports*.\n",
+"# We assume the pipeline correctly estimates the ideal error.\n",
+"observed_error_jy = ideal_error_jy\n",
 "\n",
+"# Plot a flux histogram\n",
+"plt.hist(true_flux_jy, label=\"Fluxes\", bins=100)\n",
+"plt.yscale(\"log\")\n",
+"plt.xlabel(\"Flux [Jy]\")"
+]
+},
+{
+"cell_type": "markdown",
+"id": "5dc934bb",
+"metadata": {},
+"source": [
+"From this realistic dataset we can generate our `AsinhEmpiricalUncertaintyModel`. Note that some data points fall below the softening parameter: these are originally negative fluxes, which can be represented by asinh magnitudes."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "2db14a1e",
+"metadata": {},
+"outputs": [],
+"source": [
 "noise_model = AsinhEmpiricalUncertaintyModel(\n",
-"    observed_phot_jy=fluxes,\n",
-"    observed_phot_errors_jy=errors,\n",
+"    observed_phot_jy=observed_flux_jy,\n",
+"    observed_phot_errors_jy=observed_error_jy,\n",
 ")\n",
 "\n",
 "print(noise_model.b)\n",
 "\n",
-"noise_model.plot()"
+"# Plot the noise model and mark the softening parameter noise_model.b\n",
+"fig, ax = plt.subplots()\n",
+"noise_model.plot(ax=ax)\n",
+"\n",
+"\n",
+"plt.axvline(f_jy_to_asinh(noise_model.b))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "9b18c966",
+"metadata": {},
+"outputs": [],
+"source": [
+"?truncnorm"
 ]
 },
 {
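The new mock catalogue above combines a constant background floor with a fractional calibration term. A short, self-contained check of that error model (same constants as in the diff; not part of the commit) shows which term dominates where:

import numpy as np

BACKGROUND_NOISE_JY = 0.0001  # 100 uJy background floor
FRACTIONAL_ERROR = 0.02       # 2% calibration error

def ideal_error_jy(flux_jy):
    """sqrt(bg^2 + (frac * flux)^2), the per-source error used for the mock catalogue."""
    return np.hypot(BACKGROUND_NOISE_JY, FRACTIONAL_ERROR * flux_jy)

# Faint sources are background-dominated, bright sources calibration-dominated.
for f in (1e-4, 1e-3, 1e-2, 1e-1):
    print(f"flux = {f:.0e} Jy -> error = {ideal_error_jy(f):.2e} Jy")

# The two regimes cross where bg = frac * flux, i.e. at bg / frac = 5 mJy.
print("crossover flux [Jy]:", BACKGROUND_NOISE_JY / FRACTIONAL_ERROR)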

src/synference/noise_models.py

Lines changed: 9 additions & 5 deletions
@@ -210,6 +210,7 @@ def _from_hdf5_group(cls, hdf5_group: h5py.Group) -> "DepthUncertaintyModel":
 
 class SpectralUncertaintyModel(UncertaintyModel):
     """Applies uncertanties to a spectrum based on a fixed error kernel or a provided table."""
+
     def __init__(self, error_kernel: np.ndarray, **kwargs: Any):
         """Initializes the model with a fixed error kernel.
 
@@ -240,7 +241,7 @@ def apply_noise(
             return noisy_flux, self.error_kernel
 
         return noisy_flux
-
+
     def serialize_to_hdf5(self, hdf5_group: h5py.Group):
         """Serializes the model to an HDF5 group."""
         attrs = hdf5_group.attrs
@@ -256,7 +257,6 @@ def _from_hdf5_group(cls, hdf5_group: h5py.Group) -> "SpectralUncertaintyModel":
             error_kernel=error_kernel,
             return_noise=hdf5_group.attrs["return_noise"],
         )
-
 
 
 class EmpiricalUncertaintyModel(UncertaintyModel, ABC):
@@ -321,7 +321,7 @@ def _compute_bins_from_data(
 
     def plot(self, ax: Optional[plt.Axes] = None):
         """Plots the binned median error and standard deviation."""
-        fig, ax = plt.subplots() if ax is None else (None, ax)
+        fig, ax = plt.subplots() if ax is None else (ax.get_figure(), ax)
         if self.bin_centers is None or len(self.bin_centers) < 2:
             raise AttributeError("Binned data not found. Cannot plot.")
 
@@ -335,10 +335,14 @@ def plot(self, ax: Optional[plt.Axes] = None):
             alpha=0.7,
         )
 
-        ax.set_xlabel("Flux")
+        f = "Flux"
+        if isinstance(self, AsinhEmpiricalUncertaintyModel):
+            f = "Mag [asinh]"
+
+        ax.set_xlabel(f)
         ax.set_ylabel("Error")
         ax.legend()
-        plt.show()
+        return fig
 
     def _create_interpolators(self):
         if self.bin_centers is None or len(self.bin_centers) < 2:
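Taken together, ```plot``` now attaches to the parent figure of a supplied axis, labels the x-axis appropriately for asinh models, and returns the figure instead of calling ```plt.show()```. A minimal usage sketch of the new behaviour (the mock inputs are invented here; only the constructor and the ```plot(ax=...)``` call mirror the diffs above):

import numpy as np
import matplotlib.pyplot as plt
from synference import AsinhEmpiricalUncertaintyModel

# Mock photometry: lognormal fluxes with a background + fractional error, as in the notebook diff
flux_jy = np.random.lognormal(np.log(1.5e-4), 1.5, size=10_000)
err_jy = np.hypot(1e-4, 0.02 * flux_jy)
noise_model = AsinhEmpiricalUncertaintyModel(
    observed_phot_jy=flux_jy + np.random.normal(0.0, err_jy),
    observed_phot_errors_jy=err_jy,
)

# plot() now resolves the parent figure via ax.get_figure() and returns it,
# so the caller can keep customising or save without an implicit plt.show().
fig, ax = plt.subplots()
noise_model.plot(ax=ax)
fig.savefig("asinh_noise_model.png", dpi=150)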
