Add sigma_b, sigma_m calculation to the integrators (#81)

jbeilstenedmands · web-flow · commit 7c2fce91c690 · 2025-12-12T14:51:35.000Z
diff --git a/baseline/indexer/indexer.cc b/baseline/indexer/indexer.cc
@@ -31,6 +31,7 @@
 #include "flood_fill.cc"
 #include "gemmi/symmetry.hpp"
 #include "peaks_to_rlvs.cc"
+#include "scan_static_predictor.cc"
 #include "score_crystals.cc"
 #include "xyz_to_rlp.cc"
 
@@ -460,6 +461,14 @@ int main(int argc, char **argv) {
             }
         }
 
+        strong_reflections.add_column(std::string("entering"), enterings);
+        // Call the predictor to get xyzcal values in the output.
+        simple_reflection_predictor(expt.beam(),
+                                    expt.goniometer(),
+                                    expt.crystal().get_A_matrix(),
+                                    expt.detector().panels()[0],
+                                    strong_reflections);
+
         // Save the indexed reflection table.
         std::string output_filename = "indexed.refl";
         strong_reflections.write(output_filename);
diff --git a/baseline/integrator/integrator.cc b/baseline/integrator/integrator.cc
@@ -22,6 +22,8 @@
 #include "extent.cc"
 #include "ffs_logger.hpp"
 #include "kabsch.cc"
+#include "math/math_utils.cuh"
+#include "sigma_estimation.cc"
 #include "version.hpp"
 
 using json = nlohmann::json;
@@ -84,6 +86,13 @@ class BaselineIntegratorArgumentParser : public FFSArgumentParser {
           .metavar("σb")
           .scan<'f', float>();
 
+        add_argument("--sigma_estimation.min_bbox_depth", "--min_bbox_depth")
+          .help(
+            "When calculating sigma_m, only use reflections that span at least this "
+            "number of images.")
+          .default_value<int>(6)
+          .scan<'i', int>();
+
         add_argument("output")
           .help("Output file path")
           .metavar("output.h5")
@@ -102,19 +111,9 @@ int main(int argc, char **argv) {
     const auto reflection_file = parser.reflections();
     const auto experiment_file = parser.experiment();
 
-    float sigma_m =
-      parser.get<float>("sigma_m") * (M_PI / 180.0f);  // Convert to radians
-    float sigma_b =
-      parser.get<float>("sigma_b") * (M_PI / 180.0f);  // Convert to radians
     float timeout = parser.get<float>("timeout");
     std::string output_file = parser.get<std::string>("output");
 
-    logger.info("Parameters: sigma_m={:.6f}, sigma_b={:.6f}, timeout={:.1f}, output={}",
-                sigma_m,
-                sigma_b,
-                timeout,
-                output_file);
-
     // Guard against missing files
     if (!std::filesystem::exists(reflection_file)) {
         logger.error("Reflection file not found: {}", reflection_file);
@@ -202,6 +201,53 @@ int main(int argc, char **argv) {
     logger.info("  Oscillation: start={:.3f}°, width={:.3f}°", osc_start, osc_width);
     logger.info("  Image range: {} to {}", image_range_start, image_range_end);
 
+    // If the input is a predicted refl, then sigma_b and sigma_m must be provided,
+    // as we will not be able to estimate them from the data.
+    // Else, if the input is an indexed.refl/refined.refl with the sigma variance
+    // columns, we can calculate sigma_b and sigma_m, but will also have to run the
+    // predict code in this program.
+    float sigma_b = 0.0;
+    float sigma_m = 0.0;
+    if (parser.is_used("sigma_m")) {
+        sigma_m = degrees_to_radians(
+          parser.get<float>("sigma_m"));  // Use radians for calculations
+    }
+    if (parser.is_used("sigma_b")) {
+        sigma_b = degrees_to_radians(
+          parser.get<float>("sigma_b"));  // Use radians for calculations
+    }
+
+    // Estimate sigmas
+    auto sigma_b_data = reflections.column<double>("sigma_b_variance");
+    auto sigma_m_data = reflections.column<double>("sigma_m_variance");
+    auto extent_z_data = reflections.column<int>("spot_extent_z");
+    if (sigma_b_data && sigma_m_data && extent_z_data) {
+        int min_bbox_depth = parser.get<int>("sigma_estimation.min_bbox_depth");
+        // Estimate the values from the data, and use if user hasn't specified values.
+        auto [sigma_b_calc, sigma_m_calc, sigma_rmsd_calc] =
+          estimate_sigmas(reflections, expt, min_bbox_depth);
+        // Note we might want to inflate sigma_b_calc to include the rmsd too, but we just report
+        // it for now.
+        if (sigma_m == 0.0) {
+            sigma_m = sigma_m_calc;
+        }
+        if (sigma_b == 0.0) {
+            sigma_b = sigma_b_calc;
+        }
+    }
+    if (sigma_b == 0.0) {
+        throw std::runtime_error(
+          "No value for sigma_b. This must either be provided as input, or an input "
+          "reflection "
+          "table containing sigma_b_variance must be used.");
+    }
+    if (sigma_m == 0.0) {
+        throw std::runtime_error(
+          "No value for sigma_m. This must either be provided as input, or an input "
+          "reflection "
+          "table containing sigma_m_variance and spot_extent_z must be used.");
+    }
+
     // Compute bounding boxes using baseline CPU algorithms
     logger.info("Computing Kabsch bounding boxes using baseline CPU algorithms...");
     auto computed_bounding_boxes = compute_kabsch_bounding_boxes(s0,
diff --git a/baseline/integrator/sigma_estimation.cc b/baseline/integrator/sigma_estimation.cc
@@ -0,0 +1,106 @@
+#include <dx2/beam.hpp>
+#include <dx2/detector.hpp>
+#include <dx2/experiment.hpp>
+#include <dx2/reflection.hpp>
+#include <math/math_utils.cuh>
+#include <tuple>
+
+#include "ffs_logger.hpp"
+
+constexpr size_t indexed_flag = (1 << 2);  // 4: "flags" bit a row must have set to be used by estimate_sigmas()
+/**
+ * Squared deviation, in kabsch space, between a predicted and an observed position.
+ *
+ * Both positions are mapped to lab coordinates with panel.get_lab_coord() using
+ * their first two components only. The observed-minus-predicted difference is then
+ * projected onto two unit axes built from the predicted vector and s0, and each
+ * projection is divided by the length of the predicted vector.
+ *
+ * @param xyzcal Predicted position (mm); only elements 0 and 1 are read.
+ * @param xyzobs Observed position (mm); only elements 0 and 1 are read.
+ * @param s0     Vector crossed with the predicted vector to build the first axis.
+ * @param panel  Panel used to convert the positions to lab coordinates.
+ * @return Sum of the squares of the two scaled projections.
+ */
+double squaredev_in_kabsch_space(const Vector3d &xyzcal,  //mm
+                                 const Vector3d &xyzobs,  //mm
+                                 const Vector3d &s0,
+                                 const Panel &panel) {
+    const Vector3d predicted = panel.get_lab_coord(xyzcal[0], xyzcal[1]);
+    const Vector3d observed = panel.get_lab_coord(xyzobs[0], xyzobs[1]);
+
+    // First axis: perpendicular to both the predicted vector and s0.
+    Vector3d axis1 = predicted.cross(s0);
+    axis1.normalize();
+    // Second axis: perpendicular to the predicted vector and to the first axis.
+    Vector3d axis2 = predicted.cross(axis1);
+    axis2.normalize();
+
+    const double predicted_length = std::sqrt(predicted.dot(predicted));
+    const Vector3d offset = observed - predicted;
+
+    // Scale each projection by the length of the predicted vector.
+    const double eps1 = axis1.dot(offset) / predicted_length;
+    const double eps2 = axis2.dot(offset) / predicted_length;
+    return (eps1 * eps1) + (eps2 * eps2);
+}
+
+/**
+ * Estimate sigma_b, sigma_m and a prediction rmsd from a reflection table.
+ *
+ * Only rows whose "flags" entry has indexed_flag set are used:
+ *  - sigma_b is the square root of the mean of "sigma_b_variance" over those rows.
+ *  - sigma_m is the square root of the mean of "sigma_m_variance" over the rows
+ *    whose "spot_extent_z" is at least min_bbox_depth.
+ *  - the rmsd is the square root of the mean squaredev_in_kabsch_space() between
+ *    "xyzcal.mm" and "xyzobs.mm.value", skipping rows whose deviation is 0.1
+ *    degrees or more. Only the first detector panel is used for this.
+ *
+ * All three values are logged after conversion with radians_to_degrees(), i.e. they
+ * are treated as radians.
+ *
+ * @param indexed        Reflection table providing the columns named above.
+ * @param expt           Experiment supplying the beam s0 and the detector panel.
+ * @param min_bbox_depth Minimum "spot_extent_z" for a row to count towards sigma_m.
+ * @return Tuple of (sigma_b, sigma_m, rmsd deviation).
+ * @throws std::runtime_error if a required column is missing, if no rows are
+ *         selected, if no row reaches min_bbox_depth, or if every row is rejected
+ *         by the rmsd cutoff.
+ */
+std::tuple<double, double, double> estimate_sigmas(ReflectionTable const &indexed,
+                                                   Experiment<MonochromaticBeam> &expt,
+                                                   int min_bbox_depth = 6) {
+    // Unwrap an optional column, failing with a descriptive message when it is absent
+    // (rather than an opaque std::bad_optional_access).
+    const auto require_column = [](auto &column,
+                                   const char *message) -> decltype(auto) {
+        if (!column) {
+            throw std::runtime_error(message);
+        }
+        return *column;
+    };
+
+    // Select the rows flagged with indexed_flag.
+    auto flags = indexed.column<std::size_t>("flags");
+    auto &flags_data = require_column(
+      flags, "Unable to estimate sigmas, reflection table has no flags column.");
+    const std::size_t n_rows = flags_data.extent(0);
+    std::vector<bool> selection(n_rows, false);
+    for (std::size_t i = 0; i < n_rows; ++i) {
+        selection[i] = (flags_data(i, 0) & indexed_flag) != 0;
+    }
+    ReflectionTable filtered = indexed.select(selection);
+
+    auto filtered_sigma_b = filtered.column<double>("sigma_b_variance");
+    auto &filtered_sigma_b_data = require_column(
+      filtered_sigma_b,
+      "Unable to estimate sigmas, reflection table has no sigma_b_variance column.");
+    auto filtered_sigma_m = filtered.column<double>("sigma_m_variance");
+    auto &filtered_sigma_m_data = require_column(
+      filtered_sigma_m,
+      "Unable to estimate sigmas, reflection table has no sigma_m_variance column.");
+    auto extent_z = filtered.column<int>("spot_extent_z");
+    auto &extent_z_data = require_column(
+      extent_z,
+      "Unable to estimate sigmas, reflection table has no spot_extent_z column.");
+
+    // Without any selected rows the mean below would be 0/0 (NaN).
+    const std::size_t n_selected = filtered_sigma_b_data.extent(0);
+    if (n_selected == 0) {
+        throw std::runtime_error(
+          "Unable to estimate sigma_b, no indexed reflections in the reflection table.");
+    }
+
+    double sigma_b_total = 0;
+    double sigma_m_total = 0;
+    int n_sigma_m = 0;
+    for (std::size_t i = 0; i < n_selected; ++i) {
+        sigma_b_total += filtered_sigma_b_data(i, 0);
+        // sigma_m only uses reflections spanning enough images.
+        if (extent_z_data(i, 0) >= min_bbox_depth) {
+            sigma_m_total += filtered_sigma_m_data(i, 0);
+            n_sigma_m++;
+        }
+    }
+    double sigma_b_radians = std::sqrt(sigma_b_total / static_cast<double>(n_selected));
+    logger.info("Sigma b estimate (degrees): {:.6f} on {} reflections",
+                radians_to_degrees(sigma_b_radians),
+                n_selected);
+    if (n_sigma_m == 0) {
+        throw std::runtime_error(
+          "Unable to estimate sigma_m, no reflections above min_bbox_depth.");
+    }
+    double sigma_m_radians = std::sqrt(sigma_m_total / n_sigma_m);
+    logger.info(
+      "Sigma m estimate (degrees): {:.6f} on {} reflections with min_bbox_depth={}",
+      radians_to_degrees(sigma_m_radians),
+      n_sigma_m,
+      min_bbox_depth);
+
+    // Deviation between predicted and observed positions for each selected row.
+    auto xyz = filtered.column<double>("xyzobs.mm.value");
+    auto &xyzobs = require_column(
+      xyz, "Unable to estimate rmsd deviation, reflection table has no xyzobs.mm.value column.");
+    auto xyz2 = filtered.column<double>("xyzcal.mm");
+    auto &xyzcal = require_column(
+      xyz2, "Unable to estimate rmsd deviation, reflection table has no xyzcal.mm column.");
+    const Panel p = expt.detector().panels()[0];
+    const Vector3d s0 = expt.beam().get_s0();  // Loop-invariant, so fetched once.
+    // Deviations at or above this are not counted (guard against mispredictions in
+    // indexing).
+    constexpr double max_deviation_degrees = 0.1;
+    double tot_rmsd = 0;
+    int count = 0;
+    const std::size_t n_predicted = xyzcal.extent(0);
+    for (std::size_t i = 0; i < n_predicted; ++i) {
+        // NOTE(review): assumes each row holds three contiguous doubles - confirm.
+        Eigen::Map<Vector3d> xyzcal_this(&xyzcal(i, 0));
+        Eigen::Map<Vector3d> xyzobs_this(&xyzobs(i, 0));
+        double val = squaredev_in_kabsch_space(xyzcal_this, xyzobs_this, s0, p);
+        if (radians_to_degrees(std::sqrt(val)) < max_deviation_degrees) {
+            tot_rmsd += val;
+            count++;
+        }
+    }
+    if (count == 0) {
+        throw std::runtime_error(
+          "Unable to estimate rmsd deviation, predicted reflections are too far from "
+          "observed");
+    }
+    double rmsd_deviation_radians = std::sqrt(tot_rmsd / count);
+    logger.info("Sigma rmsd (degrees): {:.6f} on {} reflections",
+                radians_to_degrees(rmsd_deviation_radians),
+                count);
+    return std::make_tuple(sigma_b_radians, sigma_m_radians, rmsd_deviation_radians);
+}
diff --git a/integrator/CMakeLists.txt b/integrator/CMakeLists.txt
@@ -13,6 +13,7 @@ add_executable(integrator
 target_include_directories(integrator
     PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/../spotfinder
+    ${CMAKE_CURRENT_SOURCE_DIR}/../baseline/integrator
 )
 
 target_link_libraries(integrator
diff --git a/integrator/integrator.cc b/integrator/integrator.cc
@@ -32,6 +32,7 @@
 #include "math/device_precision.cuh"
 #include "math/math_utils.cuh"
 #include "math/vector3d.cuh"
+#include "sigma_estimation.cc"
 #include "version.hpp"
 
 using namespace std::chrono_literals;
@@ -135,6 +136,13 @@ class IntegratorArgumentParser : public CUDAArgumentParser {
             "Sigma_b: Standard deviation of the beam direction in reciprocal space.")
           .metavar("σb")
           .scan<'f', float>();
+
+        add_argument("--sigma_estimation.min_bbox_depth", "--min_bbox_depth")
+          .help(
+            "When calculating sigma_m, only use reflections that span at least this "
+            "number of images.")
+          .default_value<int>(6)
+          .scan<'i', int>();
     }
 };
 #pragma endregion Argument Parsing
@@ -149,9 +157,6 @@ int main(int argc, char **argv) {
     const auto reflection_file = parser.reflections();
     const auto experiment_file = parser.experiment();
     float wait_timeout = parser.get<float>("timeout");
-    // These two will be optional later, should be gettable from reflection table
-    float sigma_m = parser.get<float>("sigma_m");
-    float sigma_b = parser.get<float>("sigma_b");
 
     // Guard against missing files
     if (!std::filesystem::exists(reflection_file)) {
@@ -252,6 +257,55 @@ int main(int argc, char **argv) {
 
 #pragma endregion Data preparation
 
+#pragma region Sigma estimation
+    // If the input is a predicted refl, then sigma_b and sigma_m must be provided,
+    // as we will not be able to estimate them from the data.
+    // Else, if the input is an indexed.refl/refined.refl with the sigma variance
+    // columns, we can calculate sigma_b and sigma_m, but will also have to run the
+    // predict code in this program.
+    float sigma_b = 0.0;
+    float sigma_m = 0.0;
+    if (parser.is_used("sigma_m")) {
+        sigma_m = degrees_to_radians(
+          parser.get<float>("sigma_m"));  // Use radians for calculations
+    }
+    if (parser.is_used("sigma_b")) {
+        sigma_b = degrees_to_radians(
+          parser.get<float>("sigma_b"));  // Use radians for calculations
+    }
+
+    // Estimate sigmas
+    auto sigma_b_data = reflections.column<double>("sigma_b_variance");
+    auto sigma_m_data = reflections.column<double>("sigma_m_variance");
+    auto extent_z_data = reflections.column<int>("spot_extent_z");
+    if (sigma_b_data && sigma_m_data && extent_z_data) {
+        int min_bbox_depth = parser.get<int>("sigma_estimation.min_bbox_depth");
+        // Estimate the values from the data, and use if user hasn't specified values.
+        auto [sigma_b_calc, sigma_m_calc, sigma_rmsd_calc] =
+          estimate_sigmas(reflections, expt, min_bbox_depth);
+        // Note we might want to inflate sigma_b_calc to include the rmsd too, but we just report
+        // it for now.
+        if (sigma_m == 0.0) {
+            sigma_m = sigma_m_calc;
+        }
+        if (sigma_b == 0.0) {
+            sigma_b = sigma_b_calc;
+        }
+    }
+    if (sigma_b == 0.0) {
+        throw std::runtime_error(
+          "No value for sigma_b. This must either be provided as input, or an input "
+          "reflection "
+          "table containing sigma_b_variance must be used.");
+    }
+    if (sigma_m == 0.0) {
+        throw std::runtime_error(
+          "No value for sigma_m. This must either be provided as input, or an input "
+          "reflection "
+          "table containing sigma_m_variance and spot_extent_z must be used.");
+    }
+#pragma endregion Sigma estimation
+
 #pragma region Image Reading and Threading
     // Now set up for multi-threaded image reading and processing
     logger.info("Setting up image reading and threading");

Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@ add_executable(integrator`
`13`	`13`	`target_include_directories(integrator`
`14`	`14`	`PRIVATE`
`15`	`15`	`${CMAKE_CURRENT_SOURCE_DIR}/../spotfinder`
	`16`	`+ ${CMAKE_CURRENT_SOURCE_DIR}/../baseline/integrator`
`16`	`17`	`)`
`17`	`18`
`18`	`19`	`target_link_libraries(integrator`