Skip to content

Commit d7733c1

Browse files
first spin at new ekf algorithm to handle dilutions
1 parent e18e741 commit d7733c1

File tree

11 files changed

+239
-199
lines changed

11 files changed

+239
-199
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
### Upcoming
2+
3+
- adding dirs to exported data zips
4+
15
### 25.5.1
26

37
#### Enhancements

config.dev.ini

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,9 @@ ws_protocol=ws
149149
use_tls=0
150150

151151
[growth_rate_kalman]
152-
acc_std=0.0008
153152
obs_std=1.5
154-
od_std=0.005
155-
rate_std=0.1
153+
od_std=0.0025
154+
rate_std=0.25
156155

157156

158157
[dosing_automation.config]
@@ -165,7 +164,6 @@ max_subdose=1.0
165164
[growth_rate_calculating.config]
166165
# these next two parameters control the length and magnitude
167166
# of the variance shift that our Kalman filter performs after a dosing event
168-
ekf_variance_shift_post_dosing_minutes=0.40
169167
ekf_variance_shift_post_dosing_factor=2500
170168
ekf_outlier_std_threshold=3.0
171169
samples_for_od_statistics=35

pioreactor/actions/leader/export_experiment_data.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,10 @@ def export_experiment_data(
261261
if count == 0:
262262
logger.warning(f"No data present in {dataset_name}. Check database?")
263263

264+
zf.mkdir(dataset_name)
264265
for filename in filenames:
265-
path_to_file = Path(Path(output).parent / filename)
266-
zf.write(path_to_file, arcname=filename)
266+
path_to_file = Path(output, filename)
267+
zf.write(path_to_file, arcname=f"{dataset_name}/{filename}")
267268
Path(path_to_file).unlink()
268269

269270
logger.info("Finished export.")

pioreactor/background_jobs/growth_rate_calculating.py

Lines changed: 25 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,14 @@ def __init__(
9898
self.source_obs_from_mqtt = source_obs_from_mqtt
9999
self.ignore_cache = ignore_cache
100100
self.time_of_previous_observation: datetime | None = None
101-
self.expected_dt = 1 / (60 * 60 * config.getfloat("od_reading.config", "samples_per_second"))
101+
self.expected_dt = 1 / (
102+
60 * 60 * config.getfloat("od_reading.config", "samples_per_second")
103+
) # in hours
104+
105+
# ekf parameters for when a dosing event occurs
106+
self._obs_since_last_dose: int | None = None
107+
self._obs_required_to_reset: int | None = None
108+
self._recent_dilution = False
102109

103110
def on_ready(self) -> None:
104111
# this is here since the below is long running, and if kept in the init(), there is a large window where
@@ -133,7 +140,6 @@ def on_ready(self) -> None:
133140
self.logger.debug(f"od_normalization_mean={self.od_normalization_factors}")
134141
self.logger.debug(f"od_normalization_variance={self.od_variances}")
135142
self.ekf = self.initialize_extended_kalman_filter(
136-
acc_std=config.getfloat("growth_rate_kalman", "acc_std"),
137143
od_std=config.getfloat("growth_rate_kalman", "od_std"),
138144
rate_std=config.getfloat("growth_rate_kalman", "rate_std"),
139145
obs_std=config.getfloat("growth_rate_kalman", "obs_std"),
@@ -143,32 +149,29 @@ def on_ready(self) -> None:
143149
self.start_passive_listeners()
144150

145151
def initialize_extended_kalman_filter(
146-
self, acc_std: float, od_std: float, rate_std: float, obs_std: float
152+
self, od_std: float, rate_std: float, obs_std: float
147153
) -> CultureGrowthEKF:
148154
import numpy as np
149155

150156
initial_state = np.array(
151157
[
152158
self.initial_nOD,
153159
self.initial_growth_rate,
154-
self.initial_acc,
155160
]
156161
)
157162
self.logger.debug(f"Initial state: {repr(initial_state)}")
158163

159164
initial_covariance = 1e-4 * np.eye(
160-
3
165+
2
161166
) # empirically selected - TODO: this should probably scale with `expected_dt`
162167
self.logger.debug(f"Initial covariance matrix:\n{repr(initial_covariance)}")
163168

164-
acc_process_variance = (acc_std * self.expected_dt) ** 2
165169
od_process_variance = (od_std * self.expected_dt) ** 2
166170
rate_process_variance = (rate_std * self.expected_dt) ** 2
167171

168-
process_noise_covariance = np.zeros((3, 3))
172+
process_noise_covariance = np.zeros((2, 2))
169173
process_noise_covariance[0, 0] = od_process_variance
170174
process_noise_covariance[1, 1] = rate_process_variance
171-
process_noise_covariance[2, 2] = acc_process_variance
172175
self.logger.debug(f"Process noise covariance matrix:\n{repr(process_noise_covariance)}")
173176

174177
observation_noise_covariance = self.create_obs_noise_covariance(obs_std)
@@ -371,21 +374,6 @@ def get_od_variances_from_cache(self) -> dict[pt.PdChannel, float]:
371374

372375
return variances
373376

374-
def update_ekf_variance_after_event(self, minutes: float, factor: float) -> None:
375-
if whoami.is_testing_env():
376-
# TODO: replace with jobmanager
377-
msg = subscribe( # needs to be pubsub.subscribe (ie not sub_client.subscribe) since this is called in a callback
378-
f"pioreactor/{self.unit}/{self.experiment}/od_reading/interval",
379-
timeout=1.0,
380-
)
381-
if msg:
382-
interval = float(msg.payload)
383-
else:
384-
interval = 5
385-
self.ekf.scale_OD_variance_for_next_n_seconds(factor, minutes * (12 * interval))
386-
else:
387-
self.ekf.scale_OD_variance_for_next_n_seconds(factor, minutes * 60)
388-
389377
def scale_raw_observations(self, observations: dict[pt.PdChannel, float]) -> dict[pt.PdChannel, float]:
390378
def _scale_and_shift(obs, shift, scale) -> float:
391379
return (obs - shift) / (scale - shift)
@@ -474,9 +462,19 @@ def _update_state_from_observation(
474462

475463
self.time_of_previous_observation = timestamp
476464

477-
updated_state_, covariance_ = self.ekf.update(list(scaled_observations.values()), dt)
465+
updated_state_, covariance_ = self.ekf.update(
466+
list(scaled_observations.values()), dt, self._recent_dilution
467+
)
478468
latest_od_filtered, latest_growth_rate = float(updated_state_[0]), float(updated_state_[1])
479469

470+
if self._obs_since_last_dose is not None and self._obs_required_to_reset is not None:
471+
self._obs_since_last_dose += 1
472+
473+
if self._obs_since_last_dose >= self._obs_required_to_reset:
474+
self._obs_since_last_dose = None
475+
self._obs_required_to_reset = None
476+
self._recent_dilution = False
477+
480478
growth_rate = structs.GrowthRate(
481479
growth_rate=latest_growth_rate,
482480
timestamp=timestamp,
@@ -499,21 +497,9 @@ def respond_to_dosing_event_from_mqtt(self, message: pt.MQTTMessage) -> None:
499497
return self.respond_to_dosing_event(dosing_event)
500498

501499
def respond_to_dosing_event(self, dosing_event: structs.DosingEvent) -> None:
502-
# here we can add custom logic to handle dosing events.
503-
# an improvement to this: the variance factor is proportional to the amount exchanged.
504-
if dosing_event.event != "remove_waste":
505-
self.update_ekf_variance_after_event(
506-
minutes=config.getfloat(
507-
"growth_rate_calculating.config",
508-
"ekf_variance_shift_post_dosing_minutes",
509-
fallback=0.40,
510-
),
511-
factor=config.getfloat(
512-
"growth_rate_calculating.config",
513-
"ekf_variance_shift_post_dosing_factor",
514-
fallback=2500,
515-
),
516-
)
500+
self._obs_since_last_dose = 0
501+
self._obs_required_to_reset = 1
502+
self._recent_dilution = True
517503

518504
def start_passive_listeners(self) -> None:
519505
# process incoming data

pioreactor/background_jobs/od_reading.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -926,13 +926,13 @@ def _determine_best_ir_led_intensity(
926926

927927
_, REF_on_signal = on_reading.popitem()
928928

929-
ir_intensity_argmax_REF_can_be = initial_ir_intensity / REF_on_signal.reading * 0.240
929+
ir_intensity_argmax_REF_can_be = initial_ir_intensity / REF_on_signal.reading * 0.250
930930

931931
ir_intensity_argmax_ANGLE_can_be = (
932932
initial_ir_intensity / culture_on_signal.reading * 3.0
933933
) / 50 # divide by N since the culture is unlikely to Nx.
934934

935-
ir_intensity_max = 80.0
935+
ir_intensity_max = 85.0
936936

937937
return round(
938938
max(

pioreactor/pubsub.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ def conform_and_validate_api_endpoint(endpoint: str) -> str:
359359

360360

361361
def create_webserver_path(address: str, endpoint: str) -> str:
362+
# pioreactor cluster specific (note the use of protocol and ports from our config!)
362363
# Most commonly, address can be an mdns name (test.local), or an IP address.
363364
port = config.getint("ui", "port", fallback=80)
364365
proto = config.get("ui", "proto", fallback="http")
@@ -367,6 +368,7 @@ def create_webserver_path(address: str, endpoint: str) -> str:
367368

368369

369370
def get_from(address: str, endpoint: str, **kwargs) -> mureq.Response:
371+
# pioreactor cluster specific
370372
return mureq.get(create_webserver_path(address, endpoint), **kwargs)
371373

372374

@@ -377,6 +379,7 @@ def get_from_leader(endpoint: str, **kwargs) -> mureq.Response:
377379
def put_into(
378380
address: str, endpoint: str, body: bytes | None = None, json: dict | Struct | None = None, **kwargs
379381
) -> mureq.Response:
382+
# pioreactor cluster specific
380383
return mureq.put(create_webserver_path(address, endpoint), body=body, json=json, **kwargs)
381384

382385

@@ -389,6 +392,7 @@ def put_into_leader(
389392
def patch_into(
390393
address: str, endpoint: str, body: bytes | None = None, json: dict | Struct | None = None, **kwargs
391394
) -> mureq.Response:
395+
# pioreactor cluster specific
392396
return mureq.patch(create_webserver_path(address, endpoint), body=body, json=json, **kwargs)
393397

394398

@@ -401,6 +405,7 @@ def patch_into_leader(
401405
def post_into(
402406
address: str, endpoint: str, body: bytes | None = None, json: dict | Struct | None = None, **kwargs
403407
) -> mureq.Response:
408+
# pioreactor cluster specific
404409
return mureq.post(create_webserver_path(address, endpoint), body=body, json=json, **kwargs)
405410

406411

@@ -411,6 +416,7 @@ def post_into_leader(
411416

412417

413418
def delete_from(address: str, endpoint: str, **kwargs) -> mureq.Response:
419+
# pioreactor cluster specific
414420
return mureq.delete(create_webserver_path(address, endpoint), **kwargs)
415421

416422

pioreactor/tests/conftest.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111

1212
from pioreactor.mureq import Response
1313
from pioreactor.pubsub import publish
14-
from pioreactor.structs import ODReadings
15-
from pioreactor.structs import RawODReading
16-
from pioreactor.utils.timing import to_datetime
1714

1815

1916
@pytest.fixture(autouse=True)
@@ -123,30 +120,3 @@ def mock_request(method, url, **kwargs):
123120
# Patch the mureq.request method
124121
with patch("pioreactor.mureq.request", side_effect=mock_request):
125122
yield bucket
126-
127-
128-
class StreamODReadingsFromExport:
129-
def __init__(self, filename: str, skip_first_n_rows=0):
130-
self.filename = filename
131-
self.skip_first_n_rows = skip_first_n_rows
132-
133-
def __enter__(self, *args, **kwargs):
134-
import csv
135-
136-
self.file_instance = open(self.filename, "r")
137-
self.csv_reader = csv.DictReader(self.file_instance, quoting=csv.QUOTE_MINIMAL)
138-
return self
139-
140-
def __exit__(self, *args, **kwargs):
141-
self.file_instance.close()
142-
143-
def __iter__(self):
144-
for i, line in enumerate(self.csv_reader):
145-
if i <= self.skip_first_n_rows:
146-
continue
147-
dt = to_datetime(line["timestamp"])
148-
od = RawODReading(
149-
angle=line["angle"], channel=line["channel"], timestamp=dt, od=float(line["od_reading"])
150-
)
151-
ods = ODReadings(timestamp=dt, ods={"2": od})
152-
yield ods

pioreactor/tests/test_growth_rate_calculating.py

Lines changed: 6 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
import numpy as np
88
from msgspec.json import encode
9-
from numpy.testing import assert_array_equal
109

1110
from pioreactor import structs
1211
from pioreactor.background_jobs.growth_rate_calculating import GrowthRateCalculator
@@ -16,7 +15,7 @@
1615
from pioreactor.config import temporary_config_changes
1716
from pioreactor.pubsub import collect_all_logs_of_level
1817
from pioreactor.pubsub import publish
19-
from pioreactor.tests.conftest import StreamODReadingsFromExport
18+
from pioreactor.tests.utils import StreamODReadingsFromExport
2019
from pioreactor.utils import local_persistent_storage
2120
from pioreactor.utils.timing import current_utc_timestamp
2221
from pioreactor.utils.timing import default_datetime_for_pioreactor
@@ -357,27 +356,11 @@ def test_shock_from_dosing_works(self) -> None:
357356
)
358357
pause()
359358

360-
previous_covariance_matrix = calc.ekf.covariance_.copy()
361-
362-
# trigger dosing events, which change the "regime"
363-
publish(
364-
f"pioreactor/{unit}/{experiment}/dosing_events",
365-
encode(
366-
structs.DosingEvent(
367-
volume_change=1.0,
368-
event="add_media",
369-
source_of_event="algo",
370-
timestamp=to_datetime("2010-01-01T12:00:48.000Z"),
371-
)
372-
),
373-
)
374-
pause()
375-
376359
publish(
377360
f"pioreactor/{unit}/{experiment}/od_reading/ods",
378361
create_encoded_od_raw_batched(
379362
["1"],
380-
[0.49],
363+
[0.52],
381364
["90"],
382365
timestamp="2010-01-01T12:00:50.000Z",
383366
),
@@ -387,15 +370,13 @@ def test_shock_from_dosing_works(self) -> None:
387370
f"pioreactor/{unit}/{experiment}/od_reading/ods",
388371
create_encoded_od_raw_batched(
389372
["1"],
390-
[0.48],
373+
[0.52],
391374
["90"],
392375
timestamp="2010-01-01T12:00:55.000Z",
393376
),
394377
)
395378
pause()
396379

397-
assert not np.array_equal(previous_covariance_matrix, calc.ekf.covariance_)
398-
399380
publish(
400381
f"pioreactor/{unit}/{experiment}/dosing_events",
401382
encode(
@@ -408,6 +389,8 @@ def test_shock_from_dosing_works(self) -> None:
408389
),
409390
)
410391
pause()
392+
assert calc._recent_dilution
393+
411394
publish(
412395
f"pioreactor/{unit}/{experiment}/od_reading/ods",
413396
create_encoded_od_raw_batched(
@@ -418,20 +401,7 @@ def test_shock_from_dosing_works(self) -> None:
418401
),
419402
)
420403
pause()
421-
422-
time.sleep(8)
423-
assert calc.ekf._currently_scaling_covariance
424-
assert not np.array_equal(previous_covariance_matrix, calc.ekf.covariance_)
425-
426-
time.sleep(10)
427-
pause()
428-
429-
# should revert back
430-
while calc.ekf._currently_scaling_covariance:
431-
pass
432-
433-
assert_array_equal(calc.ekf.covariance_, previous_covariance_matrix)
434-
calc.clean_up()
404+
assert not calc._recent_dilution
435405

436406
def test_end_to_end(self) -> None:
437407
with temporary_config_changes(

0 commit comments

Comments
 (0)