Commit 6cebdf5

STY: cleanup and adding comments
1 parent 1f168d7 commit 6cebdf5

File tree

1 file changed: +90 -79 lines changed


process_b.py

Lines changed: 90 additions & 79 deletions
@@ -10,7 +10,10 @@
 import time
 
 class ProcessB(CustomProcessObject):
-
+    """
+    ProcessB consumes k2eg snapshots from queue_one and buffers 5 minutes of data per PV at 120hz.
+    It performs beam checks and candidate window searching, and forwards output to queue_two for ProcessC.
+    """
     def __init__(self,
                  queue_one: 'Manager.Queue',
                  queue_two: 'Manager.Queue',
@@ -19,41 +22,42 @@ def __init__(self,
                  ):
         self.queue_one = queue_one
         self.queue_two = queue_two
+
         self.pv_list = pv_list
+
         self.logging_kwargs = logging_kwargs
         self.logging_kwargs['logger_name'] = 'process_b'
         self.logger = None
-        self.buffer = Buffer(pv_list, 36000, logging_kwargs)
+
+        # holds up to 5 minutes of 120hz data (36000 points) per pv.
+        self.buffer = Buffer(pv_list, 36000, logging_kwargs)  # 36000 = 120hz * 60sec * 5mins
 
     def __call__(self):
         if self.logger is None:
             self.logger = create_worker_logger(**self.logging_kwargs)
 
-        self.logger.debug(f"number of pvs running on: {len(self.pv_list)}")
-        self.logger.debug("starting proecess_b data processing")
+        self.logger.debug(f"running process_b on {len(self.pv_list)} pvs")
+        self.logger.debug("starting data processing loop...")
 
         while True:
             try:
                 r = self.queue_one.get(timeout=0.05)  # wait 50ms
 
-                #self.logger.debug(f"ProcessB sees {r['iteration']}")
-                #self.logger.debug(f"ProcessB sees {r}")
-                #self.queue_two.put(r)
-
-                if r is None:  # enqueue a None to stop this process
+                if r is None:  # enqueuing a None should stop this process
                     break
 
+                # need to be sure each iteration of this processing loop is <= 1 second
+                # (new data comes each second from process_a, so data will pile up if our processing takes over 1 second)
                 start = time.perf_counter()
 
-                self.update_pv_values(r)
-                result = self.find_candidates()
+                # parse the k2eg snapshot and update the buffer
+                self.update_buffer(r)
 
-                # write to queue_2 for process_c.py to read
+                # process data in the buffer and get results to pass to process_c and CoAD
+                self.do_beam_checks()
+                result = self.find_candidates()
 
-                # from process_c:
-                # "r should have the structure (rf_input_tensor, bpm_input_tensor, rf_station)
-                # where rf_input_tensor and bpm_input_tensor are tensors of size (1, 1066)
-                # and (8, 1066) respectively, and rf_station is a string representing the PV name"
+                # placeholder: dummy data for ProcessC:
                 fake_rf_input_tensor = np.random.rand(1, 1066).astype(np.float32)
                 fake_bpm_input_tensor = np.random.rand(8, 1066).astype(np.float32)
                 fake_pv_name = "fake_pv_name"
@@ -64,118 +68,125 @@ def __call__(self):
                 end = time.perf_counter()
                 elapsed_ms = (end - start) * 1000
                 self.logger.debug(f"process_b iteration took : {elapsed_ms:.2f} ms")
-
                 if elapsed_ms > 1000:  # have to be <= 1 sec
                     self.logger.warning(f"process_b iteration is slow!! : {elapsed_ms:.2f} ms")
 
             except Empty:
                 continue
 
-        self.logger.debug("ending data processing")
-
-        #dir_name = f"buffer_txt_dump_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
-        #self.logger.debug(f"dump dir: {dir_name}")
-        #self.buffer.dump_to_human_readable(directory=dir_name)
+        self.logger.debug("shutting down process_b")
 
         for handler in self.logger.handlers:
             handler.close()
 
-    def update_pv_values(self, snapshot):
-
-        iteration = snapshot.get("iteration", None)
-        self.logger.debug(f"\niteration: {iteration}")
-
+    def update_buffer(self, snapshot):
+        """
+        Append the latest 120-sample PV snapshot into the buffer for each pv.
+        """
         for pv in self.pv_list:
             entries = snapshot.get(pv, [])
-            num_entries = len(entries)
-            #self.logger.debug(f"{pv}: {num_entries} entries in snapshot")
-
-            if num_entries == 0:
-                continue  # skip if no data
 
-            times = np.empty(120, dtype=np.float64)
             values = np.empty(120, dtype=np.float64)
-
             for i, e in enumerate(entries):
-                ts = e.get("timeStamp", {})
-                times[i] = ts.get("secondsPastEpoch", 0)
                 values[i] = e.get("value", np.nan)
 
-            self.buffer.append(pv, values)
+            self.buffer.append(pv, values)
+
+        # Update buffer index tracking
+        if self.buffer.index != self.buffer.buffer_len:
+            self.buffer.index += 120
+        else:
+            self.buffer.index = self.buffer.buffer_len
 
     def do_beam_checks(self):
+        # placeholder: beam condition logic here.
         return True
 
     def find_candidates(self):
-        # add beam checks here
+        # placeholder: candidate determination logic here.
         return []
 
 class Buffer:
+    """
+    Fixed-length buffer for storing a sliding window of 120hz float data per pv.
+    By default stores 5 mins (36000 values) of past data.
+    """
     def __init__(self, pv_list: list[str], buffer_len: int = 36000, logging_kwargs: Optional[dict] = default_logging_kwargs):
-        self.buffer_len = buffer_len
         self.pv_list = pv_list
-        self.last_snapshot_time = 0  # for sanity check of snapshot validity
-        self.starting_pv_time = 0  # store the earliest time in buffer, and assume all data is timed at 120hz
+
+        # max length of buffer
+        self.buffer_len = buffer_len
+
+        # default buffer length is 36000 to store 5 mins of data at 120hz.
+        # we allocate the buffer up front to avoid potential memory copies during array append operations.
         self.buffer_map = {
             pv: np.empty(buffer_len, dtype=np.float64) for pv in self.pv_list
         }
-        self.index = 0  # tracking next write index
+        self.index = 0  # tracks the next write index
+
+        self.passes_beam_check = np.empty(buffer_len, dtype=bool)  # whether all beam checks passed at each timestamp
+
         self.logging_kwargs = logging_kwargs
         self.logger = create_worker_logger(**self.logging_kwargs)
         self.logging_kwargs['logger_name'] = 'buffer'
 
-    def dump_to_human_readable(self, directory: str = "buffer_dump_txt"):
-        # individual human-readable .txt file per pv, each line has one float val
-        os.makedirs(directory, exist_ok=True)
-
-        for pv in self.pv_list:
-            valid_data = self.buffer_map[pv][:self.index]
-            pv = pv[5:]  # get rid of "ca://"
-            filepath = os.path.join(directory, f"{pv.replace(':', '_')}.txt")
-
-            self.logger.debug(f"writing dump file {filepath} for {pv}")
-            with open(filepath, "w") as f:
-                for v in valid_data:
-                    f.write(f"{v}\n")
-
     def append(self, key: str, values: np.ndarray):
-        # we only allocate array memory once, and then once memory is full
-        # we drop the lowest-index 120 values and shift the existing values over
-        # and append the new values to the end.
-        # this will soon be done in a time-based fashion (older than 5 min values are dropped)
-
+        """
+        Appends 120 new values for a given pv. If the buffer is full, old data is shifted to make room.
+        """
         if key not in self.buffer_map:
             raise KeyError(f"key '{key}' not found in buffer")
 
-        #if len(values) != 120:
-        #    raise ValueError(f"Expected array of length 120, got {len(values)}")
+        if len(values) != 120:
+            raise ValueError(f"Expected array of length 120, got {len(values)}")
 
+        curr_pv_arr = self.buffer_map[key]
         idx = self.index
-        buf = self.buffer_map[key]
-
-        #if idx == 0:
-            #self.starting_pv_time = time
 
         if idx + 120 <= self.buffer_len:
             # have enough room without shifting, just write to next open index (this only happens during initial buffer fill-up)
-            self.logger.debug(f"initial filling of buffer, curr index {idx}")
-            buf[idx:idx+120] = values
-            self.index += 120
+            self.logger.debug(f"initial filling of buffer, current index {idx}")
+            curr_pv_arr[idx:idx+120] = values
         else:
-            # shift left and append to the end
-            self.logger.debug(f"buffer is full, removing oldest second of data")
-            buf[:-120] = buf[120:]
-            buf[-120:] = values
-            self.index = self.buffer_len
-            #self.starting_pv_time = time
+            # shift left and append to the end, this should be quick on a np.array
+            self.logger.debug("buffer is full, removing oldest data")
+            curr_pv_arr[:-120] = curr_pv_arr[120:]
+            curr_pv_arr[-120:] = values
 
     def get(self, key: str):
+        """
+        Get data from the buffer map for a given pv.
+        """
         if key not in self.buffer_map:
-            raise KeyError(f"key '{key}' not found in buffer")
+            raise KeyError(f"key '{key}' not found in buffer map")
         return self.buffer_map[key][:self.index]
 
     def clear(self, key: str):
+        """
+        Clear buffer contents for a given pv.
+        """
         if key not in self.buffer_map:
-            raise KeyError(f"key '{key}' not found in buffer.")
+            raise KeyError(f"key '{key}' not found in buffer map.")
         self.buffer_map[key][:] = np.empty(self.buffer_len, dtype=np.float64)
-        self.index = 0
+        self.index = 0
+
+    def dump_to_human_readable(self, directory: str = "buffer_dump_txt"):
+        """
+        Debug util: dump each PV's data to a separate text file, 1 value per line.
+
+        Call like this:
+            dir_name = f"buffer_txt_dump_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+            self.logger.debug(f"dump dir: {dir_name}")
+            self.buffer.dump_to_human_readable(directory=dir_name)
+        """
+        os.makedirs(directory, exist_ok=True)
+
+        for pv in self.pv_list:
+            valid_data = self.buffer_map[pv][:self.index]
+            pv = pv[5:]  # get rid of "ca://"
+            filepath = os.path.join(directory, f"{pv.replace(':', '_')}.txt")
+
+            self.logger.debug(f"writing dump file {filepath} for {pv}")
+            with open(filepath, "w") as f:
+                for v in valid_data:
+                    f.write(f"{v}\n")
