Merged
2 changes: 2 additions & 0 deletions .github/workflows/build_docs_and_deploy.yml
@@ -5,6 +5,8 @@ on:
workflow_dispatch: # lets you run a build from github.com
# Runs the workflow on all push events
push:
branches:
- main
pull_request:
branches:
- main
2 changes: 2 additions & 0 deletions .github/workflows/build_sanitizer.yml
@@ -5,6 +5,8 @@ on:
workflow_dispatch: # lets you run a build from github.com
# Runs the workflow on all push events
push:
branches:
- main
pull_request:
branches:
- main
2 changes: 2 additions & 0 deletions .github/workflows/build_test.yml
@@ -5,6 +5,8 @@ on:
workflow_dispatch: # lets you run a build from github.com
# Runs the workflow on all push events
push:
branches:
- main
pull_request:
branches:
- main
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -12,6 +12,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- RTSan real-time safety CI checks and testing (not done yet)
- JSON configuration loader with nlohmann_json dependency (not done yet)

## [v2.0.2] - 2025-08-02

### Added

- New pop_data methods with wait_until
- Support for TFLite binary models

### Changed

- Improved latency calculation to take parallel processing into account
- All operating systems now use std::chrono::steady_clock for benchmarking
- Tests for the Inference Manager and Session Element now use a fixed thread count of 2, which is available on all GitHub runners

## [v2.0.1] - 2025-07-31

### Changed
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -17,7 +17,6 @@ option(ANIRA_WITH_LIBTORCH "Build with LibTorch backend" ON)
option(ANIRA_WITH_ONNXRUNTIME "Build with ONNX Runtime backend" ON)
option(ANIRA_WITH_TFLITE "Build with TensorFlow Lite backend" ON)

# Shall the use of a controlled blocking operation for further reduction of latency be enabled?
option(ANIRA_WITH_LOGGING "Enable logging printouts" ON)
option(ANIRA_WITH_RADSAN "Enable RealtimeSanitizer (RADSan) checks (requires RADSan clang)" OFF)

4 changes: 4 additions & 0 deletions TODO.md
@@ -19,6 +19,10 @@
- [ ] Run the examples as tests in CI
- [ ] InferenceHandler tests with buffer sizes that are not a multiple of the preprocess input size

## Bugs

- [ ] When declaring the universal shape first in HybridNNConfig.h, tests fail on Asahi Linux (TFLite gets the universal tensor shapes)

## Packaging

- [ ] Trigger `ldconfig` in the .deb package
4 changes: 2 additions & 2 deletions docs/sphinx/benchmarking.rst
@@ -90,7 +90,7 @@ Implement the main measurement loop using the Google Benchmark framework's state
initialize_iteration();

// Begin timing measurement
auto start = std::chrono::high_resolution_clock::now();
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

// Process the buffer (this triggers inference)
m_inference_handler->process(m_buffer->get_array_of_write_pointers(), get_buffer_size());
@@ -101,7 +101,7 @@ Implement the main measurement loop using the Google Benchmark framework's state
}

// End timing measurement
auto end = std::chrono::high_resolution_clock::now();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();

// Record the measured runtime
interation_step(start, end, state);
6 changes: 5 additions & 1 deletion docs/sphinx/usage.rst
@@ -54,7 +54,7 @@ Pass the model data as binary information:
{void* model_data, size_t model_size, anira::InferenceBackend backend}

.. note::
Defining the model data as binary information is only possible for the ``anira::InferenceBackend::ONNX`` until now.
Defining the model data as binary information is only possible for the ``anira::InferenceBackend::ONNX`` and ``anira::InferenceBackend::TFLITE`` until now.

The :cpp:struct:`anira::InferenceConfig` requires a vector of :cpp:struct:`anira::ModelData`.

@@ -452,6 +452,10 @@ The :cpp:func:`anira::InferenceHandler::push_data` and :cpp:func:`anira::Inferen
delete[] input_data;
delete[] output_data;

.. note::
The :cpp:func:`anira::InferenceHandler::pop_data` method supports a wait_until parameter for blocking until data is available or timeout occurs. Use with the ``blocking_ratio`` in :cpp:struct:`anira::InferenceConfig` for proper latency compensation. Note that this blocks the real-time thread and is not fully lock-free, but this enables you to further reduce latency by waiting for the next available data.


5.3. Processing Non-Streamable Tensors
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -88,15 +88,15 @@ BENCHMARK_DEFINE_F(ProcessBlockFixture, BM_ADVANCED)(::benchmark::State& state)

initialize_iteration();

auto start = std::chrono::high_resolution_clock::now();
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

m_inference_handler->process(m_buffer->get_array_of_write_pointers(), get_buffer_size());

while (!buffer_processed()) {
std::this_thread::sleep_for(std::chrono::nanoseconds (10));
}

auto end = std::chrono::high_resolution_clock::now();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();

interation_step(start, end, state);
}
@@ -75,15 +75,15 @@ BENCHMARK_DEFINE_F(ProcessBlockFixture, BM_CNNSIZE)(::benchmark::State& state) {

initialize_iteration();

auto start = std::chrono::high_resolution_clock::now();
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

m_inference_handler->process(m_buffer->get_array_of_write_pointers(), get_buffer_size());

while (!buffer_processed()) {
std::this_thread::sleep_for(std::chrono::nanoseconds (10));
}

auto end = std::chrono::high_resolution_clock::now();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();

interation_step(start, end, state);
}
6 changes: 3 additions & 3 deletions examples/benchmark/simple-benchmark/defineSimpleBenchmark.cpp
@@ -57,15 +57,15 @@ BENCHMARK_DEFINE_F(ProcessBlockFixture, BM_SIMPLE)(::benchmark::State& state) {

initialize_iteration();

auto start = std::chrono::high_resolution_clock::now();
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

m_inference_handler->process(m_buffer->get_array_of_write_pointers(), get_buffer_size());

while (!buffer_processed()) {
std::this_thread::sleep_for(std::chrono::nanoseconds (10));
}
auto end = std::chrono::high_resolution_clock::now();

std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();

interation_step(start, end, state);
}
2 changes: 1 addition & 1 deletion extras/models/hybrid-nn/HybridNNConfig.h
@@ -16,10 +16,10 @@ static std::vector<anira::ModelData> model_data_hybridnn_config = {
};

static std::vector<anira::TensorShape> tensor_shape_hybridnn_config = {
{{{256, 1, 150}}, {{256, 1}}},
#ifdef USE_TFLITE
{{{256, 150, 1}}, {{256, 1}}, anira::InferenceBackend::TFLITE},
#endif
{{{256, 1, 150}}, {{256, 1}}}
};

static anira::ProcessingSpec processing_spec_hybridnn_config = {
48 changes: 42 additions & 6 deletions include/anira/InferenceHandler.h
@@ -176,31 +176,67 @@ class ANIRA_API InferenceHandler {
ANIRA_REALTIME void push_data(const float* const* const* input_data, size_t* num_input_samples);

/**
* @brief Pops processed output data from the pipeline for a specific tensor
* @brief Pops processed output data from the pipeline for a specific tensor (non-blocking)
*
* This method retrieves processed data from the inference pipeline. Should be
* used in conjunction with push_data for decoupled processing.
* Retrieves processed data from the inference pipeline for a specific tensor.
* Should be used in conjunction with push_data for decoupled processing.
* This method is non-blocking and returns immediately with available samples.
*
* @param output_data Output buffer organized as data[channel][sample]
* @param num_output_samples Maximum number of samples the output buffer can hold
* @param tensor_index Index of the tensor to retrieve data from (default: 0)
* @return Number of samples actually written to the output buffer
*
* @note This method is real-time safe and should not allocate memory
* @note This method is real-time safe and does not allocate memory.
*/
ANIRA_REALTIME size_t pop_data(float* const* output_data, size_t num_output_samples, size_t tensor_index = 0);

/**
* @brief Pops processed output data from the pipeline for a specific tensor (blocking with timeout)
*
* Retrieves processed data from the inference pipeline for a specific tensor.
* This method blocks until data is available or until the specified timeout is reached.
* Should be used in conjunction with push_data for decoupled processing.
*
* @param output_data Output buffer organized as data[channel][sample]
* @param num_output_samples Maximum number of samples the output buffer can hold
* @param wait_until Time point until which to wait for available data
* @param tensor_index Index of the tensor to retrieve data from (default: 0)
* @return Number of samples actually written to the output buffer
*
* @note This method is not 100% real-time safe due to potential blocking.
*/
size_t pop_data(float* const* output_data, size_t num_output_samples, std::chrono::steady_clock::time_point wait_until, size_t tensor_index = 0);

/**
* @brief Pops processed output data for multiple tensors simultaneously
* @brief Pops processed output data for multiple tensors simultaneously (non-blocking)
*
* Retrieves processed data for all tensors from the inference pipeline.
* This method is non-blocking and returns immediately with available samples for each tensor.
*
* @param output_data Output buffers organized as data[tensor_index][channel][sample]
* @param num_output_samples Array of maximum output sample counts for each tensor
* @return Array of actual output sample counts for each tensor
*
* @note This method is real-time safe and should not allocate memory
* @note This method is real-time safe and does not allocate memory.
*/
ANIRA_REALTIME size_t* pop_data(float* const* const* output_data, size_t* num_output_samples);

/**
* @brief Pops processed output data for multiple tensors simultaneously (blocking with timeout)
*
* Retrieves processed data for all tensors from the inference pipeline.
* This method blocks until data is available for each tensor or until the specified timeout is reached.
*
* @param output_data Output buffers organized as data[tensor_index][channel][sample]
* @param num_output_samples Array of maximum output sample counts for each tensor
* @param wait_until Time point until which to wait for available data
* @return Array of actual output sample counts for each tensor
*
* @note This method is not 100% real-time safe due to potential blocking.
*/
size_t* pop_data(float* const* const* output_data, size_t* num_output_samples, std::chrono::steady_clock::time_point wait_until);

/**
* @brief Gets the processing latency for a specific tensor
*
19 changes: 2 additions & 17 deletions include/anira/benchmark/ProcessBlockFixture.h
@@ -104,29 +104,14 @@ class ANIRA_API ProcessBlockFixture : public ::benchmark::Fixture {
* @brief Records timing information for a single benchmark iteration (Windows/macOS)
*
* Measures and records the elapsed time for a single benchmark iteration using
* steady_clock for high precision timing on Windows and macOS platforms.
* Updates benchmark state and provides detailed logging of iteration results.
* steady_clock for timing. Updates benchmark state and provides detailed logging
* of iteration results.
*
* @param start The start time point of the iteration
* @param end The end time point of the iteration
* @param state Reference to the benchmark state for recording results
*/
#if defined(_WIN32) || defined(__APPLE__)
void interation_step(const std::chrono::steady_clock::time_point& start, const std::chrono::steady_clock::time_point& end, ::benchmark::State& state);
#else
/**
* @brief Records timing information for a single benchmark iteration (Linux/Unix)
*
* Measures and records the elapsed time for a single benchmark iteration using
* system_clock for timing on Linux and Unix platforms. Updates benchmark state
* and provides detailed logging of iteration results.
*
* @param start The start time point of the iteration
* @param end The end time point of the iteration
* @param state Reference to the benchmark state for recording results
*/
void interation_step(const std::chrono::system_clock::time_point& start, const std::chrono::system_clock::time_point& end, ::benchmark::State& state);
#endif

/**
* @brief Finalizes the current benchmark repetition
28 changes: 19 additions & 9 deletions include/anira/scheduler/Context.h
@@ -165,18 +165,28 @@ class ANIRA_API Context{
* @param session Shared pointer to the session that has new data available
*/
void new_data_submitted(std::shared_ptr<SessionElement> session);

/**
* @brief Requests new data processing for a session with specified buffer duration
*
* Requests that the inference system process data for the specified session
* with the given buffer duration in seconds. This is used for scheduling
* and managing inference operations.
*
* @brief Requests new data processing for a session
*
* Requests that the inference system process data for the specified session.
* This is used for scheduling and managing inference operations. The request
* is processed immediately.
*
* @param session Shared pointer to the session requesting data processing
*/
void new_data_request(std::shared_ptr<SessionElement> session);

/**
* @brief Requests new data processing for a session at a specific time
*
* Requests that the inference system process data for the specified session,
* but waits for the data until the given time point before processing.
*
* @param session Shared pointer to the session requesting data processing
* @param buffer_size_in_sec Duration of the buffer to process in seconds
* @param wait_until Time point at which to begin processing the data request
*/
void new_data_request(std::shared_ptr<SessionElement> session, double buffer_size_in_sec);
void new_data_request(std::shared_ptr<SessionElement> session, std::chrono::steady_clock::time_point wait_until);

/**
* @brief Gets a reference to all active sessions
27 changes: 22 additions & 5 deletions include/anira/scheduler/InferenceManager.h
@@ -109,20 +109,37 @@ class ANIRA_API InferenceManager {
* @note This method is real-time safe and should not allocate memory
*/
void push_data(const float* const* const* input_data, size_t* num_input_samples);

/**
* @brief Pops processed output data from the inference pipeline
* @brief Pops processed output data from the inference pipeline (non-blocking)
*
* Retrieves processed data from the inference pipeline. Should be used in
* conjunction with push_data for decoupled processing patterns.
* Retrieves available processed data from the inference pipeline. Should be used in
* conjunction with push_data for decoupled processing patterns. This method does not block
* and returns immediately with any available output.
*
* @param output_data Output buffers organized as data[tensor_index][channel][sample]
* @param num_output_samples Array of maximum output sample counts for each tensor
* @return Array of actual output sample counts for each tensor
*
* @note This method is real-time safe and should not allocate memory
* @note This method is real-time safe and should not allocate memory.
*/
size_t* pop_data(float* const* const* output_data, size_t* num_output_samples);

/**
* @brief Pops processed output data from the inference pipeline with timeout
*
* Retrieves processed data from the inference pipeline, waiting until either data is available
* or the specified timeout expires. Should be used in conjunction with push_data for decoupled
* processing patterns. This method blocks until output is available or the wait_until time is reached.
*
* @param output_data Output buffers organized as data[tensor_index][channel][sample]
* @param num_output_samples Array of maximum output sample counts for each tensor
* @param wait_until Time point until which the method will wait for output data to become available
* @return Array of actual output sample counts for each tensor
*
* @note This method is not 100% real-time safe due to potential blocking.
*/
size_t* pop_data(float* const* const* output_data, size_t* num_output_samples, std::chrono::steady_clock::time_point wait_until);

/**
* @brief Sets the inference backend to use for neural network processing
3 changes: 2 additions & 1 deletion include/anira/scheduler/SessionElement.h
@@ -274,9 +274,10 @@ class ANIRA_API SessionElement {
* @param host_buffer_size Host audio buffer size
* @param host_sample_rate Host audio sample rate
* @param wait_time Expected wait time for inference completion
* @param postprocess_output_size Size of the model's postprocessed output in samples
* @return Additional inference-caused latency in samples
*/
int calculate_inference_caused_latency(float max_possible_inferences, float host_buffer_size, float host_sample_rate, float wait_time) const;
int calculate_inference_caused_latency(float max_possible_inferences, float host_buffer_size, float host_sample_rate, float wait_time, size_t postprocess_output_size) const;

/**
* @brief Calculates expected wait time for inference completion
18 changes: 18 additions & 0 deletions src/InferenceHandler.cpp
@@ -109,10 +109,28 @@ size_t InferenceHandler::pop_data(float* const* output_data, size_t num_output_s
return received_samples[tensor_index];
}

size_t InferenceHandler::pop_data(float* const* output_data, size_t num_output_samples, std::chrono::steady_clock::time_point wait_until, size_t tensor_index) {
size_t num_output_tensors = m_inference_config.get_tensor_output_shape().size();
std::vector<float* const*> output_tensor_ptrs(num_output_tensors, nullptr);
std::vector<size_t> output_tensor_num_samples(num_output_tensors, 0);

if (tensor_index < num_output_tensors) {
output_tensor_ptrs[tensor_index] = output_data;
output_tensor_num_samples[tensor_index] = num_output_samples;
}

size_t* received_samples = m_inference_manager.pop_data(output_tensor_ptrs.data(), output_tensor_num_samples.data(), wait_until);
return received_samples[tensor_index];
}

size_t* InferenceHandler::pop_data(float* const* const* output_data, size_t* num_output_samples) {
return m_inference_manager.pop_data(output_data, num_output_samples);
}

size_t* InferenceHandler::pop_data(float* const* const* output_data, size_t* num_output_samples, std::chrono::steady_clock::time_point wait_until) {
return m_inference_manager.pop_data(output_data, num_output_samples, wait_until);
}

void InferenceHandler::set_inference_backend(InferenceBackend inference_backend) {
m_inference_manager.set_backend(inference_backend);
}