@@ -50,7 +50,6 @@ TEST_F(OpenVINO, CallbacksTest) {
    Core core;
    auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
    const std::string inputName{"b"};
-   auto input = model->get_parameters().at(0);
    ov::element::Type_t dtype = ov::element::Type_t::f32;
    ov::Shape ovShape;
    ovShape.emplace_back(1);
@@ -92,6 +91,91 @@ TEST_F(OpenVINO, CallbacksTest) {
    EXPECT_TRUE(outOvTensor.is<ov::Tensor>());
    EXPECT_TRUE(outAutoTensor.is<ov::Tensor>());
}
+TEST_F(OpenVINO, StressInferTest) {
+    Core core;
+    auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
+    const std::string inputName{"b"};
+    auto input = model->get_parameters().at(0);
+    ov::element::Type_t dtype = ov::element::Type_t::f32;
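+    // the model is reshaped to a [1, 100000] input so every request processes 100000 float elements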
+    ov::Shape ovShape;
+    ovShape.emplace_back(1);
+    ovShape.emplace_back(100000);
+    std::map<std::string, ov::PartialShape> inputShapes;
+    inputShapes[inputName] = ovShape;
+    model->reshape(inputShapes);
+    auto cpuCompiledModel = core.compile_model(model, "CPU");
+    std::vector<ov::InferRequest> inferRequests;
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    inferRequests.resize(0);
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    inferRequests.reserve(2);
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    // inferRequests.shrink_to_fit();
+    // We want to test the workload while the infer request vector grows during inference:
+    // we start with a vector holding a single infer request and put load on it,
+    // then after a short delay another thread adds a second infer request to the vector.
+    // Ideally the vector reallocates its memory, which forces a move of the objects inside it.
+
+    // The worker function below runs in a thread. It gets a reference to the inferRequests vector,
+    // creates ov::Tensors with the passed dtype and ovShape, fills the input with the value i
+    // so the response content can be verified on every iteration, and keeps inferring until it
+    // receives a stop signal through the future.
+    auto loadFunction = [&cpuCompiledModel, &inferRequests, inputName, dtype, ovShape](size_t i, std::future<void> stopSignal) {
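+        // the std::future is taken by value; std::thread moves it in, so each worker owns its own stop signal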
+        SPDLOG_INFO("Starting loadFunction:{}", i);
+        inferRequests.emplace_back(cpuCompiledModel.create_infer_request());
+        SPDLOG_INFO("Starting shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
+        inferRequests.shrink_to_fit();
+        SPDLOG_INFO("After shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
+        auto& inferRequest = inferRequests[i];
+        // prepare ov::Tensor data
+        ov::Tensor inputOvTensor(dtype, ovShape);
+        ov::Tensor outputOvTensor(dtype, ovShape);
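+        // the dummy model adds 1 to every element, so the input is filled with i and the expected output with i + 1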
+        for (size_t j = 0; j < 100000; j++) {
+            reinterpret_cast<float*>(inputOvTensor.data())[j] = i;
+            reinterpret_cast<float*>(outputOvTensor.data())[j] = (i + 1);
+            if (j < 10 || j > 99990) {
+                SPDLOG_ERROR("input data: {}, expected: {}, i:{}, j:{}", reinterpret_cast<float*>(inputOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j);
+            }
+        }
+
+        // keep inferring until the stop signal arrives; wait_for(0) just polls the future without blocking
+        SPDLOG_INFO("Running infer request {}", i);
+        size_t k = 0;
+        while (stopSignal.wait_for(std::chrono::milliseconds(0)) == std::future_status::timeout) {
+            inferRequest.set_tensor(inputName, inputOvTensor);
+            inferRequest.start_async();
+            inferRequest.wait();
+            auto outOvTensor = inferRequest.get_tensor("a");
+            for (size_t j = 0; j < 100000; j++) {
+                if (j < 10 || j > 99990) {
+                    SPDLOG_ERROR("infReqRef:{} infReq[i]:{} outTensor data: {}, expected: {} i:{} j:{} k:{}", (void*)(&inferRequest), (void*)(&inferRequests[i]), reinterpret_cast<float*>(outOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j, k);
+                }
+            }
+            ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
+            ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
+            k++;
+        }
+    };
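+    // one promise/future pair per worker acts as a stop flag; setting the promise ends that worker's infer loop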
+    size_t n = 2;
+    std::vector<std::promise<void>> stopSignal(n);
+    std::vector<std::thread> threads;
+    threads.reserve(n);
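+    // start the workers 100 ms apart so the second emplace_back()/shrink_to_fit() happens while the first request is already running inference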
+    for (size_t i = 0; i < n; ++i) {
+        // create thread that will run loadFunction
+        SPDLOG_INFO("Starting thread {}", i);
+        threads.emplace_back(loadFunction, i, stopSignal[i].get_future());
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
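+    // let all workers infer concurrently for a few seconds before stopping them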
+    std::this_thread::sleep_for(std::chrono::seconds(5));
+    for (size_t i = 0; i < n; ++i) {
+        // signal the worker thread to stop its infer loop
+        SPDLOG_INFO("Stopping thread {}", i);
+        stopSignal[i].set_value();
+    }
+    for (size_t i = 0; i < n; ++i) {
+        SPDLOG_INFO("Joining thread {}", i);
+        threads[i].join();
+    }
+}
TEST_F(OpenVINO, ResetOutputTensors) {
    Core core;
    auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");