1 change: 1 addition & 0 deletions src/BUILD
@@ -2910,6 +2910,7 @@ cc_library(
"@com_google_googletest//:gtest",
],
copts = COPTS_TESTS,
local_defines = COMMON_LOCAL_DEFINES,
linkopts = [],
)
cc_library(
2 changes: 1 addition & 1 deletion src/custom_nodes/common/buffersqueue.cpp
@@ -26,7 +26,7 @@ BuffersQueue::BuffersQueue(size_t singleBufferSize, int streamsLength) :
size(singleBufferSize * streamsLength),
memoryPool(std::make_unique<char[]>(size)) {
for (int i = 0; i < streamsLength; ++i) {
inferRequests.push_back(memoryPool.get() + i * singleBufferSize);
inferRequests.insert({i, memoryPool.get() + i * singleBufferSize});
}
}

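The hunk above changes how BuffersQueue registers its per-stream buffers: instead of appending them to a vector, it keys them by stream index. A minimal standalone sketch of that idea (SimpleBuffersQueue, getBuffer and the buffers member are illustrative names, not the actual OVMS class):

```cpp
#include <cstddef>
#include <memory>
#include <unordered_map>

// Simplified stand-in for BuffersQueue: one contiguous pool carved into
// per-stream buffers, indexed by stream id instead of appended in order.
class SimpleBuffersQueue {
public:
    SimpleBuffersQueue(size_t singleBufferSize, int streamsLength) :
        size(singleBufferSize * streamsLength),
        memoryPool(std::make_unique<char[]>(size)) {
        for (int i = 0; i < streamsLength; ++i) {
            // insert({key, value}) replaces push_back: each buffer is
            // addressed by its stream id rather than by its position.
            buffers.insert({i, memoryPool.get() + i * singleBufferSize});
        }
    }
    char* getBuffer(int streamId) const { return buffers.at(streamId); }

private:
    size_t size;
    std::unique_ptr<char[]> memoryPool;
    std::unordered_map<int, char*> buffers;
};

int main() {
    SimpleBuffersQueue queue(1024, 4);
    char* buffer = queue.getBuffer(2);  // buffer backing stream id 2
    (void)buffer;
    return 0;
}
```

Lookup by stream id behaves the same as positional indexing did here, since the ids are assigned densely from 0; the practical difference lies in how the container behaves as it grows, illustrated after the queue.hpp hunk below.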
2 changes: 1 addition & 1 deletion src/ovinferrequestsqueue.cpp
@@ -25,7 +25,7 @@ OVInferRequestsQueue::OVInferRequestsQueue(ov::CompiledModel& compiledModel, int
for (int i = 0; i < streamsLength; ++i) {
streams[i] = i;
OV_LOGGER("ov::CompiledModel: {} compiledModel.create_infer_request()", reinterpret_cast<void*>(&compiledModel));
inferRequests.push_back(compiledModel.create_infer_request());
inferRequests.insert({i, compiledModel.create_infer_request()});
}
}
} // namespace ovms
3 changes: 2 additions & 1 deletion src/queue.hpp
@@ -25,6 +25,7 @@
#include <thread>
#include <utility>
#include <vector>
#include <unordered_map>

// #include "profiler.hpp"

@@ -135,7 +136,7 @@ class Queue {
/**
*
*/
std::vector<T> inferRequests;
std::unordered_map<int, T> inferRequests;
std::queue<std::promise<int>> promises;
};
} // namespace ovms
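This hunk swaps the container that holds the queued objects from std::vector<T> to std::unordered_map<int, T>. The relevant difference (and, judging by the stress test added below, the likely motivation): when a std::vector grows past its capacity it reallocates and moves every element, invalidating pointers and references to them, whereas std::unordered_map guarantees that inserting new elements never invalidates references or pointers to the ones already stored. A self-contained sketch of just that behavior, using a plain int payload rather than ov::InferRequest:

```cpp
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

int main() {
    // std::vector: growth beyond capacity reallocates the storage, so the
    // addresses of all existing elements can change.
    std::vector<int> vec;
    vec.reserve(1);
    vec.push_back(0);
    auto vecAddrBefore = reinterpret_cast<std::uintptr_t>(&vec[0]);
    for (int i = 1; i < 1000; ++i)
        vec.push_back(i);
    auto vecAddrAfter = reinterpret_cast<std::uintptr_t>(&vec[0]);
    std::printf("vector element 0 moved: %s\n", vecAddrBefore == vecAddrAfter ? "no" : "yes");

    // std::unordered_map: inserting new elements never invalidates references
    // or pointers to existing elements (rehashing only invalidates iterators).
    std::unordered_map<int, int> map;
    map.insert({0, 0});
    auto mapAddrBefore = reinterpret_cast<std::uintptr_t>(&map.at(0));
    for (int i = 1; i < 1000; ++i)
        map.insert({i, i});
    auto mapAddrAfter = reinterpret_cast<std::uintptr_t>(&map.at(0));
    std::printf("map element 0 moved: %s\n", mapAddrBefore == mapAddrAfter ? "no" : "yes");
    return 0;
}
```

With the unordered_map guarantee, a reference obtained from the container stays valid when further entries are inserted later, which is not true of a vector element reference after the vector reallocates.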
87 changes: 86 additions & 1 deletion src/test/openvino_tests.cpp
@@ -50,7 +50,6 @@ TEST_F(OpenVINO, CallbacksTest) {
Core core;
auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
const std::string inputName{"b"};
auto input = model->get_parameters().at(0);
ov::element::Type_t dtype = ov::element::Type_t::f32;
ov::Shape ovShape;
ovShape.emplace_back(1);
@@ -92,6 +91,92 @@
EXPECT_TRUE(outOvTensor.is<ov::Tensor>());
EXPECT_TRUE(outAutoTensor.is<ov::Tensor>());
}
TEST_F(OpenVINO, StressInferTest) {
GTEST_SKIP();
Core core;
auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
const std::string inputName{"b"};
auto input = model->get_parameters().at(0);
ov::element::Type_t dtype = ov::element::Type_t::f32;
ov::Shape ovShape;
ovShape.emplace_back(1);
ovShape.emplace_back(100000);
std::map<std::string, ov::PartialShape> inputShapes;
inputShapes[inputName] = ovShape;
model->reshape(inputShapes);
auto cpuCompiledModel = core.compile_model(model, "CPU");
std::vector<ov::InferRequest> inferRequests;
SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
inferRequests.resize(0);
SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
inferRequests.reserve(2);
SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
// inferRequests.shrink_to_fit();
// We want to test a workload during which the number of infer requests in the vector increases.
// We start with a vector holding a single infer request and begin the workload on it;
// shortly afterwards another thread adds a second infer request to the vector.
// Ideally the vector reallocates its memory, forcing a move of the objects inside it.

// First, define the function each thread will run. It gets a reference to the inferRequests vector,
// creates ov::Tensor objects with the passed dtype and ovShape, and fills the input tensor with the value i
// so that the response content can be checked on every iteration. It keeps running the workload until it receives a stop signal through the future.
auto loadFunction = [&cpuCompiledModel, &inferRequests, inputName, dtype, ovShape](size_t i, std::future<void> stopSignal) {
SPDLOG_INFO("Starting loadFunction:{}", i);
inferRequests.emplace_back(cpuCompiledModel.create_infer_request());
SPDLOG_INFO("Starting shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
inferRequests.shrink_to_fit();
SPDLOG_INFO("After shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
auto& inferRequest = inferRequests[i];
// prepare ov::Tensor data
ov::Tensor inputOvTensor(dtype, ovShape);
ov::Tensor outputOvTensor(dtype, ovShape);
for (size_t j = 0; j < 100000; j++) {
reinterpret_cast<float*>(inputOvTensor.data())[j] = i;
reinterpret_cast<float*>(outputOvTensor.data())[j] = (i + 1);
if (j < 10 || j > 99990) {
SPDLOG_ERROR("input data: {}, expected: {}, i:{}, j:{}", reinterpret_cast<float*>(inputOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j);
}
}

// run the inference loop until the stop signal arrives
SPDLOG_INFO("Running infer request {}", i);
size_t k = 0;
while (stopSignal.wait_for(std::chrono::milliseconds(0)) == std::future_status::timeout) {
inferRequest.set_tensor(inputName, inputOvTensor);
inferRequest.start_async();
inferRequest.wait();
auto outOvTensor = inferRequest.get_tensor("a");
for (size_t j = 0; j < 100000; j++) {
if (j < 10 || j > 99990) {
SPDLOG_ERROR("infReqRef:{} infReq[i]:{} outTensor data: {}, expected: {} i:{} j:{} k:{}", (void*)(&inferRequest), (void*)(&inferRequests[i]), reinterpret_cast<float*>(outOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j, k);
}
}
ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
k++;
}
};
size_t n = 2;
std::vector<std::promise<void>> stopSignal(n);
std::vector<std::thread> threads;
threads.reserve(n);
for (size_t i = 0; i < n; ++i) {
// create thread that will run loadFunction
SPDLOG_INFO("Starting thread {}", i);
threads.emplace_back(loadFunction, i, stopSignal[i].get_future());
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
std::this_thread::sleep_for(std::chrono::seconds(5));
for (size_t i = 0; i < n; ++i) {
// signal the thread to stop
SPDLOG_INFO("Stopping thread {}", i);
stopSignal[i].set_value();
}
for (size_t i = 0; i < n; ++i) {
SPDLOG_INFO("Joining thread {}", i);
threads[i].join();
}
}
TEST_F(OpenVINO, ResetOutputTensors) {
Core core;
auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
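The stop mechanism in the new StressInferTest, one std::promise<void> per worker whose std::future is polled with a zero-timeout wait_for, is a small reusable pattern in its own right. A self-contained sketch of just that pattern, with a counter standing in for the start_async()/wait() inference round:

```cpp
#include <chrono>
#include <cstdio>
#include <future>
#include <thread>
#include <vector>

int main() {
    const size_t n = 2;
    std::vector<std::promise<void>> stopSignals(n);
    std::vector<std::thread> workers;
    workers.reserve(n);

    for (size_t i = 0; i < n; ++i) {
        workers.emplace_back([i](std::future<void> stop) {
            size_t iterations = 0;
            // Poll the future with a zero timeout: "timeout" means the promise
            // has not been fulfilled yet, so keep working.
            while (stop.wait_for(std::chrono::milliseconds(0)) == std::future_status::timeout) {
                ++iterations;  // stand-in for one start_async()/wait() round
            }
            std::printf("worker %zu finished after %zu iterations\n", i, iterations);
        }, stopSignals[i].get_future());
    }

    std::this_thread::sleep_for(std::chrono::seconds(1));
    for (auto& stop : stopSignals)
        stop.set_value();  // request stop
    for (auto& worker : workers)
        worker.join();
    return 0;
}
```

wait_for with a zero timeout returns std::future_status::timeout until set_value() is called on the promise, so the loop body keeps running without ever blocking on the future.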