
Commit a9dd1ca

Tryout
1 parent 5179adc commit a9dd1ca

5 files changed: +105 -6 lines changed

src/BUILD

Lines changed: 1 addition & 0 deletions

@@ -2910,6 +2910,7 @@ cc_library(
         "@com_google_googletest//:gtest",
     ],
     copts = COPTS_TESTS,
+    local_defines = COMMON_LOCAL_DEFINES,
     linkopts = [],
 )
 cc_library(
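
Note: Bazel's local_defines attribute adds preprocessor definitions when compiling the target itself, without propagating them to dependents, so this test target now compiles with the same macros as the main build. A minimal sketch of how such a define is consumed, using a hypothetical macro name OVMS_COMMON_FLAG (the real entries live in COMMON_LOCAL_DEFINES):

#include <cstdio>

int main() {
    // OVMS_COMMON_FLAG is a stand-in for whatever COMMON_LOCAL_DEFINES provides.
#ifdef OVMS_COMMON_FLAG
    std::puts("compiled with the local define");
#else
    std::puts("compiled without the local define");
#endif
}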

src/ovinferrequestsqueue.cpp

Lines changed: 2 additions & 1 deletion

@@ -21,7 +21,8 @@
 
 namespace ovms {
 OVInferRequestsQueue::OVInferRequestsQueue(ov::CompiledModel& compiledModel, int streamsLength) :
-    Queue(streamsLength) {
+    Queue(streamsLength),
+    compiledModel(compiledModel) {
     for (int i = 0; i < streamsLength; ++i) {
         streams[i] = i;
         OV_LOGGER("ov::CompiledModel: {} compiledModel.create_infer_request()", reinterpret_cast<void*>(&compiledModel));

src/ovinferrequestsqueue.hpp

Lines changed: 1 addition & 0 deletions

@@ -24,6 +24,7 @@ namespace ovms {
 class OVInferRequestsQueue : public Queue<ov::InferRequest> {
 public:
     OVInferRequestsQueue(ov::CompiledModel& compiledModel, int streamsLength);
+    ov::CompiledModel& compiledModel;
 };
 
 }  // namespace ovms
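
Storing an ov::CompiledModel& member gives the queue a handle for creating additional infer requests after construction, which the new extendQueue() in queue.hpp relies on. This commit does not yet connect the two; a possible hookup, sketched purely as an assumption, would forward a construct lambda to the base Queue:

// Hypothetical wiring (not part of this commit): hand the base
// Queue<ov::InferRequest> a lambda that builds new requests on demand.
// The reference capture is safe only as long as the CompiledModel
// outlives the queue, the same lifetime contract the member implies.
OVInferRequestsQueue::OVInferRequestsQueue(ov::CompiledModel& compiledModel, int streamsLength) :
    Queue(streamsLength,
          std::function<ov::InferRequest()>(
              [&compiledModel]() { return compiledModel.create_infer_request(); })),
    compiledModel(compiledModel) {
    // body as in the commit: register stream ids and create the initial requests
}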

src/queue.hpp

Lines changed: 16 additions & 4 deletions

@@ -55,6 +55,16 @@ class Queue {
         return idleStreamFuture;
     }
 
+    void extendQueue() {
+        if (!constructFunc.has_value()) {
+            return;
+        }
+        size_t streamSize = streams.size();
+        streams.push_back(streamSize - 1);
+        inferRequests.reserve(streams.size());
+        inferRequests.push_back(constructFunc.value()());
+    }
+
     std::optional<int> tryToGetIdleStream() {
         // OVMS_PROFILE_FUNCTION();
         int value;
@@ -69,7 +79,6 @@
             return value;
         }
     }
-
     /**
      * @brief Release stream after execution
      */
@@ -95,13 +104,16 @@
     /**
      * @brief Constructor with initialization
      */
-    Queue(int streamsLength) :
-        streams(streamsLength),
+    // The constructor can also accept a lambda returning T; the lambda is
+    // optional, but if present it is used to construct T objects.
+    Queue(int streamsLength, std::optional<std::function<T()>> constructFunc = std::nullopt) : streams(streamsLength),
+        constructFunc(constructFunc),
        front_idx{0},
        back_idx{0} {
        for (int i = 0; i < streamsLength; ++i) {
            streams[i] = i;
        }
+        streams.reserve(50);
    }
 
    /**
@@ -116,7 +128,7 @@
     * @brief Vector representing circular buffer for infer queue
     */
    std::vector<int> streams;
-
+    std::optional<std::function<T()>> constructFunc = std::nullopt;
    /**
     * @brief Index of the front of the idle streams list
     */
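
Taken together, the constructor change and extendQueue() make the queue growable at runtime: when a construct lambda is present, one more stream id and one more lambda-built T are appended. Two details worth flagging: streams.push_back(streamSize - 1) records a duplicate of the last existing id (the fresh slot's id would presumably be streamSize), and streams.reserve(50) runs after the vector is already filled, so it only matters when streamsLength is below 50; the reserve appears intended to keep later push_backs from reallocating under concurrent use. A minimal usage sketch, assuming queue.hpp is included and a trivial T:

#include <functional>

// The lambda is wrapped in std::function explicitly: a bare lambda would need
// two user-defined conversions (lambda -> std::function -> std::optional) to
// reach the parameter type, and implicit conversion allows only one.
Queue<int> queue(2, std::function<int()>([]() { return 42; }));
queue.extendQueue();  // appends one stream id and one lambda-built element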

src/test/openvino_tests.cpp

Lines changed: 85 additions & 1 deletion

@@ -50,7 +50,6 @@ TEST_F(OpenVINO, CallbacksTest) {
     Core core;
     auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
     const std::string inputName{"b"};
-    auto input = model->get_parameters().at(0);
     ov::element::Type_t dtype = ov::element::Type_t::f32;
     ov::Shape ovShape;
     ovShape.emplace_back(1);
@@ -92,6 +91,91 @@
     EXPECT_TRUE(outOvTensor.is<ov::Tensor>());
     EXPECT_TRUE(outAutoTensor.is<ov::Tensor>());
 }
+TEST_F(OpenVINO, StressInferTest) {
+    Core core;
+    auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
+    const std::string inputName{"b"};
+    auto input = model->get_parameters().at(0);
+    ov::element::Type_t dtype = ov::element::Type_t::f32;
+    ov::Shape ovShape;
+    ovShape.emplace_back(1);
+    ovShape.emplace_back(100000);
+    std::map<std::string, ov::PartialShape> inputShapes;
+    inputShapes[inputName] = ovShape;
+    model->reshape(inputShapes);
+    auto cpuCompiledModel = core.compile_model(model, "CPU");
+    std::vector<ov::InferRequest> inferRequests;
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    inferRequests.resize(0);
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    inferRequests.reserve(2);
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    // inferRequests.shrink_to_fit();
+    // We want to exercise the workload while the infer request vector grows: start the
+    // workload on a small vector, then after a short delay start another thread which
+    // adds one more infer request to the vector. Ideally the vector reallocates its
+    // memory, forcing a move of the ov::InferRequest objects inside it.
+
+    // loadFunction runs in each thread. It takes a reference to the inferRequests vector,
+    // creates ov::Tensor objects with the given dtype and ovShape, and fills the input with
+    // the value i (expected output i + 1) so each response can be checked; it keeps inferring until signalled via the future.
+    auto loadFunction = [&cpuCompiledModel, &inferRequests, inputName, dtype, ovShape](size_t i, std::future<void> stopSignal) {
+        SPDLOG_INFO("Starting loadFunction:{}", i);
+        inferRequests.emplace_back(cpuCompiledModel.create_infer_request());
+        SPDLOG_INFO("Starting shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
+        inferRequests.shrink_to_fit();
+        SPDLOG_INFO("After shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
+        auto& inferRequest = inferRequests[i];
+        // prepare ov::Tensor data
+        ov::Tensor inputOvTensor(dtype, ovShape);
+        ov::Tensor outputOvTensor(dtype, ovShape);
+        for (size_t j = 0; j < 100000; j++) {
+            reinterpret_cast<float*>(inputOvTensor.data())[j] = i;
+            reinterpret_cast<float*>(outputOvTensor.data())[j] = (i + 1);
+            if (j < 10 || j > 99990) {
+                SPDLOG_ERROR("input data: {}, expected: {}, i:{}, j:{}", reinterpret_cast<float*>(inputOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j);
+            }
+        }
+
+        // infer in a loop that stops only when the stop signal arrives
+        SPDLOG_INFO("Running infer request {}", i);
+        size_t k = 0;
+        while (stopSignal.wait_for(std::chrono::milliseconds(0)) == std::future_status::timeout) {
+            inferRequest.set_tensor(inputName, inputOvTensor);
+            inferRequest.start_async();
+            inferRequest.wait();
+            auto outOvTensor = inferRequest.get_tensor("a");
+            for (size_t j = 0; j < 100000; j++) {
+                if (j < 10 || j > 99990) {
+                    SPDLOG_ERROR("infReqRef:{} infReq[i]:{} outTensor data: {}, expected: {} i:{} j:{} k:{}", (void*)(&inferRequest), (void*)(&inferRequests[i]), reinterpret_cast<float*>(outOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j, k);
+                }
+            }
+            ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
+            ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
+            k++;
+        }
+    };
+    size_t n = 2;
+    std::vector<std::promise<void>> stopSignal(n);
+    std::vector<std::thread> threads;
+    threads.reserve(n);
+    for (size_t i = 0; i < n; ++i) {
+        // start a thread running loadFunction
+        SPDLOG_INFO("Starting thread {}", i);
+        threads.emplace_back(loadFunction, i, stopSignal[i].get_future());
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+    std::this_thread::sleep_for(std::chrono::seconds(5));
+    for (size_t i = 0; i < n; ++i) {
+        // signal each thread to stop
+        SPDLOG_INFO("Stopping thread {}", i);
+        stopSignal[i].set_value();
+    }
+    for (size_t i = 0; i < n; ++i) {
+        SPDLOG_INFO("Joining thread {}", i);
+        threads[i].join();
+    }
+}
 TEST_F(OpenVINO, ResetOutputTensors) {
     Core core;
     auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
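
The stop mechanism in the new test is the standard one-shot promise/future signal: each worker polls its std::future with a zero timeout, and wait_for returning std::future_status::timeout means "keep going"; once the main thread calls set_value(), the future becomes ready and the loop exits. A self-contained sketch of just that pattern, independent of OpenVINO:

#include <chrono>
#include <cstdio>
#include <future>
#include <thread>

int main() {
    std::promise<void> stop;
    // std::thread accepts the move-only lambda; the future is moved in via init-capture.
    std::thread worker([stopSignal = stop.get_future()]() {
        size_t iterations = 0;
        // Zero-timeout poll: timeout status means no stop was requested yet.
        while (stopSignal.wait_for(std::chrono::milliseconds(0)) == std::future_status::timeout) {
            ++iterations;  // stand-in for one set_tensor/start_async/wait cycle
        }
        std::printf("stopped after %zu iterations\n", iterations);
    });
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    stop.set_value();  // one-shot signal; the worker's future becomes ready
    worker.join();
}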
