@@ -50,7 +50,6 @@ TEST_F(OpenVINO, CallbacksTest) {
    Core core;
    auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
    const std::string inputName{"b"};
-   auto input = model->get_parameters().at(0);
    ov::element::Type_t dtype = ov::element::Type_t::f32;
    ov::Shape ovShape;
    ovShape.emplace_back(1);
@@ -92,6 +91,91 @@ TEST_F(OpenVINO, CallbacksTest) {
    EXPECT_TRUE(outOvTensor.is<ov::Tensor>());
    EXPECT_TRUE(outAutoTensor.is<ov::Tensor>());
}
+TEST_F(OpenVINO, StressInferTest) {
+    Core core;
+    auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");
+    const std::string inputName{"b"};
+    auto input = model->get_parameters().at(0);
+    ov::element::Type_t dtype = ov::element::Type_t::f32;
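+    // the model is reshaped to a [1, 100000] input so every request processes 100000 float elements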
+    ov::Shape ovShape;
+    ovShape.emplace_back(1);
+    ovShape.emplace_back(100000);
+    std::map<std::string, ov::PartialShape> inputShapes;
+    inputShapes[inputName] = ovShape;
+    model->reshape(inputShapes);
+    auto cpuCompiledModel = core.compile_model(model, "CPU");
+    std::vector<ov::InferRequest> inferRequests;
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    inferRequests.resize(0);
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    inferRequests.reserve(2);
+    SPDLOG_INFO("Starting vector size:{}, vector capacity:{}", inferRequests.size(), inferRequests.capacity());
+    // inferRequests.shrink_to_fit();
+    // We want to test the workload while the infer request vector grows during inference:
+    // we start with a vector holding a single infer request and put load on it,
+    // then after a short delay another thread adds a second infer request to the vector.
+    // Ideally the vector reallocates its memory, which forces a move of the objects inside it.
+
+    // The worker function below runs in a thread. It gets a reference to the inferRequests vector,
+    // creates ov::Tensors with the passed dtype and ovShape, fills the input with the value i
+    // so the response content can be verified on every iteration, and keeps inferring until it
+    // receives a stop signal through the future.
+    auto loadFunction = [&cpuCompiledModel, &inferRequests, inputName, dtype, ovShape](size_t i, std::future<void> stopSignal) {
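+        // the std::future is taken by value; std::thread moves it in, so each worker owns its own stop signal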
+        SPDLOG_INFO("Starting loadFunction:{}", i);
+        inferRequests.emplace_back(cpuCompiledModel.create_infer_request());
+        SPDLOG_INFO("Starting shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
+        inferRequests.shrink_to_fit();
+        SPDLOG_INFO("After shrinkToFit:{} vector size:{}, vector capacity:{}", i, inferRequests.size(), inferRequests.capacity());
+        auto& inferRequest = inferRequests[i];
+        // prepare ov::Tensor data
+        ov::Tensor inputOvTensor(dtype, ovShape);
+        ov::Tensor outputOvTensor(dtype, ovShape);
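+        // the dummy model adds 1 to every element, so the input is filled with i and the expected output with i + 1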
+        for (size_t j = 0; j < 100000; j++) {
+            reinterpret_cast<float*>(inputOvTensor.data())[j] = i;
+            reinterpret_cast<float*>(outputOvTensor.data())[j] = (i + 1);
+            if (j < 10 || j > 99990) {
+                SPDLOG_ERROR("input data: {}, expected: {}, i:{}, j:{}", reinterpret_cast<float*>(inputOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j);
+            }
+        }
+
+        // keep inferring until the stop signal arrives; wait_for(0) just polls the future without blocking
+        SPDLOG_INFO("Running infer request {}", i);
+        size_t k = 0;
+        while (stopSignal.wait_for(std::chrono::milliseconds(0)) == std::future_status::timeout) {
+            inferRequest.set_tensor(inputName, inputOvTensor);
+            inferRequest.start_async();
+            inferRequest.wait();
+            auto outOvTensor = inferRequest.get_tensor("a");
+            for (size_t j = 0; j < 100000; j++) {
+                if (j < 10 || j > 99990) {
+                    SPDLOG_ERROR("infReqRef:{} infReq[i]:{} outTensor data: {}, expected: {} i:{} j:{} k:{}", (void*)(&inferRequest), (void*)(&inferRequests[i]), reinterpret_cast<float*>(outOvTensor.data())[j], reinterpret_cast<float*>(outputOvTensor.data())[j], i, j, k);
+                }
+            }
+            ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
+            ASSERT_EQ(0, std::memcmp(outOvTensor.data(), outputOvTensor.data(), outOvTensor.get_byte_size())) << "i: " << i;
+            k++;
+        }
+    };
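+    // one promise/future pair per worker acts as a stop flag; setting the promise ends that worker's infer loop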
+    size_t n = 2;
+    std::vector<std::promise<void>> stopSignal(n);
+    std::vector<std::thread> threads;
+    threads.reserve(n);
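+    // start the workers 100 ms apart so the second emplace_back()/shrink_to_fit() happens while the first request is already running inference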
+    for (size_t i = 0; i < n; ++i) {
+        // create thread that will run loadFunction
+        SPDLOG_INFO("Starting thread {}", i);
+        threads.emplace_back(loadFunction, i, stopSignal[i].get_future());
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
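+    // let all workers infer concurrently for a few seconds before stopping them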
+    std::this_thread::sleep_for(std::chrono::seconds(5));
+    for (size_t i = 0; i < n; ++i) {
+        // signal the worker thread to stop its infer loop
+        SPDLOG_INFO("Stopping thread {}", i);
+        stopSignal[i].set_value();
+    }
+    for (size_t i = 0; i < n; ++i) {
+        SPDLOG_INFO("Joining thread {}", i);
+        threads[i].join();
+    }
+}
TEST_F(OpenVINO, ResetOutputTensors) {
    Core core;
    auto model = core.read_model("/ovms/src/test/dummy/1/dummy.xml");