Skip to content

Commit 755ac39

Browse files
author
xiexun
committed
Fix crash by improving lock handling
1 parent 052b6c3 commit 755ac39

File tree

5 files changed

+179
-15
lines changed

5 files changed

+179
-15
lines changed

orbbec_camera/include/orbbec_camera/d2c_viewer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,9 @@ class D2CViewer {
4242
sensor_msgs::msg::Image>;
4343
std::shared_ptr<message_filters::Synchronizer<MySyncPolicy>> sync_;
4444
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr d2c_viewer_pub_;
45+
46+
// Add thread safety
47+
std::mutex callback_mutex_;
48+
std::atomic<bool> is_active_{true};
4549
};
4650
} // namespace orbbec_camera

orbbec_camera/include/orbbec_camera/ob_camera_node_driver.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class OBCameraNodeDriver : public rclcpp::Node {
8787
std::shared_ptr<ob::DeviceInfo> device_info_ = nullptr;
8888
std::atomic_bool is_alive_{false};
8989
std::atomic_bool device_connected_{false};
90+
std::atomic_bool device_connecting_{false};
9091
std::string serial_number_;
9192
std::string device_unique_id_;
9293
std::string usb_port_;

orbbec_camera/src/d2c_viewer.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
namespace orbbec_camera {
2929
D2CViewer::D2CViewer(rclcpp::Node* const node, rmw_qos_profile_t rgb_qos,
3030
rmw_qos_profile_t depth_qos)
31-
: node_(node), logger_(rclcpp::get_logger("d2c_viewer")) {
31+
: node_(node), logger_(rclcpp::get_logger("d2c_viewer")), is_active_(true) {
3232
rgb_sub_ = std::make_shared<message_filters::Subscriber<sensor_msgs::msg::Image>>(
3333
node_, "color/image_raw", rgb_qos);
3434
depth_sub_ = std::make_shared<message_filters::Subscriber<sensor_msgs::msg::Image>>(
@@ -43,10 +43,36 @@ D2CViewer::D2CViewer(rclcpp::Node* const node, rmw_qos_profile_t rgb_qos,
4343
d2c_viewer_pub_ =
4444
node_->create_publisher<sensor_msgs::msg::Image>("depth_to_color/image_raw", rclcpp::QoS(1));
4545
}
46-
D2CViewer::~D2CViewer() = default;
46+
47+
D2CViewer::~D2CViewer() {
48+
is_active_.store(false);
49+
50+
// Safely shut down subscribers and synchronizer
51+
{
52+
std::lock_guard<std::mutex> lock(callback_mutex_);
53+
if (sync_) {
54+
sync_.reset();
55+
}
56+
if (rgb_sub_) {
57+
rgb_sub_.reset();
58+
}
59+
if (depth_sub_) {
60+
depth_sub_.reset();
61+
}
62+
if (d2c_viewer_pub_) {
63+
d2c_viewer_pub_.reset();
64+
}
65+
}
66+
}
4767

4868
void D2CViewer::messageCallback(const sensor_msgs::msg::Image::ConstSharedPtr& rgb_msg,
4969
const sensor_msgs::msg::Image::ConstSharedPtr& depth_msg) {
70+
71+
std::lock_guard<std::mutex> lock(callback_mutex_);
72+
if (!is_active_.load()) {
73+
return;
74+
}
75+
5076
if (rgb_msg->width != depth_msg->width || rgb_msg->height != depth_msg->height) {
5177
RCLCPP_ERROR(logger_, "rgb and depth image size not match(%d, %d) vs (%d, %d)", rgb_msg->width,
5278
rgb_msg->height, depth_msg->width, depth_msg->height);

orbbec_camera/src/ob_camera_node.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ OBCameraNode::~OBCameraNode() noexcept { clean(); }
113113
void OBCameraNode::rebootDevice() {
114114
RCLCPP_INFO_STREAM(logger_, "Do clean before rebooting device");
115115
malloc_trim(0);
116-
stopStreams();
116+
clean();
117117
malloc_trim(0);
118118
std::lock_guard<decltype(device_lock_)> lock(device_lock_);
119119
RCLCPP_INFO_STREAM(logger_, "Reboot device");
@@ -125,11 +125,10 @@ void OBCameraNode::rebootDevice() {
125125
}
126126

127127
void OBCameraNode::clean() noexcept {
128-
std::lock_guard<decltype(device_lock_)> lock(device_lock_);
129-
RCLCPP_WARN_STREAM(logger_, "Do destroy ~OBCameraNode");
128+
// Set running flag to false first to signal all operations to stop
130129
is_running_.store(false);
131130

132-
// Stop diagnostic timer and updater first to prevent access to disconnected device
131+
// Stop diagnostic timer and updater first BEFORE acquiring device_lock to prevent deadlock
133132
try {
134133
if (diagnostic_timer_) {
135134
diagnostic_timer_->cancel();
@@ -142,6 +141,10 @@ void OBCameraNode::clean() noexcept {
142141
// Ignore exceptions during diagnostic cleanup
143142
}
144143

144+
// Now acquire the device lock for the rest of the cleanup
145+
std::lock_guard<decltype(device_lock_)> lock(device_lock_);
146+
RCLCPP_WARN_STREAM(logger_, "Do destroy ~OBCameraNode");
147+
145148
RCLCPP_WARN_STREAM(logger_, "Stop tf thread");
146149
try {
147150
if (tf_thread_ && tf_thread_->joinable()) {

orbbec_camera/src/ob_camera_node_driver.cpp

Lines changed: 139 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,13 @@ void OBCameraNodeDriver::onDeviceConnected(const std::shared_ptr<ob::DeviceList>
239239
if (device_list->getCount() == 0) {
240240
return;
241241
}
242+
243+
// Check if device is already connected or connecting
244+
if (device_connected_.load() || device_connecting_.load()) {
245+
RCLCPP_DEBUG_STREAM(logger_, "onDeviceConnected: device already connected or connecting");
246+
return;
247+
}
248+
242249
if (!device_) {
243250
startDevice(device_list);
244251
}
@@ -250,17 +257,28 @@ void OBCameraNodeDriver::onDeviceDisconnected(const std::shared_ptr<ob::DeviceLi
250257
return;
251258
}
252259
RCLCPP_INFO_STREAM(logger_, "onDeviceDisconnected");
260+
261+
// Check if device connection/initialization is in progress
262+
if (device_connecting_.load()) {
263+
RCLCPP_INFO_STREAM(logger_, "onDeviceDisconnected: device connection/initialization in progress, ignoring disconnect event");
264+
return;
265+
}
266+
267+
std::unique_lock<decltype(reset_device_mutex_)> reset_device_lock(reset_device_mutex_);
268+
269+
std::lock_guard<decltype(device_lock_)> lock(device_lock_);
270+
253271
for (size_t i = 0; i < device_list->getCount(); i++) {
254272
std::string uid = device_list->getUid(i);
255273
std::string serial_number = device_list->getSerialNumber(i);
256-
std::lock_guard<decltype(device_lock_)> lock(device_lock_);
257274
RCLCPP_INFO_STREAM(logger_, "device with " << uid << " disconnected");
258275
if (uid == device_unique_id_ || serial_number_ == serial_number) {
259276
RCLCPP_INFO_STREAM(logger_,
260-
"device with " << uid << " disconnected, notify reset device thread.");
261-
std::unique_lock<decltype(reset_device_mutex_)> reset_device_lock(reset_device_mutex_);
277+
"device with " << uid << " disconnected, notify reset device thread 1.");
262278
reset_device_flag_ = true;
263279
reset_device_cond_.notify_all();
280+
RCLCPP_INFO_STREAM(logger_,
281+
"device with " << uid << " disconnected, notify reset device thread 2.");
264282
break;
265283
}
266284
}
@@ -294,6 +312,26 @@ void OBCameraNodeDriver::checkConnectTimer() {
294312

295313
void OBCameraNodeDriver::queryDevice() {
296314
while (is_alive_ && rclcpp::ok() && !device_connected_.load()) {
315+
// Check if device reset is in progress before attempting to connect
316+
{
317+
std::unique_lock<decltype(reset_device_mutex_)> reset_lock(reset_device_mutex_);
318+
if (reset_device_flag_) {
319+
RCLCPP_INFO_STREAM(logger_, "queryDevice: device reset in progress, waiting...");
320+
reset_device_cond_.wait(reset_lock, [this]() { return !reset_device_flag_ || !is_alive_ || !rclcpp::ok(); });
321+
if (!is_alive_ || !rclcpp::ok()) {
322+
return;
323+
}
324+
RCLCPP_INFO_STREAM(logger_, "queryDevice: device reset completed, continuing connection");
325+
}
326+
}
327+
328+
// Check if connection is already in progress
329+
if (device_connecting_.load()) {
330+
RCLCPP_DEBUG_STREAM(logger_, "queryDevice: device connection already in progress, waiting...");
331+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
332+
continue;
333+
}
334+
297335
if (!enumerate_net_device_ && !net_device_ip_.empty() && net_device_port_ != 0) {
298336
connectNetDevice(net_device_ip_, net_device_port_);
299337
} else {
@@ -306,6 +344,9 @@ void OBCameraNodeDriver::queryDevice() {
306344
}
307345
startDevice(device_list);
308346
}
347+
348+
// Add a small delay to prevent tight loop
349+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
309350
}
310351
}
311352

@@ -330,11 +371,52 @@ void OBCameraNodeDriver::resetDevice() {
330371
}
331372
}
332373

333-
// Now reset the objects
334-
ob_camera_node_.reset();
335-
device_.reset();
336-
device_info_.reset();
374+
// Mark device as disconnected immediately to prevent other threads from accessing it
337375
device_connected_ = false;
376+
device_connecting_ = false; // Clear connecting flag
377+
378+
// Reset objects in order, with additional safety checks
379+
if (ob_camera_node_) {
380+
try {
381+
RCLCPP_INFO_STREAM(logger_, "Resetting ob_camera_node_");
382+
ob_camera_node_.reset();
383+
RCLCPP_INFO_STREAM(logger_, "ob_camera_node_ reset completed");
384+
} catch (...) {
385+
RCLCPP_WARN_STREAM(logger_, "Exception during ob_camera_node reset");
386+
}
387+
}
388+
389+
// Allow more time for internal SDK cleanup
390+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
391+
392+
if (device_) {
393+
try {
394+
RCLCPP_INFO_STREAM(logger_, "Resetting device_");
395+
// Force free any idle memory before device reset
396+
if (ctx_) {
397+
try {
398+
ctx_->freeIdleMemory();
399+
} catch (...) {
400+
// Ignore exceptions during memory cleanup
401+
}
402+
}
403+
device_.reset();
404+
RCLCPP_INFO_STREAM(logger_, "device_ reset completed");
405+
} catch (...) {
406+
RCLCPP_WARN_STREAM(logger_, "Exception during device reset");
407+
}
408+
}
409+
410+
if (device_info_) {
411+
try {
412+
RCLCPP_INFO_STREAM(logger_, "Resetting device_info_");
413+
device_info_.reset();
414+
RCLCPP_INFO_STREAM(logger_, "device_info_ reset completed");
415+
} catch (...) {
416+
RCLCPP_WARN_STREAM(logger_, "Exception during device_info reset");
417+
}
418+
}
419+
338420
device_unique_id_.clear();
339421
}
340422
reset_device_flag_ = false;
@@ -620,19 +702,57 @@ void OBCameraNodeDriver::connectNetDevice(const std::string &net_device_ip, int
620702
RCLCPP_ERROR_STREAM(logger_, "Invalid net device ip or port");
621703
return;
622704
}
705+
706+
// Check if already connecting
707+
bool expected = false;
708+
if (!device_connecting_.compare_exchange_strong(expected, true)) {
709+
RCLCPP_DEBUG_STREAM(logger_, "connectNetDevice: connection already in progress");
710+
return;
711+
}
712+
713+
// Use RAII to ensure connecting flag is cleared
714+
std::shared_ptr<int> connecting_guard(nullptr, [this](int *) {
715+
device_connecting_.store(false);
716+
});
717+
623718
std::this_thread::sleep_for(std::chrono::milliseconds(connection_delay_));
624719
auto device = ctx_->createNetDevice(net_device_ip.c_str(), net_device_port);
625720
if (device == nullptr) {
626721
RCLCPP_ERROR_STREAM(logger_, "Failed to connect to net device " << net_device_ip);
627722
return;
628723
}
629-
initializeDevice(device);
724+
try {
725+
initializeDevice(device);
726+
if (!device_connected_) {
727+
RCLCPP_ERROR_STREAM(logger_, "Failed to initialize net device " << net_device_ip);
728+
}
729+
} catch (const std::exception &e) {
730+
RCLCPP_ERROR_STREAM(logger_, "Exception during net device initialization: " << e.what());
731+
device_connected_ = false;
732+
} catch (...) {
733+
RCLCPP_ERROR_STREAM(logger_, "Unknown exception during net device initialization");
734+
device_connected_ = false;
735+
}
630736
}
631737

632738
void OBCameraNodeDriver::startDevice(const std::shared_ptr<ob::DeviceList> &list) {
633-
if (device_connected_) {
739+
if (device_connected_.load()) {
740+
return;
741+
}
742+
743+
// Try to set connecting flag atomically
744+
bool expected = false;
745+
if (!device_connecting_.compare_exchange_strong(expected, true)) {
746+
RCLCPP_DEBUG_STREAM(logger_, "startDevice: connection already in progress by another thread");
634747
return;
635748
}
749+
750+
// Use RAII to ensure connecting flag is cleared
751+
std::shared_ptr<int> connecting_guard(nullptr, [this](int *) {
752+
device_connecting_.store(false);
753+
RCLCPP_DEBUG_STREAM(logger_, "startDevice: connecting flag cleared");
754+
});
755+
636756
if (list->getCount() == 0) {
637757
RCLCPP_WARN(logger_, "No device found");
638758
return;
@@ -687,6 +807,16 @@ void OBCameraNodeDriver::startDevice(const std::shared_ptr<ob::DeviceList> &list
687807
time_cost = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
688808
RCLCPP_INFO_STREAM(logger_, "Initialize device cost " << time_cost.count() << " ms");
689809

810+
if(firmware_update_success_)
811+
{
812+
firmware_update_success_ = false;
813+
device_connected_ = false;
814+
std::unique_lock<decltype(reset_device_mutex_)> reset_device_lock(reset_device_mutex_);
815+
reset_device_flag_ = true;
816+
reset_device_cond_.notify_all();
817+
return;
818+
}
819+
690820
auto pid = device->getDeviceInfo()->getPid();
691821
if (GEMINI_335LG_PID == pid) {
692822
ob_camera_node_->startGmslTrigger();

0 commit comments

Comments
 (0)