-
Notifications
You must be signed in to change notification settings - Fork 27
Fix sigabrt in module integration tests #421
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
d7d4856
f9a273c
ee5c4e8
c1f3d55
57a9221
4d2902b
2316a02
d7e41da
62617f3
2fd5c29
c43c973
b7e6ecd
0e13a9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -129,9 +129,7 @@ RobotClient::~RobotClient() { | |
|
|
||
| void RobotClient::close() { | ||
| should_refresh_.store(false); | ||
| for (const std::shared_ptr<std::thread>& t : threads_) { | ||
| t->~thread(); | ||
| } | ||
| threads_.clear(); | ||
| stop_all(); | ||
| viam_channel_->close(); | ||
| } | ||
|
|
@@ -292,13 +290,12 @@ std::shared_ptr<RobotClient> RobotClient::with_channel(std::shared_ptr<ViamChann | |
| robot->refresh_interval_ = options.refresh_interval(); | ||
| robot->should_refresh_ = (robot->refresh_interval_ > 0); | ||
| if (robot->should_refresh_) { | ||
| const std::shared_ptr<std::thread> t = | ||
| std::make_shared<std::thread>(&RobotClient::refresh_every, robot); | ||
| auto t = std::thread(&RobotClient::refresh_every, robot); | ||
| // TODO(RSDK-1743): this was leaking, confirm that adding thread catching in | ||
| // close/destructor lets us shutdown gracefully. See also address sanitizer, | ||
| // UB sanitizer | ||
| t->detach(); | ||
| robot->threads_.push_back(t); | ||
| t.detach(); | ||
|
||
| robot->threads_.push_back(std::move(t)); | ||
| }; | ||
|
|
||
| robot->refresh(); | ||
|
|
@@ -318,11 +315,10 @@ std::shared_ptr<RobotClient> RobotClient::at_address(const std::string& address, | |
| std::shared_ptr<RobotClient> RobotClient::at_local_socket(const std::string& address, | ||
| const Options& options) { | ||
| const std::string addr = "unix://" + address; | ||
| const char* uri = addr.c_str(); | ||
| const std::shared_ptr<grpc::Channel> channel = | ||
| sdk::impl::create_viam_channel(uri, grpc::InsecureChannelCredentials()); | ||
| auto viam_channel = std::make_shared<ViamChannel>(channel, address.c_str(), nullptr); | ||
| std::shared_ptr<RobotClient> robot = RobotClient::with_channel(viam_channel, options); | ||
| sdk::impl::create_viam_channel(addr, grpc::InsecureChannelCredentials()); | ||
| std::shared_ptr<RobotClient> robot = | ||
|
||
| RobotClient::with_channel(std::make_shared<ViamChannel>(channel), options); | ||
| robot->should_close_channel_ = true; | ||
|
|
||
| return robot; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,18 +18,33 @@ | |
| namespace viam { | ||
| namespace sdk { | ||
|
|
||
| const std::shared_ptr<grpc::Channel>& ViamChannel::channel() const { | ||
| return channel_; | ||
| ViamChannel::RustDialData::RustDialData(const char* path_, void* runtime) | ||
| : path(path_), rust_runtime(runtime) {} | ||
|
|
||
| ViamChannel::RustDialData::RustDialData(RustDialData&& other) noexcept | ||
| : path(std::exchange(other.path, nullptr)), | ||
| rust_runtime(std::exchange(other.rust_runtime, nullptr)) {} | ||
|
|
||
| ViamChannel::RustDialData& ViamChannel::RustDialData::operator=(RustDialData&& other) noexcept { | ||
| path = std::exchange(other.path, nullptr); | ||
| rust_runtime = std::exchange(other.rust_runtime, nullptr); | ||
|
|
||
| return *this; | ||
| } | ||
|
||
|
|
||
| void ViamChannel::close() { | ||
| if (closed_) { | ||
| return; | ||
| } | ||
| closed_ = true; | ||
| free_string(path_); | ||
| free_rust_runtime(rust_runtime_); | ||
| }; | ||
| ViamChannel::RustDialData::~RustDialData() { | ||
| free_string(path); | ||
| free_rust_runtime(rust_runtime); | ||
| } | ||
|
|
||
| ViamChannel::ViamChannel(std::shared_ptr<grpc::Channel> channel, const char* path, void* runtime) | ||
| : channel_(std::move(channel)), rust_data_(RustDialData(path, runtime)) {} | ||
|
|
||
| ViamChannel::ViamChannel(std::shared_ptr<grpc::Channel> channel) : channel_(std::move(channel)) {} | ||
|
|
||
| ViamChannel::~ViamChannel() { | ||
| close(); | ||
| } | ||
|
|
||
| const std::string& Credentials::type() const { | ||
| return type_; | ||
|
|
@@ -39,9 +54,6 @@ const std::string& Credentials::payload() const { | |
| return payload_; | ||
| } | ||
|
|
||
| ViamChannel::ViamChannel(std::shared_ptr<grpc::Channel> channel, const char* path, void* runtime) | ||
| : channel_(std::move(channel)), path_(path), closed_(false), rust_runtime_(runtime) {} | ||
|
|
||
| DialOptions::DialOptions() = default; | ||
|
|
||
| DialOptions& DialOptions::set_credentials(boost::optional<Credentials> creds) { | ||
|
|
@@ -162,7 +174,15 @@ std::shared_ptr<ViamChannel> ViamChannel::dial(const char* uri, | |
| const std::unique_ptr<viam::robot::v1::RobotService::Stub> st = | ||
| viam::robot::v1::RobotService::NewStub(channel); | ||
| return std::make_shared<ViamChannel>(channel, socket_path, ptr); | ||
| }; | ||
| } | ||
|
|
||
| const std::shared_ptr<grpc::Channel>& ViamChannel::channel() const { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was this just unimplemented before? If so, maybe that means nobody was using it, and therefore it could be renamed to something like
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. sorry weird
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So yes this was used, but I have taken this opportunity to remove the data member |
||
| return channel_; | ||
| } | ||
|
|
||
| void ViamChannel::close() { | ||
| rust_data_.reset(); | ||
| } | ||
|
|
||
| unsigned int Options::refresh_interval() const { | ||
| return refresh_interval_; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,9 +14,12 @@ namespace sdk { | |
| class DialOptions; | ||
| class ViamChannel { | ||
| public: | ||
| void close(); | ||
| explicit ViamChannel(std::shared_ptr<GrpcChannel> channel); | ||
|
|
||
| ViamChannel(std::shared_ptr<GrpcChannel> channel, const char* path, void* runtime); | ||
|
|
||
| ~ViamChannel(); | ||
|
|
||
| /// @brief Connects to a robot at the given URI address, using the provided dial options (or | ||
| /// default options if none are provided). Ignores initial connection options specifying | ||
| /// how many times to attempt to connect and with what timeout. | ||
|
|
@@ -38,11 +41,27 @@ class ViamChannel { | |
|
|
||
| const std::shared_ptr<GrpcChannel>& channel() const; | ||
|
|
||
| void close(); | ||
|
|
||
| private: | ||
| struct RustDialData { | ||
|
||
| RustDialData(const char* path_, void* runtime); | ||
|
||
|
|
||
| RustDialData(const RustDialData&) = delete; | ||
| RustDialData(RustDialData&&) noexcept; | ||
|
|
||
| RustDialData& operator=(const RustDialData&) = delete; | ||
| RustDialData& operator=(RustDialData&&) noexcept; | ||
|
|
||
| ~RustDialData(); | ||
|
|
||
| const char* path; | ||
| void* rust_runtime; | ||
|
||
| }; | ||
|
|
||
| std::shared_ptr<GrpcChannel> channel_; | ||
| const char* path_; | ||
| bool closed_; | ||
| void* rust_runtime_; | ||
|
|
||
| boost::optional<RustDialData> rust_data_; | ||
| }; | ||
|
|
||
| class Credentials { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Woah. This was also a double destroy for all these
`std::thread` objects, right? I'm sort of amazed this went unnoticed for so long. Are we doing ASAN / UBSAN builds anywhere in CI? If not, I think it might be time to do so.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
our unit tests in CI run with sanitized builds for shared libraries, but our sanitizer seems to only be ubsan, not asan. i'm pretty surprised this hasn't been caught before either, and same with the other main issue being resolved in this PR which is that we had an instance of rust-utils
`free_string` being called on a `c_str` managed by a `std::string`. perhaps a blessing in disguise that logging directed us to these!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ASAN and UBSAN work together just fine so it might be pretty easy to light it up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
iirc turning on ASAN causes compilation to fail because it modifies the underlying memory shape but gRPC is not being compiled with ASAN, leading to ABI errors (I might have the details here somewhat wrong). Definitely fixable but it means compiling gRPC by hand in tests, which I believe is why we haven't done it thus far.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's right. That's a nuisance, and one I'd forgotten about since most of my time with ASAN was in a world where all third party C++ dependencies were vendored (in large part, exactly to solve this). I wonder if
`conan` can be leveraged to solve this for us since it will build dependencies from source.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Realizing I encountered this too yesterday when trying to debug the original issue in this PR. As for conan, here is a discussion from 1.x but the answer is 'kinda sorta' conan-io/conan#4754