-
Notifications
You must be signed in to change notification settings - Fork 17
Antalya 25.8: Fix use after free in rescheduleTasksFromReplica #1561
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: antalya-25.8
Are you sure you want to change the base?
Changes from 2 commits
78304b3
c71caec
bbe007e
c6dda09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,13 +45,16 @@ ObjectInfoPtr StorageObjectStorageStableTaskDistributor::getNextTask(size_t numb | |
|
|
||
| saveLastNodeActivity(number_of_current_replica); | ||
|
|
||
| auto processed_file_list_ptr = replica_to_files_to_be_processed.find(number_of_current_replica); | ||
| if (processed_file_list_ptr == replica_to_files_to_be_processed.end()) | ||
| throw Exception( | ||
| ErrorCodes::LOGICAL_ERROR, | ||
| "Replica number {} was marked as lost, can't set task for it anymore", | ||
| number_of_current_replica | ||
| ); | ||
| { | ||
| std::lock_guard lock(mutex); | ||
| auto processed_file_list_ptr = replica_to_files_to_be_processed.find(number_of_current_replica); | ||
| if (processed_file_list_ptr == replica_to_files_to_be_processed.end()) | ||
| throw Exception( | ||
| ErrorCodes::LOGICAL_ERROR, | ||
| "Replica number {} was marked as lost, can't set task for it anymore", | ||
| number_of_current_replica | ||
| ); | ||
| } | ||
|
|
||
| // 1. Check pre-queued files first | ||
| auto file = getPreQueuedFile(number_of_current_replica); | ||
|
|
@@ -63,7 +66,19 @@ ObjectInfoPtr StorageObjectStorageStableTaskDistributor::getNextTask(size_t numb | |
| file = getAnyUnprocessedFile(number_of_current_replica); | ||
|
|
||
| if (file) | ||
| processed_file_list_ptr->second.push_back(file); | ||
| { | ||
| std::lock_guard lock(mutex); | ||
| auto processed_file_list_ptr = replica_to_files_to_be_processed.find(number_of_current_replica); | ||
| if (processed_file_list_ptr == replica_to_files_to_be_processed.end()) | ||
| { // It is possible that the replica was lost after the check at the beginning of the method | ||
| auto file_identifier = file->getAbsolutePath().value_or(file->getPath()); | ||
| auto file_replica_idx = getReplicaForFile(file_identifier); | ||
| unprocessed_files.emplace(file_identifier, std::make_pair(file, file_replica_idx)); | ||
|
||
| connection_to_files[file_replica_idx].push_back(file); | ||
| } | ||
| else | ||
| processed_file_list_ptr->second.push_back(file); | ||
| } | ||
|
|
||
| return file; | ||
| } | ||
|
|
@@ -192,7 +207,13 @@ ObjectInfoPtr StorageObjectStorageStableTaskDistributor::getMatchingFileFromIter | |
| file_identifier = object_info->getIdentifier(); | ||
| } | ||
|
|
||
| size_t file_replica_idx = getReplicaForFile(file_identifier); | ||
| size_t file_replica_idx; | ||
|
|
||
| { | ||
| std::lock_guard lock(mutex); | ||
| file_replica_idx = getReplicaForFile(file_identifier); | ||
| } | ||
|
|
||
| if (file_replica_idx == number_of_current_replica) | ||
| { | ||
| LOG_TRACE( | ||
|
|
@@ -308,8 +329,9 @@ void StorageObjectStorageStableTaskDistributor::rescheduleTasksFromReplica(size_ | |
| "All replicas were marked as lost" | ||
| ); | ||
|
|
||
| auto files = std::move(processed_file_list_ptr->second); | ||
| replica_to_files_to_be_processed.erase(number_of_current_replica); | ||
| for (const auto & file : processed_file_list_ptr->second) | ||
| for (const auto & file : files) | ||
| { | ||
| auto file_replica_idx = getReplicaForFile(file->getAbsolutePath().value_or(file->getPath())); | ||
| unprocessed_files.emplace(file->getAbsolutePath().value_or(file->getPath()), std::make_pair(file, file_replica_idx)); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a replica disappears mid-request, this branch re-queues the task using
`getAbsolutePath().value_or(getPath())` as the map key, but `getPreQueuedFile` later looks up queued tasks by `send_over_whole_archive ? getPathOrPathToArchiveIfArchive() : getIdentifier()`. For bucketed objects (where `getIdentifier()` includes a bucket suffix) or in archive mode, the key no longer matches, so pre-queued tasks are skipped, and `emplace` can collapse multiple buckets with the same path into one entry. Useful? React with 👍 / 👎.