userver-framework
diff --git a/‎.mapping.json‎
Lines changed: 6 additions & 0 deletions b/‎.mapping.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎core/include/userver/engine/task_queue_type.hpp‎
Lines changed: 2 additions & 0 deletions b/‎core/include/userver/engine/task_queue_type.hpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎core/src/concurrent/impl/wait_wake.hpp‎
Lines changed: 112 additions & 0 deletions b/‎core/src/concurrent/impl/wait_wake.hpp‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎core/src/concurrent/impl/wait_wake_test.cpp‎
Lines changed: 174 additions & 0 deletions b/‎core/src/concurrent/impl/wait_wake_test.cpp‎
Lines changed: 174 additions & 0 deletions
@@ -1378,6 +1378,8 @@
   "core/src/concurrent/impl/striped_read_indicator.cpp":"taxi/uservices/userver/core/src/concurrent/impl/striped_read_indicator.cpp",
   "core/src/concurrent/impl/striped_read_indicator_benchmark.cpp":"taxi/uservices/userver/core/src/concurrent/impl/striped_read_indicator_benchmark.cpp",
   "core/src/concurrent/impl/striped_read_indicator_test.cpp":"taxi/uservices/userver/core/src/concurrent/impl/striped_read_indicator_test.cpp",
+  "core/src/concurrent/impl/wait_wake.hpp":"taxi/uservices/userver/core/src/concurrent/impl/wait_wake.hpp",
+  "core/src/concurrent/impl/wait_wake_test.cpp":"taxi/uservices/userver/core/src/concurrent/impl/wait_wake_test.cpp",
   "core/src/concurrent/intrusive_walkable_pool.hpp":"taxi/uservices/userver/core/src/concurrent/intrusive_walkable_pool.hpp",
   "core/src/concurrent/intrusive_walkable_pool_benchmark.cpp":"taxi/uservices/userver/core/src/concurrent/intrusive_walkable_pool_benchmark.cpp",
   "core/src/concurrent/intrusive_walkable_pool_test.cpp":"taxi/uservices/userver/core/src/concurrent/intrusive_walkable_pool_test.cpp",
@@ -1662,11 +1664,15 @@
   "core/src/engine/task/task_processor_test.cpp":"taxi/uservices/userver/core/src/engine/task/task_processor_test.cpp",
   "core/src/engine/task/task_queue.cpp":"taxi/uservices/userver/core/src/engine/task/task_queue.cpp",
   "core/src/engine/task/task_queue.hpp":"taxi/uservices/userver/core/src/engine/task/task_queue.hpp",
+  "core/src/engine/task/task_queue_pull_pin.cpp":"taxi/uservices/userver/core/src/engine/task/task_queue_pull_pin.cpp",
+  "core/src/engine/task/task_queue_pull_pin.hpp":"taxi/uservices/userver/core/src/engine/task/task_queue_pull_pin.hpp",
+  "core/src/engine/task/task_queue_pull_pin_test.cpp":"taxi/uservices/userver/core/src/engine/task/task_queue_pull_pin_test.cpp",
   "core/src/engine/task/task_queue_tsan.cpp":"taxi/uservices/userver/core/src/engine/task/task_queue_tsan.cpp",
   "core/src/engine/task/task_queue_tsan.hpp":"taxi/uservices/userver/core/src/engine/task/task_queue_tsan.hpp",
   "core/src/engine/task/task_queue_tsan_test.cpp":"taxi/uservices/userver/core/src/engine/task/task_queue_tsan_test.cpp",
   "core/src/engine/task/task_test.cpp":"taxi/uservices/userver/core/src/engine/task/task_test.cpp",
   "core/src/engine/task/task_with_result_test.cpp":"taxi/uservices/userver/core/src/engine/task/task_with_result_test.cpp",
+  "core/src/engine/task/thread_id_test.hpp":"taxi/uservices/userver/core/src/engine/task/thread_id_test.hpp",
   "core/src/engine/task/thread_started_hook_test.cpp":"taxi/uservices/userver/core/src/engine/task/thread_started_hook_test.cpp",
   "core/src/engine/task/work_stealing_queue/consumer.cpp":"taxi/uservices/userver/core/src/engine/task/work_stealing_queue/consumer.cpp",
   "core/src/engine/task/work_stealing_queue/consumer.hpp":"taxi/uservices/userver/core/src/engine/task/work_stealing_queue/consumer.hpp",
 
@@ -13,6 +13,8 @@ namespace engine {
 enum class TaskQueueType {
     kGlobalTaskQueue,        /// < Global `moodycamel` queue from which each thread gets tasks
     kWorkStealingTaskQueue,  /// < Global+thread-specific queues with interqueues work stealing (experimental queue)
+    kPullPinTaskQueue,  /// < Global+thread-specific queues. Each task gets pinned to a thread-specific queue and is
+                        /// executed only in that thread (experimental queue)
     kTSanTaskQueue,  /// < Queue for TSan runs. Each task gets pinned to a thread-specific queue and is executed only in
                      /// that thread (experimental queue). Thread Sanitizer runs are automatically switched to this
                      /// queue
 
@@ -0,0 +1,112 @@
+#pragma once
+
+#include <cerrno>
+#include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <mutex>
+#include <utility>
+
+#include <userver/utils/assert.hpp>
+
+#if __has_include(<linux/futex.h>)
+
+#include <linux/futex.h> /* Definition of FUTEX_* constants */
+#include <sys/syscall.h> /* Definition of SYS_* constants */
+#include <unistd.h>
+
+#endif
+
+USERVER_NAMESPACE_BEGIN
+
+namespace concurrent::impl {
+
+// Portable wait/wake primitive built on a mutex and a condition variable.
+//
+// It has no notion of per-index waiters: every Wakeup* call notifies all waiters, and each of them re-evaluates
+// its own predicate. The state read by the predicates must be changed BEFORE calling a Wakeup* function.
+class WaitWakeCondvar final {
+public:
+    WaitWakeCondvar() = default;
+
+    // Notifies every waiter. The condition variable does not report how many threads were notified, so the
+    // maximum std::size_t value is returned.
+    std::size_t WakeupAll() {
+        {
+            // A waiter holds mutex_ from its predicate check until it is blocked in wait(). Acquiring and
+            // releasing the mutex here ensures the notification below cannot land between those two steps.
+            const std::lock_guard<std::mutex> fence{mutex_};
+        }
+        condvar_.notify_all();
+        return std::numeric_limits<std::size_t>::max();
+    }
+
+    // The limit is ignored: all waiters are notified.
+    std::size_t WakeupSome(int /*wakeup_at_most*/) { return WakeupAll(); }
+
+    // May wake up more than one waiter due to internal limitations: the index is ignored, all waiters are notified.
+    std::size_t WakeupByIndex(std::size_t /*index*/) { return WakeupAll(); }
+
+    // Blocks the calling thread until `pred()` returns true. The predicate is invoked with mutex_ held.
+    template <class Predicate>
+    void WaitByIndex(std::size_t /*index*/, Predicate pred) {
+        std::unique_lock<std::mutex> lock{mutex_};
+        while (!pred()) {
+            condvar_.wait(lock);
+        }
+    }
+
+private:
+    std::mutex mutex_{};
+    std::condition_variable condvar_{};
+};
+
+#if __has_include(<linux/futex.h>)
+
+// Wait/wake primitive built directly on the Linux futex syscall.
+//
+// Waiters sleep in the kernel on `generation_`. Every Wakeup* call increments the generation before issuing the
+// wake syscall, so a waiter that has sampled the old generation but has not reached the kernel yet gets EAGAIN
+// from FUTEX_WAIT_BITSET and re-checks its predicate instead of missing the wake-up.
+//
+// The state read by the predicates must be changed BEFORE calling a Wakeup* function.
+class WaitWakeFutex final {
+public:
+    WaitWakeFutex() = default;
+
+    // Wakes up all waiters.
+    // Returns number of woken up waiters that were sleeping in OS (may return less than actually woke up)
+    std::size_t WakeupAll() { return WakeupByBitmask(FUTEX_BITSET_MATCH_ANY, kAllBitsetWaiters); }
+
+    // Wakes up at most `wakeup_at_most` waiters regardless of their index.
+    // Returns number of woken up waiters that were sleeping in OS (may return less than actually woke up)
+    std::size_t WakeupSome(int wakeup_at_most) { return WakeupByBitmask(FUTEX_BITSET_MATCH_ANY, wakeup_at_most); }
+
+    // Wakes up the waiters that passed an index equal to `index` modulo kBitsInBitset to WaitByIndex.
+    // May wake up more than one waiter due to internal limitations (may return less than actually woke up)
+    std::size_t WakeupByIndex(std::size_t index) { return WakeupByBitmask(IndexToBitmask(index), kAllBitsetWaiters); }
+
+    // Blocks the calling thread until `pred()` returns true. The predicate is re-evaluated after every wake-up,
+    // including generation changes caused by wake-ups addressed to other indexes and signal interruptions.
+    template <class Predicate>
+    void WaitByIndex(std::size_t index, Predicate pred) {
+        const auto bitmask = IndexToBitmask(index);
+
+        for (;;) {
+            // The generation must be sampled BEFORE the predicate is evaluated: if a wake-up happens after the
+            // predicate check, the kernel sees a changed generation and refuses to sleep (EAGAIN).
+            std::uint32_t snapshot;  // NOLINT(cppcoreguidelines-init-variables)
+            __atomic_load(&generation_, &snapshot, __ATOMIC_SEQ_CST);
+            if (pred()) {
+                break;
+            }
+
+            const auto ret = syscall(SYS_futex, &generation_, FUTEX_WAIT_BITSET, snapshot, nullptr, nullptr, bitmask);
+            // EAGAIN/EWOULDBLOCK: the generation changed before going to sleep.
+            // EINTR: the wait was interrupted by a signal; not an error, just re-check the predicate.
+            UINVARIANT(
+                ret != -1 || errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR,
+                "Failure in futex(FUTEX_WAIT_BITSET)"
+            );
+        }
+    }
+
+private:
+    // Upper bound for "wake everyone": the futex wake count is a signed int.
+    static constexpr int kAllBitsetWaiters = std::numeric_limits<int>::max();
+
+    // Number of distinct waiter groups representable in the 32-bit futex bitset.
+    static constexpr std::size_t kBitsInBitset = 32;
+
+    // Maps an index to a single-bit mask. The shift is done on an unsigned value so that bit 31 does not overflow
+    // a signed int.
+    static constexpr std::uint32_t IndexToBitmask(std::size_t index) {
+        return std::uint32_t{1} << (index % kBitsInBitset);
+    }
+
+    // Publishes a new generation and wakes up to `wakeup_at_most` waiters whose bitmask intersects `bitmask`.
+    // Returns the number of waiters reported as woken by the kernel.
+    std::size_t WakeupByBitmask(std::uint32_t bitmask, int wakeup_at_most) {
+        __atomic_add_fetch(&generation_, 1, __ATOMIC_SEQ_CST);
+
+        const auto ret = syscall(SYS_futex, &generation_, FUTEX_WAKE_BITSET, wakeup_at_most, nullptr, nullptr, bitmask);
+        UINVARIANT(ret != -1 || errno == EAGAIN || errno == EWOULDBLOCK, "Failure in futex(FUTEX_WAKE_BITSET)");
+        // A tolerated failure (-1) means nobody was woken; do not let it wrap around to a huge unsigned count.
+        return ret < 0 ? std::size_t{0} : static_cast<std::size_t>(ret);
+    }
+
+    std::uint32_t generation_{0};
+};
+
+using WaitWake = WaitWakeFutex;
+
+#else
+
+using WaitWake = WaitWakeCondvar;
+
+#endif
+
+}  // namespace concurrent::impl
+
+USERVER_NAMESPACE_END
@@ -0,0 +1,174 @@
+#include <concurrent/impl/wait_wake.hpp>
+
+#include <thread>
+
+#include <gtest/gtest.h>
+
+USERVER_NAMESPACE_BEGIN
+
+namespace {
+
+// Number of waiter threads spawned by the multi-threaded tests.
+constexpr std::size_t kTestThreadsCount = 100;
+
+// Best-effort pause that lets the already started waiter threads reach the blocking call. It is a heuristic,
+// not a guarantee. Kept in the anonymous namespace so the helper does not get external linkage.
+void GiveTimeToEnterSysCall() { std::this_thread::sleep_for(std::chrono::milliseconds(4)); }
+
+}  // namespace
+
+// One waiter, one waker: the waiter must evaluate its predicate at least once before blocking and must return
+// only after the waker has published state == 2 and called WakeupAll().
+TEST(WaitWake, SingleThread) {
+    concurrent::impl::WaitWake wait_wake;
+    std::atomic<int> state{0};
+    std::atomic<bool> predicate_was_called{false};
+
+    std::thread waiter{[&wait_wake, &state, &predicate_was_called]() {
+        const auto is_ready = [&state, &predicate_was_called]() {
+            // Only the very first predicate call moves the state 0 -> 1, signalling the main thread.
+            const bool first_call = !predicate_was_called.exchange(true);
+            if (first_call) {
+                ++state;
+            }
+            return state.load() == 2;
+        };
+        wait_wake.WaitByIndex(0, is_ready);
+        EXPECT_EQ(state.load(), 2);
+        ++state;
+    }};
+
+    // Spin until the waiter has run its predicate for the first time.
+    while (state != 1) {
+        std::this_thread::yield();
+    }
+    EXPECT_TRUE(predicate_was_called);
+
+    // Publish the new state first, then wake: this is the order WaitWake requires.
+    state = 2;
+    wait_wake.WakeupAll();
+
+    waiter.join();
+    EXPECT_EQ(state.load(), 3);
+}
+
+TEST(WaitWake, WakeupTriggersPredicateRecheck) {
+    concurrent::impl::WaitWake ww;
+    std::atomic<int> state = 0;
+    std::thread t{[&ww, &state]() {
+        ww.WaitByIndex(0, [&state]() {
+            ++state;
+            return state.load() >= 2;
+        });
+    }};
+
+    while (state != 1) {
+        std::this_thread::yield();
+    }
+
+    // Make sure that wakeup not lost
+    const auto woken_up = ww.WakeupAll();
+
+    // Other thread may not have entered the OS sleep yet, but must wake up anyway
+    EXPECT_LE(woken_up, 1);
+
+    t.join();
+    EXPECT_GE(state.load(), 2);
+}
+
+// Many waiters share index 0; a single WakeupAll() after the final state change must release every one of them.
+TEST(WaitWake, MultipleThreads) {
+    concurrent::impl::WaitWake wait_wake;
+    std::atomic<int> state{0};
+
+    std::vector<std::thread> waiters;
+    waiters.reserve(kTestThreadsCount);
+    for (std::size_t started = 0; started < kTestThreadsCount; ++started) {
+        waiters.emplace_back([&wait_wake, &state]() {
+            // Report "started" before waiting for the release value.
+            ++state;
+            const auto is_released = [&state]() { return state.load() == kTestThreadsCount + 1; };
+            wait_wake.WaitByIndex(0, is_released);
+            EXPECT_EQ(state.load(), kTestThreadsCount + 1);
+        });
+    }
+
+    // Spin until every waiter thread has reported in.
+    while (state != kTestThreadsCount) {
+        std::this_thread::yield();
+    }
+
+    // Publish the release value, then wake everyone.
+    ++state;
+    wait_wake.WakeupAll();
+    for (auto& waiter : waiters) {
+        waiter.join();
+    }
+}
+
+#if __has_include(<linux/futex.h>)
+
+// Futex-only: each waiter sleeps under its own index. Waking index 0 must release some, but not all, of the
+// waiters; the rest are released afterwards by waking every remaining index.
+TEST(WaitWake, FutexWakeupByIndex) {
+    concurrent::impl::WaitWake wait_wake;
+    std::atomic<std::size_t> state{0};
+
+    std::vector<std::thread> waiters;
+    waiters.reserve(kTestThreadsCount);
+    for (std::size_t index = 0; index < kTestThreadsCount; ++index) {
+        waiters.emplace_back([index, &wait_wake, &state]() {
+            ++state;  // started
+            wait_wake.WaitByIndex(index, [&state]() { return state.load() >= kTestThreadsCount + 1; });
+            ++state;  // released
+        });
+    }
+
+    // Spin until every waiter thread has reported in, then give them a chance to block in the kernel.
+    while (state != kTestThreadsCount) {
+        std::this_thread::yield();
+    }
+    GiveTimeToEnterSysCall();
+
+    ++state;
+    const auto woken_up = wait_wake.WakeupByIndex(0);
+
+    // WaitWake distinguishes 32 indexes, but some waiters may not have entered the OS sleep yet and spurious
+    // wakeups may happen. Only check that not all of the waiters were woken up.
+    EXPECT_LE(woken_up, kTestThreadsCount / 2);
+    EXPECT_GE(woken_up, 1);  // not guaranteed, but holds due to GiveTimeToEnterSysCall() and huge kTestThreadsCount
+
+    // Poll until at least one of the affected waiters has reported its release.
+    for (;;) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(4));
+        if (state != kTestThreadsCount + 1) {
+            break;
+        }
+    }
+    EXPECT_LT(state.load(), kTestThreadsCount * 2);
+
+    // Release everybody else, index by index.
+    for (std::size_t index = 1; index < kTestThreadsCount; ++index) {
+        wait_wake.WakeupByIndex(index);
+    }
+
+    for (auto& waiter : waiters) {
+        waiter.join();
+    }
+}
+
+// Futex-only: WakeupSome(n) must wake no more than n sleeping waiters; a second call releases the remainder.
+TEST(WaitWake, FutexWakeupSome) {
+    concurrent::impl::WaitWake wait_wake;
+    std::atomic<std::size_t> state{0};
+
+    std::vector<std::thread> waiters;
+    waiters.reserve(kTestThreadsCount);
+    for (std::size_t index = 0; index < kTestThreadsCount; ++index) {
+        waiters.emplace_back([index, &wait_wake, &state]() {
+            ++state;  // started
+            wait_wake.WaitByIndex(index, [&state]() { return state.load() >= kTestThreadsCount + 1; });
+            ++state;  // released
+        });
+    }
+
+    // Spin until every waiter thread has reported in, then give them a chance to block in the kernel.
+    while (state != kTestThreadsCount) {
+        std::this_thread::yield();
+    }
+    GiveTimeToEnterSysCall();
+
+    ++state;
+    const int half_of_waiters{kTestThreadsCount / 2};
+    const int woken_up = wait_wake.WakeupSome(half_of_waiters);
+
+    // Some waiters may not have entered the OS sleep yet and spurious wakeups may happen. Only check that the
+    // reported count does not exceed the requested limit.
+    EXPECT_LE(woken_up, half_of_waiters);
+    EXPECT_GE(woken_up, 1);  // not guaranteed, but holds due to GiveTimeToEnterSysCall() and huge kTestThreadsCount
+
+    // Poll until at least `half_of_waiters` waiters have reported their release.
+    for (;;) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(4));
+        if (!(state < kTestThreadsCount + half_of_waiters + 1)) {
+            break;
+        }
+    }
+
+    // Release the remaining waiters.
+    wait_wake.WakeupSome(kTestThreadsCount - half_of_waiters);
+    for (auto& waiter : waiters) {
+        waiter.join();
+    }
+}
+
+#endif
+
+USERVER_NAMESPACE_END