diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake
index d0c4312..67babd3 100644
--- a/cmake/BuildSettings.cmake
+++ b/cmake/BuildSettings.cmake
@@ -3,7 +3,7 @@ if(${MSVC})
     set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
     set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /debug")
-elseif(${UNIX})
+else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
     if(NOT ${CYGWIN})    # Don't specify -pthread on Cygwin
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
diff --git a/common/sema.h b/common/sema.h
index 62d17ba..39fa827 100644
--- a/common/sema.h
+++ b/common/sema.h
@@ -8,6 +8,7 @@
 
 #include <atomic>
 #include <cassert>
+#include <type_traits>
 
 
 #if defined(_WIN32)
@@ -15,14 +16,23 @@
 // Semaphore (Windows)
 //---------------------------------------------------------
 
-#include <windows.h>
-#undef min
-#undef max
+// Avoid including windows.h in a header; we only need a handful of
+// items, so we'll redeclare them here (this is relatively safe since
+// the API generally has to remain stable between Windows versions).
+// I know this is an ugly hack but it still beats polluting the global
+// namespace with thousands of generic names or adding a .cpp for nothing.
+extern "C" {
+    struct _SECURITY_ATTRIBUTES;
+    __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
+    __declspec(dllimport) int __stdcall CloseHandle(void* hObject);
+    __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
+    __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
+}
 
 class Semaphore
 {
 private:
-    HANDLE m_hSema;
+    void* m_hSema;
 
     Semaphore(const Semaphore& other) = delete;
     Semaphore& operator=(const Semaphore& other) = delete;
@@ -31,7 +41,8 @@ class Semaphore
     Semaphore(int initialCount = 0)
     {
         assert(initialCount >= 0);
-        m_hSema = CreateSemaphore(NULL, initialCount, MAXLONG, NULL);
+        const long maxLong = 0x7fffffff;
+        m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
     }
 
     ~Semaphore()
@@ -41,12 +52,13 @@ class Semaphore
 
     void wait()
    {
-        WaitForSingleObject(m_hSema, INFINITE);
+        const unsigned long infinite = 0xffffffff;
+        WaitForSingleObject(m_hSema, infinite);
     }
 
     void signal(int count = 1)
     {
-        ReleaseSemaphore(m_hSema, count, NULL);
+        ReleaseSemaphore(m_hSema, count, nullptr);
     }
 };
 
@@ -164,13 +176,22 @@ class Semaphore
 //---------------------------------------------------------
 class LightweightSemaphore
 {
+public:
+    // The underlying semaphores are limited to int-sized counts,
+    // but there's no reason we can't scale higher on platforms with
+    // a wider size_t than int -- the only counts we pass on to the
+    // underlying semaphores are the number of waiting threads, which
+    // will always fit in an int for all platforms regardless of our
+    // high-level count.
+    typedef std::make_signed<std::size_t>::type ssize_t;
+
 private:
-    std::atomic<int> m_count;
+    std::atomic<ssize_t> m_count;
     Semaphore m_sema;
 
     void waitWithPartialSpinning()
     {
-        int oldCount;
+        ssize_t oldCount;
         // Is there a better way to set the initial spin count?
         // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
         // as threads start hitting the kernel semaphore.
@@ -178,7 +199,7 @@ class LightweightSemaphore
         while (spin--)
         {
             oldCount = m_count.load(std::memory_order_relaxed);
-            if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire))
+            if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
                 return;
             std::atomic_signal_fence(std::memory_order_acquire);     // Prevent the compiler from collapsing the loop.
         }
@@ -189,16 +210,45 @@
         }
     }
 
+    ssize_t waitManyWithPartialSpinning(ssize_t max)
+    {
+        assert(max > 0);
+        ssize_t oldCount;
+        int spin = 10000;
+        while (spin--)
+        {
+            oldCount = m_count.load(std::memory_order_relaxed);
+            if (oldCount > 0)
+            {
+                ssize_t newCount = oldCount > max ? oldCount - max : 0;
+                if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+                    return oldCount - newCount;
+            }
+            std::atomic_signal_fence(std::memory_order_acquire);
+        }
+        oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
+        if (oldCount <= 0)
+            m_sema.wait();
+        if (max > 1)
+            return 1 + tryWaitMany(max - 1);
+        return 1;
+    }
+
 public:
-    LightweightSemaphore(int initialCount = 0) : m_count(initialCount)
+    LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
     {
         assert(initialCount >= 0);
     }
 
     bool tryWait()
     {
-        int oldCount = m_count.load(std::memory_order_relaxed);
-        return (oldCount > 0 && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire));
+        ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+        while (oldCount > 0)
+        {
+            if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
+                return true;
+        }
+        return false;
     }
 
     void wait()
@@ -206,16 +256,47 @@
         if (!tryWait())
             waitWithPartialSpinning();
     }
+
+    // Acquires between 0 and (greedily) max, inclusive
+    ssize_t tryWaitMany(ssize_t max)
+    {
+        assert(max >= 0);
+        ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+        while (oldCount > 0)
+        {
+            ssize_t newCount = oldCount > max ? oldCount - max : 0;
+            if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+                return oldCount - newCount;
+        }
+        return 0;
+    }
+
+    // Acquires at least one, and (greedily) at most max
+    ssize_t waitMany(ssize_t max)
+    {
+        assert(max >= 0);
+        ssize_t result = tryWaitMany(max);
+        if (result == 0 && max > 0)
+            result = waitManyWithPartialSpinning(max);
+        return result;
+    }
 
-    void signal(int count = 1)
+    void signal(ssize_t count = 1)
     {
-        int oldCount = m_count.fetch_add(count, std::memory_order_release);
-        int toRelease = -oldCount < count ? -oldCount : count;
+        assert(count >= 0);
+        ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
+        ssize_t toRelease = -oldCount < count ? -oldCount : count;
         if (toRelease > 0)
         {
-            m_sema.signal(toRelease);
+            m_sema.signal((int)toRelease);
         }
     }
+
+    ssize_t availableApprox() const
+    {
+        ssize_t count = m_count.load(std::memory_order_relaxed);
+        return count > 0 ? count : 0;
+    }
 };
 
 
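Note for reviewers: below is a minimal usage sketch of the batch API this patch adds to LightweightSemaphore (waitMany(), tryWaitMany(), availableApprox()). It is illustrative only and not part of the change; it assumes the patch is applied and that sema.h is reachable on the include path, and the ITEMS/BATCH_MAX constants and thread bodies are made up for the example.

// Hypothetical example, not part of the patch: one producer signals the
// semaphore once per item, one consumer drains counts in batches with the
// new waitMany(). Adjust the include path ("sema.h") to your layout.
#include <cstdio>
#include <thread>
#include "sema.h"

int main()
{
    LightweightSemaphore itemsReady;                       // count starts at 0
    const int ITEMS = 100;                                 // made-up workload size
    const LightweightSemaphore::ssize_t BATCH_MAX = 32;    // made-up batch limit

    std::thread producer([&] {
        for (int i = 0; i < ITEMS; i++)
            itemsReady.signal();                           // one count per "item"
    });

    std::thread consumer([&] {
        int consumed = 0;
        while (consumed < ITEMS)
        {
            // Blocks until at least one count is available, then greedily
            // takes up to BATCH_MAX; returns how many counts were acquired.
            LightweightSemaphore::ssize_t n = itemsReady.waitMany(BATCH_MAX);
            consumed += (int)n;
            std::printf("acquired a batch of %d\n", (int)n);
        }
    });

    producer.join();
    consumer.join();
    // Approximate leftover count; 0 here since every signal was consumed.
    std::printf("leftover (approx): %d\n", (int)itemsReady.availableApprox());
    return 0;
}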