Skip to content

Various tweaks to semaphore (and makefile) #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/BuildSettings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ if(${MSVC})
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /debug")
elseif(${UNIX})
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(NOT ${CYGWIN}) # Don't specify -pthread on Cygwin
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
Expand Down
115 changes: 98 additions & 17 deletions common/sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,31 @@

#include <atomic>
#include <cassert>
#include <type_traits>


#if defined(_WIN32)
//---------------------------------------------------------
// Semaphore (Windows)
//---------------------------------------------------------

#include <windows.h>
#undef min
#undef max
// Avoid including windows.h in a header; we only need a handful of
// items, so we'll redeclare them here (this is relatively safe since
// the API generally has to remain stable between Windows versions).
// I know this is an ugly hack but it still beats polluting the global
// namespace with thousands of generic names or adding a .cpp for nothing.
// Hand-written prototypes for the few kernel32 entry points this header
// needs. Built-in types stand in for the Win32 typedefs so <windows.h> is
// not required: void* for HANDLE, long for LONG, unsigned long for DWORD,
// int for BOOL and const wchar_t* for LPCWSTR.
// NOTE(review): assumes wchar_t is a native type (the MSVC default); with
// /Zc:wchar_t- these would not match the SDK declarations -- confirm.
extern "C" {
// Opaque forward declaration; it is only ever referred to through a pointer.
struct _SECURITY_ATTRIBUTES;
// Creates a semaphore object with the given initial and maximum counts and
// returns its handle.
__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
// Closes an open object handle.
__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
// Blocks until the object is signaled or dwMilliseconds elapses.
__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
// Adds lReleaseCount to the semaphore's count; lpPreviousCount may be null.
__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
}

class Semaphore
{
private:
HANDLE m_hSema;
void* m_hSema;

Semaphore(const Semaphore& other) = delete;
Semaphore& operator=(const Semaphore& other) = delete;
Expand All @@ -31,7 +41,8 @@ class Semaphore
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
m_hSema = CreateSemaphore(NULL, initialCount, MAXLONG, NULL);
const long maxLong = 0x7fffffff;
m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
}

~Semaphore()
Expand All @@ -41,12 +52,13 @@ class Semaphore

void wait()
{
WaitForSingleObject(m_hSema, INFINITE);
const unsigned long infinite = 0xffffffff;
WaitForSingleObject(m_hSema, infinite);
}

void signal(int count = 1)
{
ReleaseSemaphore(m_hSema, count, NULL);
ReleaseSemaphore(m_hSema, count, nullptr);
}
};

Expand Down Expand Up @@ -164,21 +176,30 @@ class Semaphore
//---------------------------------------------------------
class LightweightSemaphore
{
public:
// The underlying semaphores are limited to int-sized counts,
// but there's no reason we can't scale higher on platforms with
// a wider size_t than int -- the only counts we pass on to the
// underlying semaphores are the number of waiting threads, which
// will always fit in an int for all platforms regardless of our
// high-level count.
typedef std::make_signed<std::size_t>::type ssize_t;

private:
std::atomic<int> m_count;
std::atomic<ssize_t> m_count;
Semaphore m_sema;

void waitWithPartialSpinning()
{
int oldCount;
ssize_t oldCount;
// Is there a better way to set the initial spin count?
// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
// as threads start hitting the kernel semaphore.
int spin = 10000;
while (spin--)
{
oldCount = m_count.load(std::memory_order_relaxed);
if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire))
if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
return;
std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop.
}
Expand All @@ -189,33 +210,93 @@ class LightweightSemaphore
}
}

// Slow path of waitMany(): spins for a bounded number of attempts trying to
// grab up to `max` units with a CAS, and only then falls back to blocking on
// the underlying semaphore for a single unit.
// Requires max > 0. Returns the number of units acquired (at least 1).
ssize_t waitManyWithPartialSpinning(ssize_t max)
{
    assert(max > 0);

    for (int attemptsLeft = 10000; attemptsLeft > 0; --attemptsLeft)
    {
        ssize_t observed = m_count.load(std::memory_order_relaxed);
        if (observed > 0)
        {
            // Take everything that is available, capped at max.
            const ssize_t taken = observed > max ? max : observed;
            if (m_count.compare_exchange_strong(observed, observed - taken, std::memory_order_acquire, std::memory_order_relaxed))
                return taken;
        }
        // Compiler-only barrier so the spin loop is not collapsed.
        std::atomic_signal_fence(std::memory_order_acquire);
    }

    // Spinning did not pay off: claim one unit unconditionally, and block on
    // the underlying semaphore if the count was not positive beforehand.
    const ssize_t before = m_count.fetch_sub(1, std::memory_order_acquire);
    if (before <= 0)
        m_sema.wait();

    // One unit is ours now; opportunistically pick up more without blocking.
    if (max <= 1)
        return 1;
    return 1 + tryWaitMany(max - 1);
}

public:
LightweightSemaphore(int initialCount = 0) : m_count(initialCount)
LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
{
assert(initialCount >= 0);
}

bool tryWait()
{
int oldCount = m_count.load(std::memory_order_relaxed);
return (oldCount > 0 && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire));
ssize_t oldCount = m_count.load(std::memory_order_relaxed);
while (oldCount > 0)
{
if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
return true;
}
return false;
}

// Acquires one unit: first via the non-blocking tryWait(), and only if that
// fails via the spin-then-block path.
void wait()
{
    if (tryWait())
        return;
    waitWithPartialSpinning();
}

// Non-blocking bulk acquire: takes as many units as are currently available,
// capped at max, and returns how many were taken (between 0 and max).
ssize_t tryWaitMany(ssize_t max)
{
    assert(max >= 0);

    ssize_t observed = m_count.load(std::memory_order_relaxed);
    for (;;)
    {
        if (observed <= 0)
            return 0;

        const ssize_t taken = observed > max ? max : observed;
        // On failure (including a spurious one) `observed` is refreshed with
        // the current count and the loop re-evaluates it.
        if (m_count.compare_exchange_weak(observed, observed - taken, std::memory_order_acquire, std::memory_order_relaxed))
            return taken;
    }
}

// Bulk acquire that waits when nothing is available: returns at least one
// and (greedily) at most max units. With max == 0 it returns 0 immediately
// and never waits.
ssize_t waitMany(ssize_t max)
{
    assert(max >= 0);

    const ssize_t grabbed = tryWaitMany(max);
    if (grabbed != 0 || max <= 0)
        return grabbed;

    // Nothing was available and the caller asked for at least one unit.
    return waitManyWithPartialSpinning(max);
}

void signal(int count = 1)
void signal(ssize_t count = 1)
{
int oldCount = m_count.fetch_add(count, std::memory_order_release);
int toRelease = -oldCount < count ? -oldCount : count;
assert(count >= 0);
ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
ssize_t toRelease = -oldCount < count ? -oldCount : count;
if (toRelease > 0)
{
m_sema.signal(toRelease);
m_sema.signal((int)toRelease);
}
}

// Returns a relaxed snapshot of the internal count, clamped so that a
// non-positive count is reported as 0. The value may be stale by the time
// the caller uses it, hence "approx".
ssize_t availableApprox() const
{
    const ssize_t snapshot = m_count.load(std::memory_order_relaxed);
    if (snapshot <= 0)
        return 0;
    return snapshot;
}
};


Expand Down