Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit f638167

Browse files
author
Koundinya Veluri
committed
Named mutex: Use flock instead of pthread process-shared mutex in some more cases
Workaround for #5456: - Sometimes, a timed wait operation is not getting released, causing a hang - Due to the hang, it is not possible to detect this issue with code - Temporarily disabled the use of pthread process-shared mutexes on ARM/ARM64. File locks will be used instead. Workaround for #5872: - On Alpine Linux, a pthread process-shared robust mutex is detecting the case where a process abandons the mutex when it exits while holding the lock, but is putting the mutex into an unrecoverable state (ENOTRECOVERABLE) instead of assigning lock ownership to the next thread that is released from a wait for a lock and notifying of abandonment (EOWNERDEAD). - Added a test case to detect this issue, to have it use file locks instead Close #5456
1 parent 7782e7d commit f638167

File tree

4 files changed

+248
-49
lines changed

4 files changed

+248
-49
lines changed

src/pal/src/config.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
#cmakedefine01 HAS_FTRUNCATE_LENGTH_ISSUE
136136
#cmakedefine01 UNWIND_CONTEXT_IS_UCONTEXT_T
137137
#cmakedefine01 HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
138+
#cmakedefine01 HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES
138139
#cmakedefine BSD_REGS_STYLE(reg, RR, rr) @BSD_REGS_STYLE@
139140
#cmakedefine01 HAVE_SCHED_OTHER_ASSIGNABLE
140141

src/pal/src/configure.cmake

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,195 @@ int main()
10251025
}" HAVE_FULLY_FEATURED_PTHREAD_MUTEXES)
10261026
set(CMAKE_REQUIRED_LIBRARIES)
10271027

1028+
if(NOT CLR_CMAKE_PLATFORM_ARCH_ARM AND NOT CLR_CMAKE_PLATFORM_ARCH_ARM64)
1029+
set(CMAKE_REQUIRED_LIBRARIES pthread)
1030+
check_cxx_source_runs("
1031+
// This test case verifies the pthread process-shared robust mutex's cross-process abandon detection. The parent process starts
1032+
// a child process that locks the mutex, the parent process then waits to acquire the lock, and the child process abandons the
1033+
// mutex by exiting the process while holding the lock. The parent process should then be released from its wait, be assigned
1034+
// ownership of the lock, and be notified that the mutex was abandoned.
1035+
1036+
#include <sys/mman.h>
1037+
#include <sys/time.h>
1038+
1039+
#include <errno.h>
1040+
#include <pthread.h>
1041+
#include <stdio.h>
1042+
#include <unistd.h>
1043+
1044+
#include <new>
1045+
using namespace std;
1046+
1047+
// Data shared between the parent and the child test process. DoTest constructs it with placement new inside an anonymous
// shared mapping and publishes it through the global pointer shm.
//   syncMutex, syncCondition - protect and signal changes to conditionValue (the parent/child handshake)
//   robustMutex              - the process-shared robust mutex whose abandon detection is being tested
//   conditionValue           - handshake state, starts at 0
struct Shm
{
    pthread_mutex_t syncMutex;
    pthread_cond_t syncCondition;
    pthread_mutex_t robustMutex;
    int conditionValue;

    // Only the handshake value is set here. The pthread objects are initialized later with the pthread_*_init functions.
    Shm()
    {
        conditionValue = 0;
    }
} *shm;
1058+
1059+
// Stores in *timeoutTimeRef an absolute deadline 30 seconds after the current wall-clock time, suitable for the pthread timed
// lock and timed wait calls used by this test. Returns 0 on success and 1 if the current time could not be obtained.
int GetFailTimeoutTime(struct timespec *timeoutTimeRef)
{
    if (clock_gettime(CLOCK_REALTIME, timeoutTimeRef) != 0)
    {
        // clock_gettime failed, fall back to gettimeofday and convert microseconds to nanoseconds
        struct timeval now;
        if (gettimeofday(&now, NULL) != 0)
            return 1;
        timeoutTimeRef->tv_sec = now.tv_sec;
        timeoutTimeRef->tv_nsec = now.tv_usec * 1000;
    }

    // Any wait in this test that lasts longer than this is treated as a failure
    timeoutTimeRef->tv_sec += 30;
    return 0;
}
1074+
1075+
int WaitForConditionValue(int desiredConditionValue)
1076+
{
1077+
struct timespec timeoutTime;
1078+
if (GetFailTimeoutTime(&timeoutTime) != 0)
1079+
return 1;
1080+
if (pthread_mutex_timedlock(&shm->syncMutex, &timeoutTime) != 0)
1081+
return 1;
1082+
1083+
if (shm->conditionValue != desiredConditionValue)
1084+
{
1085+
if (GetFailTimeoutTime(&timeoutTime) != 0)
1086+
return 1;
1087+
if (pthread_cond_timedwait(&shm->syncCondition, &shm->syncMutex, &timeoutTime) != 0)
1088+
return 1;
1089+
if (shm->conditionValue != desiredConditionValue)
1090+
return 1;
1091+
}
1092+
1093+
if (pthread_mutex_unlock(&shm->syncMutex) != 0)
1094+
return 1;
1095+
return 0;
1096+
}
1097+
1098+
int SetConditionValue(int newConditionValue)
1099+
{
1100+
struct timespec timeoutTime;
1101+
if (GetFailTimeoutTime(&timeoutTime) != 0)
1102+
return 1;
1103+
if (pthread_mutex_timedlock(&shm->syncMutex, &timeoutTime) != 0)
1104+
return 1;
1105+
1106+
shm->conditionValue = newConditionValue;
1107+
if (pthread_cond_signal(&shm->syncCondition) != 0)
1108+
return 1;
1109+
1110+
if (pthread_mutex_unlock(&shm->syncMutex) != 0)
1111+
return 1;
1112+
return 0;
1113+
}
1114+
1115+
void DoTest_Child();
1116+
1117+
int DoTest()
1118+
{
1119+
// Map some shared memory
1120+
void *shmBuffer = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
1121+
if (shmBuffer == MAP_FAILED)
1122+
return 1;
1123+
shm = new(shmBuffer) Shm;
1124+
1125+
// Create sync mutex
1126+
pthread_mutexattr_t syncMutexAttributes;
1127+
if (pthread_mutexattr_init(&syncMutexAttributes) != 0)
1128+
return 1;
1129+
if (pthread_mutexattr_setpshared(&syncMutexAttributes, PTHREAD_PROCESS_SHARED) != 0)
1130+
return 1;
1131+
if (pthread_mutex_init(&shm->syncMutex, &syncMutexAttributes) != 0)
1132+
return 1;
1133+
if (pthread_mutexattr_destroy(&syncMutexAttributes) != 0)
1134+
return 1;
1135+
1136+
// Create sync condition
1137+
pthread_condattr_t syncConditionAttributes;
1138+
if (pthread_condattr_init(&syncConditionAttributes) != 0)
1139+
return 1;
1140+
if (pthread_condattr_setpshared(&syncConditionAttributes, PTHREAD_PROCESS_SHARED) != 0)
1141+
return 1;
1142+
if (pthread_cond_init(&shm->syncCondition, &syncConditionAttributes) != 0)
1143+
return 1;
1144+
if (pthread_condattr_destroy(&syncConditionAttributes) != 0)
1145+
return 1;
1146+
1147+
// Create the robust mutex that will be tested
1148+
pthread_mutexattr_t robustMutexAttributes;
1149+
if (pthread_mutexattr_init(&robustMutexAttributes) != 0)
1150+
return 1;
1151+
if (pthread_mutexattr_setpshared(&robustMutexAttributes, PTHREAD_PROCESS_SHARED) != 0)
1152+
return 1;
1153+
if (pthread_mutexattr_setrobust(&robustMutexAttributes, PTHREAD_MUTEX_ROBUST) != 0)
1154+
return 1;
1155+
if (pthread_mutex_init(&shm->robustMutex, &robustMutexAttributes) != 0)
1156+
return 1;
1157+
if (pthread_mutexattr_destroy(&robustMutexAttributes) != 0)
1158+
return 1;
1159+
1160+
// Start child test process
1161+
int error = fork();
1162+
if (error == -1)
1163+
return 1;
1164+
if (error == 0)
1165+
{
1166+
DoTest_Child();
1167+
return -1;
1168+
}
1169+
1170+
// Wait for child to take a lock
1171+
WaitForConditionValue(1);
1172+
1173+
// Wait to try to take a lock. Meanwhile, child abandons the robust mutex.
1174+
struct timespec timeoutTime;
1175+
if (GetFailTimeoutTime(&timeoutTime) != 0)
1176+
return 1;
1177+
error = pthread_mutex_timedlock(&shm->robustMutex, &timeoutTime);
1178+
if (error != EOWNERDEAD) // expect to be notified that the robust mutex was abandoned
1179+
return 1;
1180+
if (pthread_mutex_consistent(&shm->robustMutex) != 0)
1181+
return 1;
1182+
1183+
if (pthread_mutex_unlock(&shm->robustMutex) != 0)
1184+
return 1;
1185+
if (pthread_mutex_destroy(&shm->robustMutex) != 0)
1186+
return 1;
1187+
return 0;
1188+
}
1189+
1190+
void DoTest_Child()
1191+
{
1192+
// Lock the robust mutex
1193+
struct timespec timeoutTime;
1194+
if (GetFailTimeoutTime(&timeoutTime) != 0)
1195+
return;
1196+
if (pthread_mutex_timedlock(&shm->robustMutex, &timeoutTime) != 0)
1197+
return;
1198+
1199+
// Notify parent that robust mutex is locked
1200+
if (SetConditionValue(1) != 0)
1201+
return;
1202+
1203+
// Wait a short period to let the parent block on waiting for a lock
1204+
sleep(1);
1205+
1206+
// Abandon the mutex by exiting the process while holding the lock. Parent's wait should be released by EOWNERDEAD.
1207+
}
1208+
1209+
// Entry point of the configure-time test program. The process exit code is the test result: 0 means the robust mutex
// behaved as expected, a nonzero value means the test failed.
int main()
1210+
{
1211+
// DoTest returns 0 or 1 in the parent process and -1 in the forked child process
int result = DoTest();
1212+
// The child's negative result is mapped to exit code 0, the parent's result is passed through unchanged
return result >= 0 ? result : 0;
1213+
}" HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES)
1214+
set(CMAKE_REQUIRED_LIBRARIES)
1215+
endif()
1216+
10281217
if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
10291218
if(NOT HAVE_LIBUUID_H)
10301219
unset(HAVE_LIBUUID_H CACHE)

src/pal/src/include/pal/mutex.hpp

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ DWORD SPINLOCKTryAcquire (LONG * lock);
6969
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
7070
// Named mutex
7171

72+
// NAMED_MUTEX_USE_PTHREAD_MUTEX selects the named mutex implementation: 1 when pthread process-shared mutexes are used, 0
// otherwise. It is 1 only when both configure-time checks passed (HAVE_FULLY_FEATURED_PTHREAD_MUTEXES and
// HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES) and the target is neither ARM nor ARM64.
//
// ARM/ARM64 are excluded temporarily: the functional issue seen there shows up as a hang, which cannot easily be detected with
// code. See https://github.com/dotnet/coreclr/issues/5456.
#if !HAVE_FULLY_FEATURED_PTHREAD_MUTEXES || !HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES || defined(_ARM_) || defined(_ARM64_)
#define NAMED_MUTEX_USE_PTHREAD_MUTEX 0
#else
#define NAMED_MUTEX_USE_PTHREAD_MUTEX 1
#endif
79+
7280
enum class NamedMutexError : DWORD
7381
{
7482
MaximumRecursiveLocksReached = ERROR_NOT_ENOUGH_MEMORY,
@@ -83,7 +91,7 @@ enum class MutexTryAcquireLockResult
8391
TimedOut
8492
};
8593

86-
#if HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
94+
#if NAMED_MUTEX_USE_PTHREAD_MUTEX
8795
class MutexHelpers
8896
{
8997
public:
@@ -93,16 +101,16 @@ class MutexHelpers
93101
static MutexTryAcquireLockResult TryAcquireLock(pthread_mutex_t *mutex, DWORD timeoutMilliseconds);
94102
static void ReleaseLock(pthread_mutex_t *mutex);
95103
};
96-
#endif // HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
104+
#endif // NAMED_MUTEX_USE_PTHREAD_MUTEX
97105

98106
class NamedMutexSharedData
99107
{
100108
private:
101-
#if HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
109+
#if NAMED_MUTEX_USE_PTHREAD_MUTEX
102110
pthread_mutex_t m_lock;
103-
#else // !HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
111+
#else // !NAMED_MUTEX_USE_PTHREAD_MUTEX
104112
UINT32 m_timedWaiterCount;
105-
#endif // HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
113+
#endif // NAMED_MUTEX_USE_PTHREAD_MUTEX
106114
UINT32 m_lockOwnerProcessId;
107115
UINT64 m_lockOwnerThreadId;
108116
bool m_isAbandoned;
@@ -111,15 +119,15 @@ class NamedMutexSharedData
111119
NamedMutexSharedData();
112120
~NamedMutexSharedData();
113121

114-
#if HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
122+
#if NAMED_MUTEX_USE_PTHREAD_MUTEX
115123
public:
116124
pthread_mutex_t *GetLock();
117-
#else // !HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
125+
#else // !NAMED_MUTEX_USE_PTHREAD_MUTEX
118126
public:
119127
bool HasAnyTimedWaiters() const;
120128
void IncTimedWaiterCount();
121129
void DecTimedWaiterCount();
122-
#endif // HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
130+
#endif // NAMED_MUTEX_USE_PTHREAD_MUTEX
123131

124132
public:
125133
bool IsAbandoned() const;
@@ -142,10 +150,10 @@ class NamedMutexProcessData : public SharedMemoryProcessDataBase
142150
SharedMemoryProcessDataHeader *m_processDataHeader;
143151
NamedMutexSharedData *m_sharedData;
144152
SIZE_T m_lockCount;
145-
#if !HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
153+
#if !NAMED_MUTEX_USE_PTHREAD_MUTEX
146154
HANDLE m_processLockHandle;
147155
int m_sharedLockFileDescriptor;
148-
#endif // !HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
156+
#endif // !NAMED_MUTEX_USE_PTHREAD_MUTEX
149157
CorUnix::CPalThread *m_lockOwnerThread;
150158
NamedMutexProcessData *m_nextInThreadOwnedNamedMutexList;
151159

@@ -158,10 +166,10 @@ class NamedMutexProcessData : public SharedMemoryProcessDataBase
158166
public:
159167
NamedMutexProcessData(
160168
SharedMemoryProcessDataHeader *processDataHeader
161-
#if !HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
169+
#if !NAMED_MUTEX_USE_PTHREAD_MUTEX
162170
,
163171
int sharedLockFileDescriptor
164-
#endif // !HAVE_FULLY_FEATURED_PTHREAD_MUTEXES
172+
#endif // !NAMED_MUTEX_USE_PTHREAD_MUTEX
165173
);
166174
virtual void Close(bool isAbruptShutdown, bool releaseSharedData) override;
167175

0 commit comments

Comments
 (0)