Skip to content

Commit aab40f6

Browse files
author
Anthony Mallet
committed
[middleware/pocolibs] Workaround a posix timer issue in recent Linux
* patch-aa: from upstream commit 67c2eed67, detecting at compile time an issue with posix timers and thread creation/destruction.
1 parent c9f048b commit aab40f6

File tree

3 files changed

+204
-0
lines changed

3 files changed

+204
-0
lines changed

middleware/pocolibs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Created: Anthony Mallet on Sun, 28 Jan 2007
33
#
44

5+
PKGREVISION= 1
56
DISTNAME= pocolibs-3.2
67
CATEGORIES= middleware
78
MASTER_SITES= ${MASTER_SITE_OPENROBOTS:=pocolibs/}

middleware/pocolibs/distinfo

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
SHA1 (pocolibs-3.2.tar.gz) = 5c9acd1b2fc6bb02cd50275bec41fd8aed13b578
22
RMD160 (pocolibs-3.2.tar.gz) = c769d2bb6f9694258a6610b9f25bfe5354eb7e31
33
Size (pocolibs-3.2.tar.gz) = 515821 bytes
4+
SHA1 (patch-aa) = 7a6f2cf238ba5b92373b757ca461cf963925f1f4
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
Update compile-time test for working posix timers
2+
3+
There is an issue spotted on ubuntu-22.04 and 24.04, as well as other
4+
distributions, that affects posix timer signal delivery. It is easily
5+
triggered by spawning and terminating threads quickly while the timer is
6+
running. At some point, the timer signal will stop being delivered.
7+
Inspecting the timer with timer_gettime(2) shows it still counting,
8+
reconfiguring the timer with timer_settime(2) makes it work again and
9+
manually sending the timer signal also always works.
10+
11+
This seems to affect kernels from 6.4 to 6.11 inclusive, i.e. only 6.3 and
12+
earlier are immune to the issue at this time.
13+
14+
In practice, this issue is triggered in existing software doing some
15+
aio(7) work and thus involving thread creation/destruction. So a new test is
16+
added to the configure script to (hopefully) detect the issue at compile time and
17+
disable the use of posix timers in favor of setitimer(2).
18+
19+
This is from upstream commit 67c2eed67, but here configure script is patched
20+
directly to avoid the burden of autoreconf et al.
21+
22+
--- configure~ 2024-08-05 18:13:57.000000000 +0200
23+
+++ configure 2024-11-06 18:44:30.587710075 +0100
24+
@@ -13241,72 +13241,124 @@
25+
else
26+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
27+
/* end confdefs.h. */
28+
+/* Public domain - Anthony Mallet on Mon Nov 4 2024 */
29+
30+
-#include <sys/time.h>
31+
+#include <err.h>
32+
+#include <errno.h>
33+
+#include <pthread.h>
34+
#include <signal.h>
35+
+#include <stdint.h>
36+
#include <stdio.h>
37+
-#include <stdlib.h>
38+
#include <time.h>
39+
40+
-static volatile int done = 0;
41+
-static void timerInt(int sig) { done++; }
42+
+static volatile int ticks;
43+
44+
-static long
45+
-time_difference(struct timeval *t1, struct timeval *t2)
46+
+/* SIGALRM handler */
47+
+void
48+
+tick(int arg)
49+
{
50+
- long usec_diff = t1->tv_usec - t2->tv_usec, retenue = 0;
51+
+ (void)arg; /* unused */
52+
53+
- if (usec_diff < 0) {
54+
- usec_diff = 1000000 + usec_diff;
55+
- retenue = 1;
56+
- }
57+
- return (t1->tv_sec - t2->tv_sec - retenue)*1000000 + usec_diff;
58+
+ /* global counter - even if access is not atomic, we don't care here as the
59+
+ * exact value is not used, only the fact that the value changes is relevant
60+
+ */
61+
+ ticks++;
62+
+}
63+
+
64+
+/* thread forking thread */
65+
+void *
66+
+thr(void *arg)
67+
+{
68+
+ pthread_attr_t attr;
69+
+ pthread_t t;
70+
+ (void)arg; /* unused */
71+
+
72+
+ /* spwan a new thread in detached state so that we don't grow too much */
73+
+ pthread_attr_init(&attr);
74+
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
75+
+ if (pthread_create(&t, &attr, thr, NULL))
76+
+ err(2, "pthread_create");
77+
+
78+
+ return NULL;
79+
}
80+
81+
int
82+
-main(int argc, char *argv)
83+
+main()
84+
{
85+
- timer_t t;
86+
- struct itimerspec tv;
87+
- sigset_t sigset;
88+
- struct sigaction act;
89+
- struct timeval tp1, tp2;
90+
-
91+
- sigemptyset(&sigset);
92+
- sigaddset(&sigset, SIGALRM);
93+
- if (sigprocmask(SIG_UNBLOCK, &sigset, NULL) == -1) {
94+
- perror("sigprocmask");
95+
- exit(2);
96+
- }
97+
- act.sa_handler = timerInt;
98+
- sigemptyset(&act.sa_mask);
99+
- act.sa_flags = 0;
100+
- if (sigaction(SIGALRM, &act, NULL) == -1) {
101+
- perror("sigaction");
102+
- exit(2);
103+
- }
104+
- if (timer_create(CLOCK_REALTIME, NULL, &t) == -1) {
105+
- perror("timer_create");
106+
- exit(2);
107+
- }
108+
- tv.it_interval.tv_nsec = 10000000;
109+
- tv.it_interval.tv_sec = 0;
110+
- tv.it_value.tv_nsec = 10000000;
111+
- tv.it_value.tv_sec = 0;
112+
- if (timer_settime(t, 0, &tv, NULL) == -1) {
113+
- perror("timer_settime");
114+
- exit(2);
115+
- }
116+
- gettimeofday(&tp1, NULL);
117+
- while (done < 100)
118+
- ;
119+
- gettimeofday(&tp2, NULL);
120+
- if (time_difference(&tp2, &tp1) < 1200000)
121+
- exit(0);
122+
- else {
123+
- fprintf(stderr, "no able to generate 100 ticks/s\n");
124+
- exit(2);
125+
- }
126+
+ int hz = 1000; /* 1kHz timer - the higher, the faster the issue happens */
127+
+
128+
+ struct sigaction act;
129+
+ struct itimerspec tv;
130+
+ struct timespec pts, ts, rem;
131+
+ sigset_t sigset;
132+
+ timer_t timer;
133+
+ int i, c1, c2;
134+
+
135+
+ /* SIGALRM handler */
136+
+ act.sa_handler = tick;
137+
+ sigemptyset(&act.sa_mask);
138+
+ act.sa_flags = 0;
139+
+ if (sigaction(SIGALRM, &act, NULL) == -1)
140+
+ err(2, "sigaction");
141+
+
142+
+ sigemptyset(&sigset);
143+
+ sigaddset(&sigset, SIGALRM);
144+
+ if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1)
145+
+ err(2, "pthread_sigmask");
146+
+
147+
+
148+
+ /* SIGALRM timer at 'hz' frequency */
149+
+ if (timer_create(CLOCK_REALTIME, NULL, &timer) == -1)
150+
+ err(2, "timer_create");
151+
+
152+
+ tv.it_interval.tv_nsec = 1000000000/hz;
153+
+ tv.it_interval.tv_sec = 0;
154+
+ tv.it_value = tv.it_interval;
155+
+
156+
+
157+
+ /* thread forking threads - this is an issue spotted on ubuntu-22.04 and
158+
+ * 24.04, as well as other architectures, that affects timer signal
159+
+ * delivrery. This seems to affect kernels from 6.4 to 6.11 inclusive. */
160+
+ thr(NULL);
161+
+
162+
+
163+
+ /* start timer */
164+
+ if (timer_settime(timer, 0, &tv, NULL) == -1)
165+
+ err(2, "timer_settime");
166+
+
167+
+ /* 100 periods delay */
168+
+ pts.tv_sec = 0;
169+
+ pts.tv_nsec = tv.it_interval.tv_nsec * 100; /* 100ms */
170+
+ while(pts.tv_nsec >= 1000000000) {
171+
+ pts.tv_nsec -= 1000000000;
172+
+ pts.tv_sec++;
173+
+ }
174+
+ /* for 1s */
175+
+ for (i = 0; i < 10; i++) {
176+
+ ts = pts;
177+
+ c1 = ticks;
178+
+ while (nanosleep(&ts, &rem) != 0 && errno == EINTR) ts = rem;
179+
+ c2 = ticks;
180+
+
181+
+ if (c1 == c2) {
182+
+ /* the counter is stuck, SIGALRM not firing anymore */
183+
+ fprintf(stderr, "SIGALRM issue after %d ticks\n", c1);
184+
+ return 2;
185+
+
186+
+ /* just resetting the timer at this point makes it work again: */
187+
+ /* if (timer_settime(timer, 0, &tv, NULL) == -1) */
188+
+ /* err(2, "timer_settime"); */
189+
+ /* but the issue will trigger again after some time */
190+
+
191+
+ /* also note that timer_gettime(timer, &tv) will show both correct
192+
+ * tv.it_interval and tv.it_value changing normally */
193+
+
194+
+ /* manually sending SIGALRM also still works: */
195+
+ /* raise(SIGALRM); */
196+
+ }
197+
+ }
198+
+
199+
+ return 0;
200+
}
201+
202+
_ACEOF

0 commit comments

Comments
 (0)