@@ -91,7 +91,19 @@ typedef struct {
     int64_t delta_usec;
 } sample_time_t;
 
+/* We need to ensure that various memory operations are visible across
+ * threads. Ruby doesn't offer a portable way to do this sort of detection
+ * across all the Ruby versions we support, so we use something that casts a
+ * wide net (Clang, along with ICC, defines __GNUC__). */
+#if defined(__GNUC__) && defined(__ATOMIC_SEQ_CST)
+#define STACKPROF_HAVE_ATOMICS 1
+#else
+#define STACKPROF_HAVE_ATOMICS 0
+#endif
+
 static struct {
+    /* Access this field with the `STACKPROF_RUNNING` macro, below, since we
+     * can't properly express that this field has an atomic type. */
     int running;
     int raw;
     int aggregate;
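
The detection above leans on a compiler predefine rather than a C standard version check. A minimal standalone sketch of the same pattern, with a graceful non-atomic fallback (the names `HAVE_ATOMICS` and `LOAD_FLAG` are mine, not stackprof's):

    #include <stdio.h>

    /* GCC-compatible compilers (GCC, Clang, ICC) predefine __GNUC__, and
     * compilers with the __atomic builtins predefine __ATOMIC_SEQ_CST. */
    #if defined(__GNUC__) && defined(__ATOMIC_SEQ_CST)
    #define HAVE_ATOMICS 1
    #define LOAD_FLAG(p) __atomic_load_n((p), __ATOMIC_ACQUIRE)
    #else
    #define HAVE_ATOMICS 0
    #define LOAD_FLAG(p) (*(p)) /* plain load; no ordering guarantees */
    #endif

    int main(void) {
        static int flag = 1;
        printf("atomics available: %d, flag: %d\n", HAVE_ATOMICS, LOAD_FLAG(&flag));
        return 0;
    }
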
@@ -133,6 +145,12 @@ static struct {
     pthread_t target_thread;
 } _stackprof;
 
+#if STACKPROF_HAVE_ATOMICS
+#define STACKPROF_RUNNING() __atomic_load_n(&_stackprof.running, __ATOMIC_ACQUIRE)
+#else
+#define STACKPROF_RUNNING() _stackprof.running
+#endif
+
 static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
 static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
 static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_raw_lines, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
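
The `__ATOMIC_ACQUIRE` load in this macro is one half of a publication protocol: it pairs with the `__ATOMIC_SEQ_CST` store that `stackprof_start` performs after filling in the rest of `_stackprof` (further down in this commit). A self-contained sketch of that pairing, with hypothetical names; build with `cc -pthread`:

    #include <pthread.h>
    #include <stdio.h>

    static int data;    /* stands in for the "initialization stores" */
    static int running; /* the published flag */

    static void *reader(void *arg) {
        /* acquire: once we observe running == 1, the store to data that
         * happened before the release store is visible here too */
        while (!__atomic_load_n(&running, __ATOMIC_ACQUIRE))
            ;
        printf("data = %d\n", data); /* guaranteed to print 42 */
        return NULL;
    }

    int main(void) {
        pthread_t t;
        pthread_create(&t, NULL, reader, NULL);
        data = 42;                                       /* ordinary store */
        __atomic_store_n(&running, 1, __ATOMIC_SEQ_CST); /* includes release */
        pthread_join(t, NULL);
        return 0;
    }
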
@@ -154,7 +172,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
     int raw = 0, aggregate = 1;
     VALUE metadata_val;
 
-    if (_stackprof.running)
+    if (STACKPROF_RUNNING())
         return Qfalse;
 
     rb_scan_args(argc, argv, "0:", &opts);
@@ -217,7 +235,6 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
         rb_raise(rb_eArgError, "unknown profiler mode");
     }
 
-    _stackprof.running = 1;
     _stackprof.raw = raw;
     _stackprof.aggregate = aggregate;
     _stackprof.mode = mode;
@@ -226,6 +243,13 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
     _stackprof.metadata = metadata;
     _stackprof.out = out;
     _stackprof.target_thread = pthread_self();
+    /* We need to ensure previous initialization stores are visible across
+     * threads. */
+#if STACKPROF_HAVE_ATOMICS
+    __atomic_store_n(&_stackprof.running, 1, __ATOMIC_SEQ_CST);
+#else
+    _stackprof.running = 1;
+#endif
 
     if (raw) {
         capture_timestamp(&_stackprof.last_sample_at);
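
Note the flag store is also moved to come *after* the field assignments (the old code set `running = 1` before them): a signal handler that observes `running == 1` through the acquire load therefore also observes every field written before the store. The commit uses the `__atomic` builtins rather than C11 `<stdatomic.h>`, presumably for the portability reason given in the detection comment; for reference, a C11 sketch of the equivalent store/load pair:

    /* C11 equivalent of the commit's store/load pair; not what this
     * commit uses, shown only for comparison */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int running;

    int main(void) {
        atomic_store_explicit(&running, 1, memory_order_seq_cst);
        int r = atomic_load_explicit(&running, memory_order_acquire);
        printf("running = %d\n", r);
        return 0;
    }
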
@@ -240,9 +264,15 @@ stackprof_stop(VALUE self)
     struct sigaction sa;
     struct itimerval timer;
 
+#if STACKPROF_HAVE_ATOMICS
+    int was_running = __atomic_exchange_n(&_stackprof.running, 0, __ATOMIC_SEQ_CST);
+    if (!was_running)
+        return Qfalse;
+#else
     if (!_stackprof.running)
         return Qfalse;
     _stackprof.running = 0;
+#endif
 
     if (_stackprof.mode == sym_object) {
         rb_tracepoint_disable(objtracer);
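
Using an exchange rather than a separate load and store closes a race in `stackprof_stop`: if two threads stop the profiler concurrently, exactly one of them observes the 1 -> 0 transition and proceeds to the teardown. A sketch of that exactly-once property (hypothetical harness; build with `cc -pthread`):

    #include <pthread.h>
    #include <stdio.h>

    static int running = 1;

    static void *stopper(void *arg) {
        /* exchange returns the previous value atomically, so only one
         * racing caller can ever see the 1 */
        if (__atomic_exchange_n(&running, 0, __ATOMIC_SEQ_CST))
            printf("%s performed the teardown\n", (const char *)arg);
        return NULL;
    }

    int main(void) {
        pthread_t a, b;
        pthread_create(&a, NULL, stopper, "A");
        pthread_create(&b, NULL, stopper, "B");
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
    }
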
@@ -351,7 +381,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
 {
     VALUE results, frames;
 
-    if (!_stackprof.frames || _stackprof.running)
+    if (!_stackprof.frames || STACKPROF_RUNNING())
         return Qnil;
 
     results = rb_hash_new();
@@ -455,7 +485,7 @@ stackprof_run(int argc, VALUE *argv, VALUE self)
 static VALUE
 stackprof_running_p(VALUE self)
 {
-    return _stackprof.running ? Qtrue : Qfalse;
+    return STACKPROF_RUNNING() ? Qtrue : Qfalse;
 }
 
 static inline frame_data_t *
@@ -719,23 +749,23 @@ stackprof_sample_and_record(void)
 static void
 stackprof_job_record_gc(void *data)
 {
-    if (!_stackprof.running) return;
+    if (!STACKPROF_RUNNING()) return;
 
     stackprof_record_gc_samples();
 }
 
 static void
 stackprof_job_sample_and_record(void *data)
 {
-    if (!_stackprof.running) return;
+    if (!STACKPROF_RUNNING()) return;
 
     stackprof_sample_and_record();
 }
 
 static void
 stackprof_job_record_buffer(void *data)
 {
-    if (!_stackprof.running) return;
+    if (!STACKPROF_RUNNING()) return;
 
     stackprof_record_buffer();
 }
@@ -747,7 +777,7 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
 
     _stackprof.overall_signals++;
 
-    if (!_stackprof.running) return;
+    if (!STACKPROF_RUNNING()) return;
 
     // There's a possibility that the signal handler is invoked *after* the Ruby
     // VM has been shut down (e.g. after ruby_cleanup(0)). In this case, things
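
The choice of an atomic flag matters most in this function: it runs in a signal handler, where taking a mutex could self-deadlock. The `__atomic` builtins on a plain `int` compile to lock-free instructions on mainstream platforms, which a build can verify at compile time with a compiler builtin; a one-line check, for illustration:

    #include <stdio.h>

    int main(void) {
        /* compile-time constant on GCC/Clang: 1 if int-sized atomics
         * never fall back to a lock */
        printf("int atomics always lock-free: %d\n",
               __atomic_always_lock_free(sizeof(int), 0));
        return 0;
    }
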
@@ -810,7 +840,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
 static VALUE
 stackprof_sample(VALUE self)
 {
-    if (!_stackprof.running)
+    if (!STACKPROF_RUNNING())
         return Qfalse;
 
     _stackprof.overall_signals++;
@@ -854,7 +884,7 @@ static void
 stackprof_atfork_prepare(void)
 {
     struct itimerval timer;
-    if (_stackprof.running) {
+    if (STACKPROF_RUNNING()) {
         if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) {
             memset(&timer, 0, sizeof(timer));
             setitimer(_stackprof.mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
@@ -866,7 +896,7 @@ static void
 stackprof_atfork_parent(void)
 {
     struct itimerval timer;
-    if (_stackprof.running) {
+    if (STACKPROF_RUNNING()) {
         if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) {
             timer.it_interval.tv_sec = 0;
             timer.it_interval.tv_usec = NUM2LONG(_stackprof.interval);
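
These prepare/parent handlers pause the interval timer around `fork()` so a child process does not inherit a live profiling timer; stackprof presumably registers them with `pthread_atfork(3)` at extension init. A minimal sketch of that wiring (handler bodies are stand-ins; build with `cc -pthread`):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static void prepare(void) { puts("pausing profile timer before fork"); }
    static void parent(void)  { puts("restoring timer in parent"); }
    static void child(void)   { puts("child starts without a timer"); }

    int main(void) {
        /* handlers fire automatically around every subsequent fork() */
        pthread_atfork(prepare, parent, child);
        if (fork() == 0) return 0; /* child exits immediately */
        return 0;
    }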