77import sys
88import threading
99from types import ModuleType
10+ from fysom import FysomError
1011
1112from pkg_resources import DistributionNotFound , get_distribution
1213
@@ -105,16 +106,19 @@ def to_dict(self):
105106
106107class Meter (object ):
107108 SNAPSHOT_PERIOD = 600
108- snapshot_countdown = 0
109+ THREAD_NAME = "Instana Metric Collection"
109110
110111 # The agent that this instance belongs to
111112 agent = None
112113
114+ # We send Snapshot data every 10 minutes. This is the countdown variable.
115+ snapshot_countdown = 0
116+
113117 last_usage = None
114118 last_collect = None
115119 last_metrics = None
116120 djmw = None
117- thr = None
121+ thread = None
118122
119123 # A True value signals the metric reporting thread to shutdown
120124 _shutdown = False
@@ -123,33 +127,57 @@ def __init__(self, agent):
123127 self .agent = agent
124128 pass
125129
126- def run (self ):
127- """ Spawns the metric reporting thread """
128- self .thr = threading .Thread (target = self .collect_and_report )
129- self .thr .daemon = True
130- self .thr .name = "Instana Metric Collection"
131- self .thr .start ()
def start(self):
    """
    Bring the Meter into a clean state and launch the metric reporting thread.

    This can be called at first boot or after a fork. In either case the
    Meter state is re-initialised via reset(), which also leaves a thread
    object in `self.thread`; that thread is started here unless it is
    already alive.

    Note that this abandons any previous thread object that (in the case of
    an `os.fork()`) should no longer exist in the forked process.

    (Forked processes carry forward only the thread that called `os.fork()`
    into the new process space. All other background threads need to be
    recreated.)

    NOTE(review): this was documented as unsafe to call more than once
    without an actual fork - confirm whether that still applies given that
    reset() re-uses a live thread with the same name.
    """
    self.reset()

    # Thread.isAlive() was removed in Python 3.9; is_alive() is available on
    # both Python 2.6+ and Python 3, so it is the portable spelling.
    if not self.thread.is_alive():
        self.thread.start()
132148
def reset(self):
    """
    Reset the state as new and prepare the metric reporting thread.

    Clears the cached usage/collect/metrics values and the snapshot
    countdown, then sets `self.thread` to either an existing thread whose
    name equals THREAD_NAME or, if there is none, a freshly created
    (not yet started) daemon thread targeting collect_and_report().
    """
    self.last_usage = None
    self.last_collect = None
    self.last_metrics = None
    self.snapshot_countdown = 0
    self.thread = None

    # Prepare the thread for metric collection/reporting
    for candidate in threading.enumerate():
        # Thread.getName() is deprecated since Python 3.10; the `name`
        # attribute is equivalent and works on every supported version.
        if candidate.name == self.THREAD_NAME:
            # Metric thread already exists; make sure we re-use this one.
            self.thread = candidate

    if self.thread is None:
        self.thread = threading.Thread(target=self.collect_and_report)
        self.thread.daemon = True
        self.thread.name = self.THREAD_NAME
139167
def handle_fork(self):
    """
    Re-initialise this Meter in a forked process.

    Delegates to start(), which resets the state and spawns the metric
    reporting thread.
    """
    self.start()
143170
144171 def collect_and_report (self ):
145172 """
146173 Target function for the metric reporting thread. This is a simple loop to
147174 collect and report entity data every 1 second.
148175 """
149- logger .debug ("Metric reporting thread is now alive" )
176+ logger .debug (" -> Metric reporting thread is now alive" )
150177
151178 def metric_work ():
152179 self .process ()
180+
153181 if self .agent .is_timed_out ():
154182 logger .warn ("Host agent offline for >1 min. Going to sit in a corner..." )
155183 self .agent .reset ()
@@ -160,12 +188,17 @@ def metric_work():
160188
161189 def process (self ):
162190 """ Collects, processes & reports metrics """
163- if self .agent .machine .fsm .current is "wait4init" :
164- # Test the host agent if we're ready to send data
165- if self .agent .is_agent_ready ():
166- self .agent .machine .fsm .ready ()
167- else :
168- return
191+ try :
192+ if self .agent .machine .fsm .current is "wait4init" :
193+ # Test the host agent if we're ready to send data
194+ if self .agent .is_agent_ready ():
195+ if self .agent .machine .fsm .current is not "good2go" :
196+ self .agent .machine .fsm .ready ()
197+ else :
198+ return
199+ except FysomError :
200+ logger .debug ('Harmless state machine thread disagreement. Will self-correct on next timer cycle.' )
201+ return
169202
170203 if self .agent .can_send ():
171204 self .snapshot_countdown = self .snapshot_countdown - 1
0 commit comments