Skip to content

Commit fc891a1

Browse files
authored
Better Fork detection & handling (#96)
* Detect pid change and reset if so.
* Fix log message formatting.
* Remove unnecessary parens
* Check queue size before more expensive can_send
* Cleanup & simplify boot architecture
* Simpler on import process
* Clearer relationships and inter-access between Agent, Sensor, Meter & Tracer
* Better fork detection and handling of forks
* Restore boot message; remove unused import
* Centralized singletons; safeties
* Remove remnant debug lines
* Linter; Better exceptions; Safety
1 parent d6a7b18 commit fc891a1

24 files changed

+209
-194
lines changed

instana/__init__.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,22 @@
44
from pkg_resources import get_distribution
55

66
"""
7-
The Instana package has two core components: the sensor and the tracer.
7+
The Instana package has two core components: the agent and the tracer.
88
9-
The sensor is individual to each python process and handles process metric
9+
The agent is individual to each python process and handles process metric
1010
collection and reporting.
1111
1212
The tracer upholds the OpenTracing API and is responsible for reporting
1313
span data to Instana.
14+
15+
The following outlines the hierarchy of classes for these two components.
16+
17+
Agent
18+
Sensor
19+
Meter
20+
21+
Tracer
22+
Recorder
1423
"""
1524

1625
__author__ = 'Instana Inc.'
@@ -33,23 +42,13 @@ def load(module):
3342
print("==========================================================")
3443

3544

36-
# Optional application wide service name.
37-
# Can be configured via environment variable or via code:
38-
#
39-
# export INSTANA_SERVICE_NAME=myservice
40-
# or
41-
# instana.service_name = "myservice"
42-
service_name = None
43-
4445
# User configurable EUM API key for instana.helpers.eum_snippet()
4546
eum_api_key = ''
4647

47-
if "INSTANA_SERVICE_NAME" in os.environ:
48-
service_name = os.environ["INSTANA_SERVICE_NAME"]
48+
import instana.singletons #noqa
4949

5050
if "INSTANA_DISABLE_AUTO_INSTR" not in os.environ:
5151
# Import & initialize instrumentation
52-
# noqa: ignore=W0611
5352
from .instrumentation import urllib3 # noqa
5453
from .instrumentation import sudsjurko # noqa
5554
from .instrumentation import mysqlpython # noqa

instana/agent.py

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
from __future__ import absolute_import
2+
13
import json
4+
import os
25
import threading
36
from datetime import datetime
47

5-
import instana.agent_const as a
6-
import instana.fsm as f
7-
from instana import log
8+
from .log import logger
9+
from .agent_const import AGENT_DEFAULT_HOST, AGENT_DEFAULT_PORT
10+
from .fsm import Fsm
11+
from .sensor import Sensor
12+
import instana.singletons
813

914
try:
1015
import urllib.request as urllib2
@@ -34,24 +39,25 @@ def get_method(self):
3439

3540
class Agent(object):
3641
sensor = None
37-
host = a.AGENT_DEFAULT_HOST
38-
port = a.AGENT_DEFAULT_PORT
42+
host = AGENT_DEFAULT_HOST
43+
port = AGENT_DEFAULT_PORT
3944
fsm = None
4045
from_ = From()
4146
last_seen = None
47+
last_fork_check = None
48+
_boot_pid = os.getpid()
4249

43-
def __init__(self, sensor):
44-
log.debug("initializing agent")
45-
46-
self.sensor = sensor
47-
self.fsm = f.Fsm(self)
50+
def __init__(self):
51+
logger.debug("initializing agent")
52+
self.sensor = Sensor(self)
53+
self.fsm = Fsm(self)
4854

4955
def to_json(self, o):
5056
try:
5157
return json.dumps(o, default=lambda o: {k.lower(): v for k, v in o.__dict__.items()},
5258
sort_keys=False, separators=(',', ':')).encode()
5359
except Exception as e:
54-
log.info("to_json: ", e, o)
60+
logger.info("to_json: ", e, o)
5561

5662
def is_timed_out(self):
5763
if self.last_seen and self.can_send:
@@ -61,7 +67,17 @@ def is_timed_out(self):
6167
return False
6268

6369
def can_send(self):
64-
return self.fsm.fsm.current == "good2go"
70+
# Watch for pid change in the case of a fork; if so, re-announce
71+
current_pid = os.getpid()
72+
if self._boot_pid != current_pid:
73+
self._boot_pid = current_pid
74+
self.handle_fork()
75+
return False
76+
77+
if (self.fsm.fsm.current == "good2go"):
78+
return True
79+
80+
return False
6581

6682
def head(self, url):
6783
return self.request(url, "HEAD", None)
@@ -96,7 +112,7 @@ def full_request_response(self, url, method, o, body, header):
96112
self.reset()
97113
else:
98114
if response.getcode() < 200 or response.getcode() >= 300:
99-
log.error("Request returned erroneous code", response.getcode())
115+
logger.error("Request returned erroneous code", response.getcode())
100116
if self.can_send():
101117
self.reset()
102118
else:
@@ -113,8 +129,8 @@ def full_request_response(self, url, method, o, body, header):
113129
# No need to show the initial 404s or timeouts. The agent
114130
# should handle those correctly.
115131
if not (type(e) is urllib2.HTTPError and e.code == 404):
116-
log.debug("%s: full_request_response: %s" %
117-
(threading.current_thread().name, str(e)))
132+
logger.debug("%s: full_request_response: %s" %
133+
(threading.current_thread().name, str(e)))
118134

119135
return (b, h)
120136

@@ -124,7 +140,7 @@ def make_url(self, prefix):
124140
def make_host_url(self, host, prefix):
125141
port = self.sensor.options.agent_port
126142
if port == 0:
127-
port = a.AGENT_DEFAULT_PORT
143+
port = AGENT_DEFAULT_PORT
128144

129145
return self.make_full_url(host, port, prefix)
130146

@@ -135,21 +151,20 @@ def make_full_url(self, host, port, prefix):
135151

136152
return s
137153

138-
def reset(self):
139-
self.last_seen = None
140-
self.from_ = From()
141-
self.fsm.reset()
142-
143-
def set_host(self, host):
144-
self.host = host
145-
146-
def set_port(self, port):
147-
self.port = port
148-
149154
def set_from(self, json_string):
150155
if type(json_string) is bytes:
151156
raw_json = json_string.decode("UTF-8")
152157
else:
153158
raw_json = json_string
154159

155160
self.from_ = From(**json.loads(raw_json))
161+
162+
def reset(self):
163+
self.last_seen = None
164+
self.from_ = From()
165+
self.fsm.reset()
166+
167+
def handle_fork(self):
168+
self.reset()
169+
self.sensor.handle_fork()
170+
instana.singletons.tracer.handle_fork()

instana/fsm.py

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
import fysom as f
1111
from pkg_resources import get_distribution
1212

13-
from . import agent_const as a
14-
from . import log
13+
from .agent_const import AGENT_HEADER, AGENT_DISCOVERY_URL, AGENT_DATA_URL, AGENT_DEFAULT_HOST, AGENT_DEFAULT_PORT
14+
from .log import logger
1515

1616

1717
class Discovery(object):
@@ -44,8 +44,9 @@ class Fsm(object):
4444
warnedPeriodic = False
4545

4646
def __init__(self, agent):
47-
log.info("Stan is on the scene. Starting Instana instrumentation version", get_distribution('instana').version)
48-
log.debug("initializing fsm")
47+
logger.info("Stan is on the scene. Starting Instana instrumentation version: %s" %
48+
get_distribution('instana').version)
49+
logger.debug("initializing fsm")
4950

5051
self.agent = agent
5152
self.fsm = f.Fysom({
@@ -65,8 +66,8 @@ def __init__(self, agent):
6566
timer.start()
6667

6768
def printstatechange(self, e):
68-
log.debug('========= (%i#%s) FSM event: %s, src: %s, dst: %s ==========' %
69-
(os.getpid(), t.current_thread().name, e.event, e.src, e.dst))
69+
logger.debug('========= (%i#%s) FSM event: %s, src: %s, dst: %s ==========' %
70+
(os.getpid(), t.current_thread().name, e.event, e.src, e.dst))
7071

7172
def reset(self):
7273
self.fsm.lookup()
@@ -78,30 +79,30 @@ def lookup_agent_host(self, e):
7879
host, port = self.__get_agent_host_port()
7980

8081
h = self.check_host(host, port)
81-
if h == a.AGENT_HEADER:
82-
self.agent.set_host(host)
83-
self.agent.set_port(port)
82+
if h == AGENT_HEADER:
83+
self.agent.host = host
84+
self.agent.port = port
8485
self.fsm.announce()
8586
return True
8687
elif os.path.exists("/proc/"):
8788
host = self.get_default_gateway()
8889
if host:
8990
h = self.check_host(host, port)
90-
if h == a.AGENT_HEADER:
91-
self.agent.set_host(host)
92-
self.agent.set_port(port)
91+
if h == AGENT_HEADER:
92+
self.agent.host = host
93+
self.agent.port = port
9394
self.fsm.announce()
9495
return True
9596

9697
if (self.warnedPeriodic is False):
97-
log.warn("Instana Host Agent couldn't be found. Will retry periodically...")
98+
logger.warn("Instana Host Agent couldn't be found. Will retry periodically...")
9899
self.warnedPeriodic = True
99100

100101
self.schedule_retry(self.lookup_agent_host, e, "agent_lookup")
101102
return False
102103

103104
def get_default_gateway(self):
104-
log.debug("checking default gateway")
105+
logger.debug("checking default gateway")
105106

106107
try:
107108
proc = subprocess.Popen(
@@ -111,20 +112,20 @@ def get_default_gateway(self):
111112
addr = proc.stdout.read()
112113
return addr.decode("UTF-8")
113114
except Exception as e:
114-
log.error(e)
115+
logger.error(e)
115116

116117
return None
117118

118119
def check_host(self, host, port):
119-
log.debug("checking %s:%d" % (host, port))
120+
logger.debug("checking %s:%d" % (host, port))
120121

121122
(_, h) = self.agent.request_header(
122123
self.agent.make_host_url(host, "/"), "GET", "Server")
123124

124125
return h
125126

126127
def announce_sensor(self, e):
127-
log.debug("announcing sensor to the agent")
128+
logger.debug("announcing sensor to the agent")
128129
s = None
129130
pid = os.getpid()
130131
cmdline = []
@@ -144,9 +145,9 @@ def announce_sensor(self, e):
144145
(out, err) = proc.communicate()
145146
parts = out.split(b'\n')
146147
cmdline = [parts[1].decode("utf-8")]
147-
except Exception as err:
148+
except Exception:
148149
cmdline = sys.argv
149-
log.debug(err)
150+
logger.debug("announce_sensor", exc_info=True)
150151

151152
d = Discovery(pid=self.__get_real_pid(),
152153
name=cmdline[0],
@@ -161,29 +162,29 @@ def announce_sensor(self, e):
161162
d.inode = os.readlink(path)
162163

163164
(b, _) = self.agent.request_response(
164-
self.agent.make_url(a.AGENT_DISCOVERY_URL), "PUT", d)
165+
self.agent.make_url(AGENT_DISCOVERY_URL), "PUT", d)
165166
if b:
166167
self.agent.set_from(b)
167168
self.fsm.ready()
168-
log.info("Host agent available. We're in business. Announced pid: %i (true pid: %i)" %
169-
(pid, self.agent.from_.pid))
169+
logger.info("Host agent available. We're in business. Announced pid: %s (true pid: %s)" %
170+
(str(pid), str(self.agent.from_.pid)))
170171
return True
171172
else:
172-
log.debug("Cannot announce sensor. Scheduling retry.")
173+
logger.debug("Cannot announce sensor. Scheduling retry.")
173174
self.schedule_retry(self.announce_sensor, e, "announce")
174175
return False
175176

176177
def schedule_retry(self, fun, e, name):
177-
log.debug("Scheduling: " + name)
178+
logger.debug("Scheduling: " + name)
178179
self.timer = t.Timer(self.RETRY_PERIOD, fun, [e])
179180
self.timer.daemon = True
180181
self.timer.name = name
181182
self.timer.start()
182183

183184
def test_agent(self, e):
184-
log.debug("testing communication with the agent")
185+
logger.debug("testing communication with the agent")
185186

186-
(b, _) = self.agent.head(self.agent.make_url(a.AGENT_DATA_URL))
187+
(b, _) = self.agent.head(self.agent.make_url(AGENT_DATA_URL))
187188

188189
if not b:
189190
self.schedule_retry(self.test_agent, e, "agent test")
@@ -209,7 +210,7 @@ def __get_real_pid(self):
209210
if len(g.groups()) == 1:
210211
pid = int(g.groups()[0])
211212
except Exception:
212-
log.debug("parsing sched file failed", exc_info=True)
213+
logger.debug("parsing sched file failed", exc_info=True)
213214
pass
214215

215216
if pid is None:
@@ -222,8 +223,8 @@ def __get_agent_host_port(self):
222223
Iterates over the various ways the host and port of the Instana host
223224
agent may be configured: default, env vars, sensor options...
224225
"""
225-
host = a.AGENT_DEFAULT_HOST
226-
port = a.AGENT_DEFAULT_PORT
226+
host = AGENT_DEFAULT_HOST
227+
port = AGENT_DEFAULT_PORT
227228

228229
if "INSTANA_AGENT_HOST" in os.environ:
229230
host = os.environ["INSTANA_AGENT_HOST"]

instana/helpers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from string import Template
33

44
from instana import eum_api_key as global_eum_api_key
5-
from instana.tracer import internal_tracer
5+
from .singletons import tracer
66
from instana.log import logger
77

88
# Usage:
@@ -33,7 +33,7 @@ def eum_snippet(trace_id=None, eum_api_key=None, meta={}):
3333
ids = {}
3434
ids['meta_kvs'] = ''
3535

36-
parent_span = internal_tracer.active_span
36+
parent_span = tracer.active_span
3737

3838
if trace_id or parent_span:
3939
ids['trace_id'] = trace_id or parent_span.trace_id
@@ -78,7 +78,7 @@ def eum_test_snippet(trace_id=None, eum_api_key=None, meta={}):
7878
ids = {}
7979
ids['meta_kvs'] = ''
8080

81-
parent_span = internal_tracer.active_span
81+
parent_span = tracer.active_span
8282
if trace_id or parent_span:
8383
ids['trace_id'] = trace_id or parent_span.trace_id
8484
else:

0 commit comments

Comments
 (0)