Skip to content

Commit 90f7fa0

Browse files
authored
Opt-in config and logic for deferral of instrumentation to only WSGI worker processes (#243)
### Issue: OTel Python [has issues](open-telemetry/opentelemetry-python#2767) where the SDK is unable to report metrics for applications using a fork process model WSGI server. This affects ADOT when it tries to generate the OTel or Application Signals metrics. A solution to this is to [re-initialize the SDK in the worker processes after the process forking has happened](https://opentelemetry-python.readthedocs.io/en/latest/examples/fork-process-model/README.html). A small caveat is that if the SDK has been initialized in the master process, the worker process SDK won't work because Tracer/Meter providers can be set globally only once. So to circumvent this, we need to skip initializing the SDK in the master process and only do so in the worker processes. ### Description of changes: - Introducing an opt-in configuration environment variable `OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED` that users can enable if they are using a WSGI (or a fork process model) server and want the ADOT SDK to defer auto-instrumentation to worker processes. - Whenever the ADOT SDK auto-instrumentation is loaded (either via the `sitecustomize.py` file or the `opentelemetry-instrument` command), the SDK will check if the above configuration is enabled and if the current process is the master process, and will skip the instrumentation when both are true. - The way we determine if the current process is master or worker is by using an internal marker environment variable `IS_WSGI_MASTER_PROCESS_ALREADY_SEEN`. The first time the ADOT SDK sees a Python process, this env var is not set and it will know this should be a WSGI master process. We then set the env var and when a new worker process forks, the master environment is copied to it (and so is the env var). So when the ADOT SDK checks this env var again (in worker) it finds that the env var was already set to `true` in the master.
### Testing: - Unit tests covering the functionality based on different configurations of the `OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED` and `IS_WSGI_MASTER_PROCESS_ALREADY_SEEN` variables. - Manual test using a sample application. Since this is an opt-in configuration (a 2-way door), testing manually gives us a fair bit of confidence. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent 1c675d3 commit 90f7fa0

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
DEFAULT_METRIC_EXPORT_INTERVAL = 60000.0
6666
AWS_LAMBDA_FUNCTION_NAME_CONFIG = "AWS_LAMBDA_FUNCTION_NAME"
6767
AWS_XRAY_DAEMON_ADDRESS_CONFIG = "AWS_XRAY_DAEMON_ADDRESS"
68+
OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED_CONFIG = "OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED"
6869

6970
_logger: Logger = getLogger(__name__)
7071

@@ -85,6 +86,11 @@ class AwsOpenTelemetryConfigurator(_OTelSDKConfigurator):
8586
# pylint: disable=no-self-use
8687
@override
def _configure(self, **kwargs):
    """Initialize the ADOT components unless initialization is deferred to worker processes.

    Initialization is skipped only when deferral to workers is enabled AND the current
    process is identified as the master process. ``kwargs`` is accepted but not read here.
    """
    # Keep the short-circuit `and`: `_is_wsgi_master_process()` writes a marker into
    # os.environ, so it must only be evaluated when deferral is actually enabled.
    should_initialize = not (_is_defer_to_workers_enabled() and _is_wsgi_master_process())
    if should_initialize:
        _initialize_components()
        return
    _logger.info(
        "Skipping ADOT initialization since deferral to worker is enabled, and this is a master process."
    )
8995

9096

@@ -156,6 +162,27 @@ def _init_tracing(
156162
# END The OpenTelemetry Authors code
157163

158164

165+
def _is_defer_to_workers_enabled():
    """Return True when the opt-in deferral environment variable is set to "true".

    The value is compared after stripping surrounding whitespace and lower-casing it;
    an unset variable is treated as "false".
    """
    raw_value = os.environ.get(OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED_CONFIG, "false")
    normalized = raw_value.strip().lower()
    return normalized == "true"
167+
168+
169+
def _is_wsgi_master_process():
    """Report whether the current process is the first one to reach this check.

    Auto-instrumentation loads whenever a process is created and, due to known issues with
    instrumenting WSGI apps using OTel, instrumentation of the master process should be skipped.
    Typically, a WSGI fork process model server spawns a single master process and multiple
    worker processes. The first caller finds no marker in the environment, sets it, and is
    treated as the master. Child worker processes inherit the master's environment, so they
    find the marker already set and are treated as workers.

    Note: this function is not idempotent. Calling it more than once in the same master
    process returns an incorrect result (the second call reports a worker), so use carefully.

    Returns:
        True if the marker was not yet set (master process), False otherwise (worker process).
    """
    marker = "IS_WSGI_MASTER_PROCESS_ALREADY_SEEN"
    already_seen = os.environ.get(marker, "false").lower() == "true"
    if not already_seen:
        # Set the marker before anything else so forked children inherit it.
        os.environ[marker] = "true"
        _logger.info("pid %s identified as a master process", str(os.getpid()))
        return True
    _logger.info("pid %s identified as a worker process", str(os.getpid()))
    return False
184+
185+
159186
def _exclude_urls_for_instrumentations():
160187
urls_to_exclude_instr = "SamplingTargets,GetSamplingRules"
161188
requests_excluded_urls = os.environ.pop("OTEL_PYTHON_REQUESTS_EXCLUDED_URLS", "")

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
_customize_sampler,
1717
_customize_span_processors,
1818
_is_application_signals_enabled,
19+
_is_defer_to_workers_enabled,
20+
_is_wsgi_master_process,
1921
)
2022
from amazon.opentelemetry.distro.aws_opentelemetry_distro import AwsOpenTelemetryDistro
2123
from amazon.opentelemetry.distro.aws_span_metrics_processor import AwsSpanMetricsProcessor
@@ -305,6 +307,50 @@ def test_application_signals_exporter_provider(self):
305307
self.assertEqual("127.0.0.1:2000", exporter._udp_exporter._endpoint)
306308
os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None)
307309

310+
def test_is_defer_to_workers_enabled(self):
    """The deferral flag is enabled only when the env var is "true" (case-insensitive)."""
    config_key = "OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED"

    # patch.dict overrides any value already present in the environment (setdefault would
    # not) and restores os.environ on exit, even if an assertion fails.
    with patch.dict(os.environ, {config_key: "True"}):
        self.assertTrue(_is_defer_to_workers_enabled())

    with patch.dict(os.environ, {config_key: "False"}):
        self.assertFalse(_is_defer_to_workers_enabled())

    # An unset variable must be treated as disabled.
    with patch.dict(os.environ):
        os.environ.pop(config_key, None)
        self.assertFalse(_is_defer_to_workers_enabled())
319+
320+
def test_is_wsgi_master_process_first_time(self):
    """The first check in a process reports master and sets the marker env var."""
    marker = "IS_WSGI_MASTER_PROCESS_ALREADY_SEEN"

    # patch.dict restores os.environ on exit, so the marker never leaks into other tests.
    with patch.dict(os.environ):
        # Guarantee the "first time" precondition regardless of what earlier tests or the
        # ambient environment left behind.
        os.environ.pop(marker, None)
        self.assertTrue(_is_wsgi_master_process())
        self.assertEqual(os.environ[marker], "true")
324+
325+
@patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._initialize_components")
def test_initialize_components_skipped_in_master_when_deferred_enabled(self, mock_initialize_components):
    """With deferral enabled and no marker set, _configure must not initialize components."""
    # patch.dict forces the flag on even if the environment already defines it, and restores
    # os.environ (including the marker written by _configure) when the block exits.
    with patch.dict(os.environ, {"OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED": "True"}):
        os.environ.pop("IS_WSGI_MASTER_PROCESS_ALREADY_SEEN", None)
        self.assertTrue(_is_defer_to_workers_enabled())
        AwsOpenTelemetryConfigurator()._configure()
        mock_initialize_components.assert_not_called()
334+
335+
@patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._initialize_components")
def test_initialize_components_called_in_worker_when_deferred_enabled(self, mock_initialize_components):
    """With deferral enabled and the marker already set, _configure initializes components."""
    worker_environment = {
        "OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED": "True",
        "IS_WSGI_MASTER_PROCESS_ALREADY_SEEN": "true",
    }
    # patch.dict overrides pre-existing values (setdefault would keep them) and restores
    # os.environ on exit, even if an assertion fails.
    with patch.dict(os.environ, worker_environment):
        self.assertTrue(_is_defer_to_workers_enabled())
        self.assertFalse(_is_wsgi_master_process())
        AwsOpenTelemetryConfigurator()._configure()
        mock_initialize_components.assert_called_once()
345+
346+
@patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._initialize_components")
def test_initialize_components_called_when_deferred_disabled(self, mock_initialize_components):
    """With the deferral flag unset, _configure always initializes components."""
    # patch.dict snapshots os.environ and restores it on exit, so removing the flag here
    # does not permanently alter the environment seen by other tests.
    with patch.dict(os.environ):
        os.environ.pop("OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED", None)
        self.assertFalse(_is_defer_to_workers_enabled())
        AwsOpenTelemetryConfigurator()._configure()
        mock_initialize_components.assert_called_once()
353+
308354

309355
def validate_distro_environ():
310356
tc: TestCase = TestCase()

0 commit comments

Comments
 (0)