Skip to content

Commit 9d1ff4b

Browse files
xrmxtheletterf
andauthored
Handle log level change from environment variable (#397)
* distro: introduce handling of ELASTIC_OTEL_LOG_LEVEL That this our version of OTEL_LOG_LEVEL with the values supported by the central config logging_level setting. * Fix default logging level * Update configuration docs * Add tests * Please pyright * Always set the logging level * Update docs/reference/edot-python/configuration.md * Update docs/reference/edot-python/configuration.md * Switch to OTEL_LOG_LEVEL --------- Co-authored-by: Fabrizio Ferri-Benedetti <[email protected]>
1 parent 1d2fb2e commit 9d1ff4b

File tree

4 files changed

+121
-28
lines changed

4 files changed

+121
-28
lines changed

docs/reference/edot-python/configuration.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,23 +90,27 @@ EDOT Python uses different defaults than OpenTelemetry Python for the following
9090
| Option | EDOT Python default | OpenTelemetry Python default |
9191
|---|---|---|
9292
| `OTEL_EXPERIMENTAL_RESOURCE_DETECTORS` | `process_runtime,os,otel,telemetry_distro,service_instance,containerid,_gcp,aws_ec2,aws_ecs,aws_elastic_beanstalk,azure_app_service,azure_vm` | `otel` |
93-
| `OTEL_METRICS_EXEMPLAR_FILTER` | `always_off` | `trace_based` |
9493
| `OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE` | `DELTA` | `CUMULATIVE` |
94+
| `OTEL_LOG_LEVEL` | `warn` | Not handled |
95+
| `OTEL_METRICS_EXEMPLAR_FILTER` | `always_off` | `trace_based` |
9596
| `OTEL_TRACES_SAMPLER` | `parentbased_traceidratio` | `parentbased_always_on` |
9697
| `OTEL_TRACES_SAMPLER_ARG` | `1.0` | |
9798

9899
:::{note}
99100
`OTEL_EXPERIMENTAL_RESOURCE_DETECTORS` cloud resource detectors are dynamically set. When running in a Kubernetes Pod it will be set to `process_runtime,os,otel,telemetry_distro,service_instance,_gcp,aws_eks`.
100101
:::
101102

103+
:::{note}
104+
`OTEL_LOG_LEVEL` accepts the following levels: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, `off`.
105+
:::
102106

103107
### Configuration options only available in EDOT Python
104108

105109
`ELASTIC_OTEL_` options are specific to Elastic and will always live in EDOT Python include the following.
106110

107111
| Option(s) | Default | Description |
108112
|---|---|---|
109-
| `ELASTIC_OTEL_SYSTEM_METRICS_ENABLED` | `false` | When sets to `true`, sends *system namespace* metrics. |
113+
| `ELASTIC_OTEL_SYSTEM_METRICS_ENABLED` | `false` | When set to `true`, sends *system namespace* metrics. |
110114

111115
## LLM settings
112116

src/elasticotel/distro/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,12 @@
5353
from opentelemetry._opamp.proto import opamp_pb2 as opamp_pb2
5454

5555
from elasticotel.distro import version
56-
from elasticotel.distro.environment_variables import ELASTIC_OTEL_OPAMP_ENDPOINT, ELASTIC_OTEL_SYSTEM_METRICS_ENABLED
56+
from elasticotel.distro.environment_variables import (
57+
ELASTIC_OTEL_OPAMP_ENDPOINT,
58+
ELASTIC_OTEL_SYSTEM_METRICS_ENABLED,
59+
)
5760
from elasticotel.distro.resource_detectors import get_cloud_resource_detectors
58-
from elasticotel.distro.config import opamp_handler, DEFAULT_SAMPLING_RATE
61+
from elasticotel.distro.config import opamp_handler, DEFAULT_SAMPLING_RATE, _initialize_config
5962

6063

6164
logger = logging.getLogger(__name__)
@@ -88,6 +91,9 @@ def _configure(self, **kwargs):
8891
}
8992
super()._configure(**kwargs)
9093

94+
# set our local config based on environment variables
95+
_initialize_config()
96+
9197
enable_opamp = False
9298
endpoint = os.environ.get(ELASTIC_OTEL_OPAMP_ENDPOINT)
9399
if endpoint:

src/elasticotel/distro/config.py

Lines changed: 60 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,16 @@
1717
from __future__ import annotations
1818

1919
import logging
20+
import os
2021
from dataclasses import dataclass
2122

2223
from opentelemetry import trace
23-
24-
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
2524
from opentelemetry._opamp import messages
2625
from opentelemetry._opamp.agent import OpAMPAgent
2726
from opentelemetry._opamp.client import OpAMPClient
2827
from opentelemetry._opamp.proto import opamp_pb2 as opamp_pb2
28+
from opentelemetry.sdk.environment_variables import OTEL_LOG_LEVEL, OTEL_TRACES_SAMPLER_ARG
29+
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
2930

3031

3132
logger = logging.getLogger(__name__)
@@ -41,33 +42,69 @@
4142
}
4243

4344
DEFAULT_SAMPLING_RATE = 1.0
44-
DEFAULT_LOGGING_LEVEL = "info"
45+
DEFAULT_LOGGING_LEVEL = "warn"
4546

4647
LOGGING_LEVEL_CONFIG_KEY = "logging_level"
4748
SAMPLING_RATE_CONFIG_KEY = "sampling_rate"
4849

50+
_config: Config | None = None
51+
4952

50-
@dataclass
5153
class ConfigItem:
52-
value: str
54+
def __init__(self, default: str, from_env_var: str | None = None):
55+
self._default = default
56+
self._env_var = from_env_var
57+
58+
def init(self):
59+
if self._env_var is not None:
60+
value = os.environ.get(self._env_var, self._default)
61+
else:
62+
value = self._default
63+
self.value = value
64+
65+
def update(self, value: str):
66+
"""Update value"""
67+
self.value = value
68+
69+
def reset(self) -> str:
70+
"""Value is not good, reset to default"""
71+
self.value = self._default
72+
return self.value
5373

5474

5575
@dataclass
5676
class ConfigUpdate:
5777
error_message: str = ""
5878

5979

60-
# TODO: this should grow into a proper configuration store initialized from env vars and so on
80+
# TODO: this should grow into a proper configuration store in the OpenTelemetry SDK
6181
@dataclass
6282
class Config:
63-
sampling_rate = ConfigItem(value=str(DEFAULT_SAMPLING_RATE))
64-
logging_level = ConfigItem(value=DEFAULT_LOGGING_LEVEL)
83+
sampling_rate = ConfigItem(default=str(DEFAULT_SAMPLING_RATE), from_env_var=OTEL_TRACES_SAMPLER_ARG)
84+
# currently the sdk does not handle OTEL_LOG_LEVEL, so we use handle it on our own
85+
logging_level = ConfigItem(default=DEFAULT_LOGGING_LEVEL, from_env_var=OTEL_LOG_LEVEL)
6586

6687
def to_dict(self):
6788
return {LOGGING_LEVEL_CONFIG_KEY: self.logging_level.value, SAMPLING_RATE_CONFIG_KEY: self.sampling_rate.value}
6889

90+
def _handle_logging(self):
91+
# do validation, we only validate logging_level because sampling_rate is handled by the sdk already
92+
logging_level = _LOG_LEVELS_MAP.get(self.logging_level.value)
93+
if logging_level is None:
94+
logger.error("Logging level not handled: %s", self.logging_level.value)
95+
self.logging_level.reset()
96+
return
6997

70-
_config = Config()
98+
# apply logging_level changes since these are not handled by the sdk
99+
logging.getLogger("opentelemetry").setLevel(logging_level)
100+
logging.getLogger("elasticotel").setLevel(logging_level)
101+
102+
def __post_init__(self):
103+
# we need to initialize each config item when we instantiate the Config and not at declaration time
104+
self.sampling_rate.init()
105+
self.logging_level.init()
106+
107+
self._handle_logging()
71108

72109

73110
def _handle_logging_level(remote_config) -> ConfigUpdate:
@@ -83,7 +120,8 @@ def _handle_logging_level(remote_config) -> ConfigUpdate:
83120
# update upstream and distro logging levels
84121
logging.getLogger("opentelemetry").setLevel(logging_level)
85122
logging.getLogger("elasticotel").setLevel(logging_level)
86-
_config.logging_level = ConfigItem(value=config_logging_level)
123+
if _config:
124+
_config.logging_level.update(value=config_logging_level)
87125
error_message = ""
88126
return ConfigUpdate(error_message=error_message)
89127

@@ -118,14 +156,22 @@ def _handle_sampling_rate(remote_config) -> ConfigUpdate:
118156
root_sampler._rate = sampling_rate # type: ignore[reportAttributeAccessIssue]
119157
root_sampler._bound = root_sampler.get_bound_for_rate(root_sampler._rate) # type: ignore[reportAttributeAccessIssue]
120158
logger.debug("Updated sampler rate to %s", sampling_rate)
121-
_config.sampling_rate = ConfigItem(value=config_sampling_rate)
159+
if _config:
160+
_config.sampling_rate.update(value=config_sampling_rate)
122161
return ConfigUpdate()
123162

124163

125164
def _report_full_state(message: opamp_pb2.ServerToAgent):
126165
return message.flags & opamp_pb2.ServerToAgentFlags_ReportFullState
127166

128167

168+
def _initialize_config():
169+
"""This is called by the SDK Configurator"""
170+
global _config
171+
_config = Config()
172+
return _config
173+
174+
129175
def _get_config():
130176
global _config
131177
return _config
@@ -164,8 +210,9 @@ def opamp_handler(agent: OpAMPAgent, client: OpAMPClient, message: opamp_pb2.Ser
164210
)
165211

166212
# update the cached effective config with what we updated
167-
effective_config = {"elastic": _config.to_dict()}
168-
client._update_effective_config(effective_config)
213+
if _config:
214+
effective_config = {"elastic": _config.to_dict()}
215+
client._update_effective_config(effective_config)
169216

170217
# if we changed the config send an ack to the server so we don't receive the same config at every heartbeat response
171218
if updated_remote_config is not None:

tests/distro/test_distro.py

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,41 @@ def test_configurator_ignores_opamp_without_endpoint(self, client_mock, agent_mo
235235
client_mock.assert_not_called()
236236
agent_mock.assert_not_called()
237237

238+
@mock.patch.dict("os.environ", {"OTEL_LOG_LEVEL": "debug"}, clear=True)
239+
@mock.patch("elasticotel.distro.config.logger")
240+
def test_configurator_applies_elastic_otel_log_level(self, logger_mock):
241+
ElasticOpenTelemetryConfigurator()._configure()
242+
243+
logger_mock.error.assert_not_called()
244+
245+
self.assertEqual(logging.getLogger("opentelemetry").getEffectiveLevel(), logging.DEBUG)
246+
self.assertEqual(logging.getLogger("elasticotel").getEffectiveLevel(), logging.DEBUG)
247+
248+
@mock.patch.dict("os.environ", {}, clear=True)
249+
@mock.patch("elasticotel.distro.config.logger")
250+
def test_configurator_handles_elastic_otel_log_level_not_set(self, logger_mock):
251+
ElasticOpenTelemetryConfigurator()._configure()
252+
253+
logger_mock.error.assert_not_called()
254+
255+
self.assertEqual(logging.getLogger("opentelemetry").getEffectiveLevel(), logging.WARNING)
256+
self.assertEqual(logging.getLogger("elasticotel").getEffectiveLevel(), logging.WARNING)
257+
258+
@mock.patch.dict("os.environ", {"OTEL_LOG_LEVEL": "invalid"}, clear=True)
259+
def test_configurator_handles_invalid_elastic_otel_log_level(self):
260+
with self.assertLogs("elasticotel", level="ERROR") as cm:
261+
ElasticOpenTelemetryConfigurator()._configure()
262+
263+
self.assertEqual(
264+
cm.output,
265+
[
266+
"ERROR:elasticotel.distro.config:Logging level not handled: invalid",
267+
],
268+
)
269+
270+
self.assertEqual(logging.getLogger("opentelemetry").getEffectiveLevel(), logging.WARNING)
271+
self.assertEqual(logging.getLogger("elasticotel").getEffectiveLevel(), logging.WARNING)
272+
238273

239274
class TestOpAMPHandler(TestCase):
240275
@mock.patch.object(logging, "getLogger")
@@ -247,8 +282,9 @@ def test_does_nothing_without_remote_config(self, get_logger_mock):
247282
get_logger_mock.assert_not_called()
248283

249284
@mock.patch("elasticotel.distro.config._get_config")
285+
@mock.patch.object(Config, "_handle_logging")
250286
@mock.patch.object(logging, "getLogger")
251-
def test_ignores_non_elastic_filename(self, get_logger_mock, get_config_mock):
287+
def test_ignores_non_elastic_filename(self, get_logger_mock, handle_logging_mock, get_config_mock):
252288
get_config_mock.return_value = Config()
253289
agent = mock.Mock()
254290
client = mock.Mock()
@@ -265,7 +301,7 @@ def test_ignores_non_elastic_filename(self, get_logger_mock, get_config_mock):
265301
remote_config_hash=b"1234", status=opamp_pb2.RemoteConfigStatuses_APPLIED, error_message=""
266302
)
267303
client._update_effective_config.assert_called_once_with(
268-
{"elastic": {"logging_level": "info", "sampling_rate": "1.0"}}
304+
{"elastic": {"logging_level": "warn", "sampling_rate": "1.0"}}
269305
)
270306
client._build_remote_config_status_response_message.assert_called_once_with(
271307
client._update_remote_config_status()
@@ -318,17 +354,17 @@ def test_sets_logging_to_default_info_without_logging_level_entry_in_config(self
318354
get_logger_mock.assert_has_calls(
319355
[
320356
mock.call("opentelemetry"),
321-
mock.call().setLevel(logging.INFO),
357+
mock.call().setLevel(logging.WARNING),
322358
mock.call("elasticotel"),
323-
mock.call().setLevel(logging.INFO),
359+
mock.call().setLevel(logging.WARNING),
324360
]
325361
)
326362

327363
client._update_remote_config_status.assert_called_once_with(
328364
remote_config_hash=b"1234", status=opamp_pb2.RemoteConfigStatuses_APPLIED, error_message=""
329365
)
330366
client._update_effective_config.assert_called_once_with(
331-
{"elastic": {"logging_level": "info", "sampling_rate": "1.0"}}
367+
{"elastic": {"logging_level": "warn", "sampling_rate": "1.0"}}
332368
)
333369
client._build_remote_config_status_response_message.assert_called_once_with(
334370
client._update_remote_config_status()
@@ -356,7 +392,7 @@ def test_warns_if_logging_level_does_not_match_our_map(self, get_logger_mock, ge
356392
client._update_remote_config_status()
357393
)
358394
client._update_effective_config.assert_called_once_with(
359-
{"elastic": {"logging_level": "info", "sampling_rate": "1.0"}}
395+
{"elastic": {"logging_level": "warn", "sampling_rate": "1.0"}}
360396
)
361397
agent.send.assert_called_once_with(payload=mock.ANY)
362398
client._build_full_state_message.assert_not_called()
@@ -382,7 +418,7 @@ def test_sets_matching_sampling_rate(self, get_tracer_provider_mock, get_config_
382418
remote_config_hash=b"1234", status=opamp_pb2.RemoteConfigStatuses_APPLIED, error_message=""
383419
)
384420
client._update_effective_config.assert_called_once_with(
385-
{"elastic": {"logging_level": "info", "sampling_rate": "0.5"}}
421+
{"elastic": {"logging_level": "warn", "sampling_rate": "0.5"}}
386422
)
387423
client._build_remote_config_status_response_message.assert_called_once_with(
388424
client._update_remote_config_status()
@@ -413,7 +449,7 @@ def test_sets_sampling_rate_to_default_info_without_sampling_rate_entry_in_confi
413449
remote_config_hash=b"1234", status=opamp_pb2.RemoteConfigStatuses_APPLIED, error_message=""
414450
)
415451
client._update_effective_config.assert_called_once_with(
416-
{"elastic": {"logging_level": "info", "sampling_rate": "1.0"}}
452+
{"elastic": {"logging_level": "warn", "sampling_rate": "1.0"}}
417453
)
418454
client._build_remote_config_status_response_message.assert_called_once_with(
419455
client._update_remote_config_status()
@@ -447,7 +483,7 @@ def test_warns_if_sampling_rate_value_is_invalid(self, get_tracer_provider_mock,
447483
error_message="Invalid sampling_rate unexpected",
448484
)
449485
client._update_effective_config.assert_called_once_with(
450-
{"elastic": {"logging_level": "info", "sampling_rate": "1.0"}}
486+
{"elastic": {"logging_level": "warn", "sampling_rate": "1.0"}}
451487
)
452488
client._build_remote_config_status_response_message.assert_called_once_with(
453489
client._update_remote_config_status()
@@ -479,7 +515,7 @@ def test_warns_if_sampler_is_not_what_we_expect(self, get_tracer_provider_mock,
479515
remote_config_hash=b"1234", status=opamp_pb2.RemoteConfigStatuses_APPLIED, error_message=""
480516
)
481517
client._update_effective_config.assert_called_once_with(
482-
{"elastic": {"logging_level": "info", "sampling_rate": "1.0"}}
518+
{"elastic": {"logging_level": "warn", "sampling_rate": "1.0"}}
483519
)
484520
client._build_remote_config_status_response_message.assert_called_once_with(
485521
client._update_remote_config_status()
@@ -508,7 +544,7 @@ def test_ignores_tracer_provider_without_a_sampler(self, get_tracer_provider_moc
508544
remote_config_hash=b"1234", status=opamp_pb2.RemoteConfigStatuses_APPLIED, error_message=""
509545
)
510546
client._update_effective_config.assert_called_once_with(
511-
{"elastic": {"logging_level": "info", "sampling_rate": "1.0"}}
547+
{"elastic": {"logging_level": "warn", "sampling_rate": "1.0"}}
512548
)
513549
client._build_remote_config_status_response_message.assert_called_once_with(
514550
client._update_remote_config_status()

0 commit comments

Comments
 (0)