Skip to content

Commit 1e87217

Browse files
umaannamalai, TimPansino, mergify[bot]
authored
Add Agent Control Health Checks (#1294)
* Initial commit.
* Add status change logic.
* Checkpoint.
* Remove unused arg.
* Clear configuration leaking from test_configuration.py.
* Remove log statement when fleet ID is not found.
* Add support testing and support for new status codes.
* Update thread names to use NR-Control.
* Refactoring.
* Address review feedback.
* Capture urlparse logic in try except.
* Add additional shutdown check in shutdown_agent.
* Refactor status code and message usage in superagent
* Update regex assertion
  Co-authored-by: Timothy Pansino <[email protected]>
* Add super agent supportability metric.
* Add newline.
* Rename super agent to agent control.
* Fix supportability metric test.
* Update agent control config.
* Add explicit check for empty delivery location.
* Change delivery location to property on health check class.
* Initial commit.
* Add status change logic.
* Checkpoint.
* Remove unused arg.
* Clear configuration leaking from test_configuration.py.
* Remove log statement when fleet ID is not found.
* Add support testing and support for new status codes.
* Update thread names to use NR-Control.
* Refactoring.
* Address review feedback.
* Capture urlparse logic in try except.
* Add additional shutdown check in shutdown_agent.
* Refactor status code and message usage in superagent
* Update regex assertion
  Co-authored-by: Timothy Pansino <[email protected]>
* Add super agent supportability metric.
* Add newline.
* Rename super agent to agent control.
* Fix supportability metric test.
* Use environ_as_bool/ int.
* Add max 3 app names unhealthy status code.
* Switch order of tests to avoid config collisions.
* Reset app name.
* Reuse app name list variable.
* Fix line spacing.
* [Mega-Linter] Apply linters fixes
* Initial commit.
* Add status change logic.
* Checkpoint.
* Remove unused arg.
* Clear configuration leaking from test_configuration.py.
* Remove log statement when fleet ID is not found.
* Add support testing and support for new status codes.
* Update thread names to use NR-Control.
* Refactoring.
* Address review feedback.
* Capture urlparse logic in try except.
* Add additional shutdown check in shutdown_agent.
* Refactor status code and message usage in superagent
* Update regex assertion
  Co-authored-by: Timothy Pansino <[email protected]>
* Add super agent supportability metric.
* Add newline.
* Rename super agent to agent control.
* Fix supportability metric test.
* Update agent control config.
* Add explicit check for empty delivery location.
* Change delivery location to property on health check class.
* Initial commit.
* Add status change logic.
* Checkpoint.
* Remove unused arg.
* Clear configuration leaking from test_configuration.py.
* Remove log statement when fleet ID is not found.
* Add support testing and support for new status codes.
* Update thread names to use NR-Control.
* Refactoring.
* Address review feedback.
* Capture urlparse logic in try except.
* Add additional shutdown check in shutdown_agent.
* Refactor status code and message usage in superagent
* Update regex assertion
  Co-authored-by: Timothy Pansino <[email protected]>
* Rename super agent to agent control.
* Use environ_as_bool/ int.
* Add license header to HTTP client recorder.

---------

Co-authored-by: Tim Pansino <[email protected]>
Co-authored-by: Timothy Pansino <[email protected]>
Co-authored-by: umaannamalai <[email protected]>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent fb171c0 commit 1e87217

File tree

10 files changed: 701 additions & 56 deletions

newrelic/config.py

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import logging
1818
import os
1919
import sys
20+
import threading
21+
import time
2022
import traceback
2123

2224
import newrelic.api.application
@@ -40,12 +42,19 @@
4042
from newrelic.common.log_file import initialize_logging
4143
from newrelic.common.object_names import callable_name, expand_builtin_exception_name
4244
from newrelic.core import trace_cache
45+
from newrelic.core.agent_control_health import (
46+
HealthStatus,
47+
agent_control_health_instance,
48+
agent_control_healthcheck_loop,
49+
)
4350
from newrelic.core.config import (
4451
Settings,
4552
apply_config_setting,
4653
default_host,
4754
fetch_config_setting,
4855
)
56+
from newrelic.core.agent_control_health import HealthStatus, agent_control_health_instance, agent_control_healthcheck_loop
57+
4958

5059
__all__ = ["initialize", "filter_app_factory"]
5160

@@ -100,6 +109,7 @@ def _map_aws_account_id(s):
100109
# all the settings have been read.
101110

102111
_cache_object = []
112+
agent_control_health = agent_control_health_instance()
103113

104114

105115
def _reset_config_parser():
@@ -592,12 +602,16 @@ def _process_app_name_setting():
592602
# primary application name and link it with the other applications.
593603
# When activating the application the linked names will be sent
594604
# along to the core application where the association will be
595-
# created if the do not exist.
605+
# created if it does not exist.
606+
607+
app_name_list = _settings.app_name.split(";")
608+
name = app_name_list[0].strip() or "Python Application"
596609

597-
name = _settings.app_name.split(";")[0].strip() or "Python Application"
610+
if len(app_name_list) > 3:
611+
agent_control_health.set_health_status(HealthStatus.MAX_APP_NAME.value)
598612

599613
linked = []
600-
for altname in _settings.app_name.split(";")[1:]:
614+
for altname in app_name_list[1:]:
601615
altname = altname.strip()
602616
if altname:
603617
linked.append(altname)
@@ -1033,21 +1047,25 @@ def _load_configuration(
10331047

10341048
# Now read in the configuration file. Cache the config file
10351049
# name in internal settings object as indication of succeeding.
1036-
if config_file.endswith(".toml"):
1037-
try:
1038-
import tomllib
1039-
except ImportError:
1040-
raise newrelic.api.exceptions.ConfigurationError(
1041-
"TOML configuration file can only be used if tomllib is available (Python 3.11+)."
1042-
)
1043-
with open(config_file, "rb") as f:
1044-
content = tomllib.load(f)
1045-
newrelic_section = content.get("tool", {}).get("newrelic")
1046-
if not newrelic_section:
1047-
raise newrelic.api.exceptions.ConfigurationError("New Relic configuration not found in TOML file.")
1048-
_config_object.read_dict(_toml_config_to_configparser_dict(newrelic_section))
1049-
elif not _config_object.read([config_file]):
1050-
raise newrelic.api.exceptions.ConfigurationError(f"Unable to open configuration file {config_file}.")
1050+
try:
1051+
if config_file.endswith(".toml"):
1052+
try:
1053+
import tomllib
1054+
except ImportError:
1055+
raise newrelic.api.exceptions.ConfigurationError(
1056+
"TOML configuration file can only be used if tomllib is available (Python 3.11+)."
1057+
)
1058+
with open(config_file, "rb") as f:
1059+
content = tomllib.load(f)
1060+
newrelic_section = content.get("tool", {}).get("newrelic")
1061+
if not newrelic_section:
1062+
raise newrelic.api.exceptions.ConfigurationError("New Relic configuration not found in TOML file.")
1063+
_config_object.read_dict(_toml_config_to_configparser_dict(newrelic_section))
1064+
elif not _config_object.read([config_file]):
1065+
raise newrelic.api.exceptions.ConfigurationError(f"Unable to open configuration file {config_file}.")
1066+
except Exception:
1067+
agent_control_health.set_health_status(HealthStatus.INVALID_CONFIG.value)
1068+
raise
10511069

10521070
_settings.config_file = config_file
10531071

@@ -4826,13 +4844,29 @@ def _setup_agent_console():
48264844
newrelic.core.agent.Agent.run_on_startup(_startup_agent_console)
48274845

48284846

4847+
agent_control_health_thread = threading.Thread(
4848+
name="Agent-Control-Health-Main-Thread", target=agent_control_healthcheck_loop
4849+
)
4850+
agent_control_health_thread.daemon = True
4851+
4852+
4853+
def _setup_agent_control_health():
4854+
if agent_control_health_thread.is_alive():
4855+
return
4856+
4857+
if agent_control_health.health_check_enabled:
4858+
agent_control_health_thread.start()
4859+
4860+
48294861
def initialize(
48304862
config_file=None,
48314863
environment=None,
48324864
ignore_errors=None,
48334865
log_file=None,
48344866
log_level=None,
48354867
):
4868+
agent_control_health.start_time_unix_nano = time.time_ns()
4869+
48364870
if config_file is None:
48374871
config_file = os.environ.get("NEW_RELIC_CONFIG_FILE", None)
48384872

@@ -4844,6 +4878,12 @@ def initialize(
48444878

48454879
_load_configuration(config_file, environment, ignore_errors, log_file, log_level)
48464880

4881+
_setup_agent_control_health()
4882+
4883+
if _settings.monitor_mode:
4884+
if not _settings.license_key:
4885+
agent_control_health.set_health_status(HealthStatus.MISSING_LICENSE.value)
4886+
48474887
if _settings.monitor_mode or _settings.developer_mode:
48484888
_settings.enabled = True
48494889
_setup_instrumentation()
@@ -4852,6 +4892,7 @@ def initialize(
48524892
_setup_agent_console()
48534893
else:
48544894
_settings.enabled = False
4895+
agent_control_health.set_health_status(HealthStatus.AGENT_DISABLED.value)
48554896

48564897

48574898
def filter_app_factory(app, global_conf, config_file, environment=None):

newrelic/console.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from newrelic.common.object_wrapper import ObjectProxy
3737
from newrelic.core.agent import agent_instance
3838
from newrelic.core.config import flatten_settings, global_settings
39+
from newrelic.core.agent_control_health import HealthStatus, agent_control_health_instance
3940
from newrelic.core.trace_cache import trace_cache
4041

4142

@@ -512,6 +513,8 @@ def __init__(self, config_file, stdin=None, stdout=None, log=None):
512513
self.__log_object = log
513514

514515
if not self.__config_object.read([config_file]):
516+
agent_control_instance = agent_control_health_instance()
517+
agent_control_instance.set_health_status(HealthStatus.INVALID_CONFIG.value)
515518
raise RuntimeError(f"Unable to open configuration file {config_file}.")
516519

517520
listener_socket = self.__config_object.get("newrelic", "console.listener_socket") % {"pid": "*"}

newrelic/core/agent.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from newrelic.samplers.cpu_usage import cpu_usage_data_source
3636
from newrelic.samplers.gc_data import garbage_collector_data_source
3737
from newrelic.samplers.memory_usage import memory_usage_data_source
38+
from newrelic.core.agent_control_health import HealthStatus, agent_control_health_instance
3839

3940
_logger = logging.getLogger(__name__)
4041

@@ -217,6 +218,9 @@ def __init__(self, config):
217218
self._scheduler = sched.scheduler(self._harvest_timer, self._harvest_shutdown.wait)
218219

219220
self._process_shutdown = False
221+
self._agent_control = agent_control_health_instance()
222+
223+
self._agent_control = agent_control_health_instance()
220224

221225
self._lock = threading.Lock()
222226

@@ -734,6 +738,11 @@ def shutdown_agent(self, timeout=None):
734738
if self._harvest_shutdown_is_set():
735739
return
736740

741+
self._agent_control.set_health_status(HealthStatus.AGENT_SHUTDOWN.value)
742+
743+
if self._agent_control.health_check_enabled:
744+
self._agent_control.write_to_health_file()
745+
737746
if timeout is None:
738747
timeout = self._config.shutdown_timeout
739748

0 commit comments

Comments
 (0)