Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 36 additions & 25 deletions azurelinuxagent/ga/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,21 +110,29 @@ class FirewallState(object):


class EnableFirewall(PeriodicOperation):
_REPORTING_PERIOD = datetime.timedelta(hours=24) # we set a limit on the number of messages logged within this period
_REPORTING_PERIOD = datetime.timedelta(hours=24) # We set limits on the number of reports for this period. Limits are reset after the period elapses.

def __init__(self, wire_server_address):
super(EnableFirewall, self).__init__(conf.get_enable_firewall_period())
self._wire_server_address = wire_server_address
self._firewall_manager = None # initialized on demand in the _operation method
self._firewall_state = FirewallState.OK # Initialized to OK to prevent turning on verbose mode on the initial invocation of _operation(). It is properly initialized as soon as we do the first check of the firewall.
self._report_count = 0
self._next_report_time = datetime.datetime.now(UTC)
#
# We use the below members to limit the number of reports we emit during a reporting period (a report consist of all the telemetry events/logging emitted during 1 execution of the _operation() method).
# When the firewall state is OK we emit 1 single report per period, otherwise we emit up to 3 reports per period. The self._report_count member is used to set this limit.
# However, if the state of the firewall changes after the limit has been exceeded, we want to know immediately, not until the full reporting period has elapsed. So, when the state changes, we reset self._report_count.
# Now, if the state of the firewall changes multiple times during the same reporting period, the above strategy can produce to many reports, so we set an absolute limit per period. The self_period_report_count member
# is used to set that limit and is reset each reporting period.
#
self._reporting_period_end = datetime.datetime.now(UTC) + EnableFirewall._REPORTING_PERIOD
self._report_count = 0 # we can reset this count more than once per period depending on the state of the firewall
self._period_report_count = 0 # this count is reset only once per period
self._should_report = True

def _operation(self):
try:
#
# Each check of the firewall can produce a report, but we limit the number of reports we emit within a fix period of time.
# We check the reporting limits and set self._should_report at the beginning of the method. Note that self._report_count and self._should_report can be reset to their initial values if the state of the firewall changes.
#
self._update_reporting_state()

Expand All @@ -140,49 +148,52 @@ def _operation(self):
try:
if self._firewall_manager.check():
self._update_firewall_state(FirewallState.OK)
self._report(event.info, WALAEventOperation.Firewall, "The firewall is configured correctly. Current state:\n{0}", self._firewall_manager.get_state())
self._emit_event(event.info, WALAEventOperation.Firewall, "The firewall is configured correctly. Current state:\n{0}", self._firewall_manager.get_state())
return
self._update_firewall_state(FirewallState.NotSet)
self._report(event.warn, WALAEventOperation.Firewall, "The firewall has not been setup. Will set it up.")
self._emit_event(event.warn, WALAEventOperation.Firewall, "The firewall has not been setup. Will set it up.")
except IptablesInconsistencyError as e:
self._update_firewall_state(FirewallState.Inconsistent)
self._report(event.warn, WALAEventOperation.FirewallInconsistency, "The results returned by iptables are inconsistent, will not change the current state of the firewall: {0}", ustr(e))
self._emit_event(event.warn, WALAEventOperation.FirewallInconsistency, "The results returned by iptables are inconsistent, will not change the current state of the firewall: {0}", ustr(e))
return
except FirewallStateError as e:
self._update_firewall_state(FirewallState.Invalid)
self._report(event.warn, WALAEventOperation.ResetFirewall, "The firewall is not configured correctly. {0}. Will reset it. Current state:\n{1}", ustr(e), self._firewall_manager.get_state())
self._emit_event(event.warn, WALAEventOperation.ResetFirewall, "The firewall is not configured correctly. {0}. Will reset it. Current state:\n{1}", ustr(e), self._firewall_manager.get_state())
self._firewall_manager.remove()

self._firewall_manager.setup()
self._update_firewall_state(FirewallState.OK)
self._report(event.info, WALAEventOperation.Firewall, "The firewall was setup successfully:\n{0}", self._firewall_manager.get_state())
self._emit_event(event.info, WALAEventOperation.Firewall, "The firewall was setup successfully:\n{0}", self._firewall_manager.get_state())
except Exception as e:
self._update_firewall_state(FirewallState.Unknown)
self._report(event.warn, WALAEventOperation.Firewall, "An error occurred while verifying the state of the firewall: {0}. Current state:\n{1}", textutil.format_exception(e), self._firewall_manager.get_state())
self._emit_event(event.warn, WALAEventOperation.Firewall, "An error occurred while verifying the state of the firewall: {0}. Current state:\n{1}", textutil.format_exception(e), self._firewall_manager.get_state())

def _report(self, report_function, operation, message, *args):
def _emit_event(self, event_function, operation, message, *args):
if self._should_report:
report_function(operation, message, *args)
event_function(operation, message, *args)

def _update_reporting_state(self):
if self._next_report_time > datetime.datetime.now(UTC):
self._should_report = False
return
if datetime.datetime.now(UTC) >= self._reporting_period_end: # Reset the all the report counts every time a reporting period has elapsed
self._report_count = 0
self._period_report_count = 0
self._reporting_period_end = datetime.datetime.now(UTC) + EnableFirewall._REPORTING_PERIOD

# if the state of the firewall does not change within a reporting period, we report only once when the state is OK and max 3 times when it is not.
# if the state changes, we report at most 8 times total.
self._report_count += 1
self._period_report_count += 1
max_reports = 1 if self._firewall_state == FirewallState.OK else 3
if self._report_count <= max_reports:
self._should_report = True
else:
self._report_count = 0
self._next_report_time = datetime.datetime.now(UTC) + self._REPORTING_PERIOD
self._should_report = False
self._should_report = self._report_count <= max_reports

def _update_firewall_state(self, firewall_state):
if (self._firewall_state == FirewallState.OK) != (firewall_state == FirewallState.OK): # reset the reporting limits if the firewall state goes from OK to not-OK or vice versa
self._report_count = 0
self._next_report_time = datetime.datetime.now(UTC)
self._should_report = True
if (self._firewall_state == FirewallState.OK) != (firewall_state == FirewallState.OK):
# Reset the self._report_count and self._should_report to their initial values if the firewall state goes from OK to not-OK or vice versa. We do this because we want to know about state changes
# immediately, even if the report count for the current period has already been exceeded. Otherwise, we would need to wait for the entire reporting period to report those changes.
# Now, if the state of the firewall changes too often, this strategy may emit too many events, so we use self._period_report_count, to set an absolute limit that is only reset when the full
# period elapses.
if self._period_report_count <= 8:
self._report_count = 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like if we enter this condition, we don't increment self._period_count and self._report_count before we call self._report() on lines 147, 150, 154, 162

Copy link
Member Author

@narrieta narrieta Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, the limit is per-report (1 iteration == 1 report), rather than per message, as it was 2 agent versions ago.

we check the limits at the top of the iteration (line 130) and then update them later, the updated values will take effect on the next iteration

i'll add this as a comment in the code

self._should_report = True
self._firewall_state = firewall_state


Expand Down
49 changes: 46 additions & 3 deletions tests/ga/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from azurelinuxagent.ga.env import MonitorDhcpClientRestart, EnableFirewall

from tests.lib.event import get_events_from_mock
from tests.lib.tools import AgentTestCase, patch, DEFAULT
from tests.lib.tools import AgentTestCase, patch, DEFAULT, Mock
from tests.lib.mock_firewall_command import MockIpTables


Expand Down Expand Up @@ -220,8 +220,8 @@ def test_it_should_not_modify_the_firewall_rules_when_the_check_command_is_incon

def test_it_should_log_the_state_of_the_firewall_once_per_reporting_period(self):
with MockIpTables() as mock_iptables:
EnableFirewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now EnableFirewall.init() references _REPORTING_PERIOD, so the latter needs to be initialized before instantiating EnableFirewall.

enable_firewall = EnableFirewall('168.63.129.16')
enable_firewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500)

with patch.multiple("azurelinuxagent.ga.firewall_manager.event", info=DEFAULT, warn=DEFAULT, error=DEFAULT) as patches:
info = patches["info"]
Expand Down Expand Up @@ -262,8 +262,8 @@ def test_it_should_log_errors_thrice_per_reporting_period(self):
with MockIpTables(check_matches_list=False) as mock_iptables:
mock_iptables.set_return_values("-C", accept_dns=0, accept=0, drop=1, legacy=0)

EnableFirewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500)
enable_firewall = EnableFirewall('168.63.129.16')
enable_firewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500)

firewall_manager_in_verbose_mode = []

Expand Down Expand Up @@ -324,3 +324,46 @@ def test_it_should_log_errors_thrice_per_reporting_period(self):

self.assertEqual(0, error.call_count, "No errors should have been reported. Got: {0}". format(error.call_args_list))

def test_it_should_set_a_limit_on_the_number_of_reports_when_the_firewall_state_changes(self):
EnableFirewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500)
enable_firewall = EnableFirewall('168.63.129.16')

call_count = [0]

def mock_check(*_, **__):
call_count[0] += 1
return call_count[0] % 2 == 0 # flip between False (the firewall has not been set up) and True (the firewall is OK) and on each call

enable_firewall._firewall_manager = Mock()
enable_firewall._firewall_manager.check = Mock(side_effect=mock_check)
enable_firewall._firewall_manager.get_state = Mock(return_value='*** mock state***')

# E0601: Using variable 'add_event_patch' before assignment (used-before-assignment)
get_firewall_events = lambda: [(kwargs["is_success"], kwargs['message']) for _, kwargs in add_event_patch.call_args_list if 'Firewall' in kwargs["op"]] # pylint: disable=E0601

# We expect a maximum of 8 reports per reporting period.
# The mock for check() flips between False and True; the former produces a report with a WARNING (is_success == False) and an INFO (is_success == True); the latter produces 1 INFO
expected = 4 * [
(False, '[WARNING] The firewall has not been setup. Will set it up.'), (True, 'The firewall was setup successfully:\n*** mock state***'),
(True, 'The firewall is configured correctly. Current state:\n*** mock state***')
]

# First reporting period
with patch("azurelinuxagent.common.event.add_event") as add_event_patch:
for _ in range(0, 12):
enable_firewall._operation()

self.assertEqual(12, call_count[0], "Expected 12 calls to FirewallManager.check() during the first reporting period")
firewall_events = get_firewall_events()
self.assertEqual(expected, firewall_events, "First reporting period: Expected 1 WARNING (is_success == False) and 1 INFOs (is_success == True), then 1 more INFO, repeated 4 times")

time.sleep(0.5)

# Second reporting period
with patch("azurelinuxagent.common.event.add_event") as add_event_patch:
for _ in range(0, 10):
enable_firewall._operation()

self.assertEqual(22, call_count[0], "Expected a total of 22 calls to FirewallManager.check() after the second reporting period")
firewall_events = get_firewall_events()
self.assertEqual(expected, firewall_events, "Second reporting period: Expected 1 WARNING (is_success == False) and 1 INFOs (is_success == True), then 1 more INFO, repeated 4 times")