-
Notifications
You must be signed in to change notification settings - Fork 391
Set a limit on firewall reports when the state of the firewall changes continuously #3574
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -110,21 +110,29 @@ class FirewallState(object): | |
|
|
||
|
|
||
| class EnableFirewall(PeriodicOperation): | ||
| _REPORTING_PERIOD = datetime.timedelta(hours=24) # we set a limit on the number of messages logged within this period | ||
| _REPORTING_PERIOD = datetime.timedelta(hours=24) # We set limits on the number of reports for this period. Limits are reset after the period elapses. | ||
|
|
||
| def __init__(self, wire_server_address): | ||
| super(EnableFirewall, self).__init__(conf.get_enable_firewall_period()) | ||
| self._wire_server_address = wire_server_address | ||
| self._firewall_manager = None # initialized on demand in the _operation method | ||
| self._firewall_state = FirewallState.OK # Initialized to OK to prevent turning on verbose mode on the initial invocation of _operation(). It is properly initialized as soon as we do the first check of the firewall. | ||
| self._report_count = 0 | ||
| self._next_report_time = datetime.datetime.now(UTC) | ||
| # | ||
| # We use the below members to limit the number of reports we emit during a reporting period (a report consist of all the telemetry events/logging emitted during 1 execution of the _operation() method). | ||
| # When the firewall state is OK we emit 1 single report per period, otherwise we emit up to 3 reports per period. The self._report_count member is used to set this limit. | ||
| # However, if the state of the firewall changes after the limit has been exceeded, we want to know immediately, not until the full reporting period has elapsed. So, when the state changes, we reset self._report_count. | ||
| # Now, if the state of the firewall changes multiple times during the same reporting period, the above strategy can produce to many reports, so we set an absolute limit per period. The self_period_report_count member | ||
| # is used to set that limit and is reset each reporting period. | ||
| # | ||
| self._reporting_period_end = datetime.datetime.now(UTC) + EnableFirewall._REPORTING_PERIOD | ||
| self._report_count = 0 # we can reset this count more than once per period depending on the state of the firewall | ||
| self._period_report_count = 0 # this count is reset only once per period | ||
| self._should_report = True | ||
|
|
||
| def _operation(self): | ||
| try: | ||
| # | ||
| # Each check of the firewall can produce a report, but we limit the number of reports we emit within a fix period of time. | ||
| # We check the reporting limits and set self._should_report at the beginning of the method. Note that self._report_count and self._should_report can be reset to their initial values if the state of the firewall changes. | ||
| # | ||
| self._update_reporting_state() | ||
|
|
||
|
|
@@ -140,49 +148,52 @@ def _operation(self): | |
| try: | ||
| if self._firewall_manager.check(): | ||
| self._update_firewall_state(FirewallState.OK) | ||
| self._report(event.info, WALAEventOperation.Firewall, "The firewall is configured correctly. Current state:\n{0}", self._firewall_manager.get_state()) | ||
| self._emit_event(event.info, WALAEventOperation.Firewall, "The firewall is configured correctly. Current state:\n{0}", self._firewall_manager.get_state()) | ||
| return | ||
| self._update_firewall_state(FirewallState.NotSet) | ||
| self._report(event.warn, WALAEventOperation.Firewall, "The firewall has not been setup. Will set it up.") | ||
| self._emit_event(event.warn, WALAEventOperation.Firewall, "The firewall has not been setup. Will set it up.") | ||
| except IptablesInconsistencyError as e: | ||
| self._update_firewall_state(FirewallState.Inconsistent) | ||
| self._report(event.warn, WALAEventOperation.FirewallInconsistency, "The results returned by iptables are inconsistent, will not change the current state of the firewall: {0}", ustr(e)) | ||
| self._emit_event(event.warn, WALAEventOperation.FirewallInconsistency, "The results returned by iptables are inconsistent, will not change the current state of the firewall: {0}", ustr(e)) | ||
| return | ||
| except FirewallStateError as e: | ||
| self._update_firewall_state(FirewallState.Invalid) | ||
| self._report(event.warn, WALAEventOperation.ResetFirewall, "The firewall is not configured correctly. {0}. Will reset it. Current state:\n{1}", ustr(e), self._firewall_manager.get_state()) | ||
| self._emit_event(event.warn, WALAEventOperation.ResetFirewall, "The firewall is not configured correctly. {0}. Will reset it. Current state:\n{1}", ustr(e), self._firewall_manager.get_state()) | ||
| self._firewall_manager.remove() | ||
|
|
||
| self._firewall_manager.setup() | ||
| self._update_firewall_state(FirewallState.OK) | ||
| self._report(event.info, WALAEventOperation.Firewall, "The firewall was setup successfully:\n{0}", self._firewall_manager.get_state()) | ||
| self._emit_event(event.info, WALAEventOperation.Firewall, "The firewall was setup successfully:\n{0}", self._firewall_manager.get_state()) | ||
| except Exception as e: | ||
| self._update_firewall_state(FirewallState.Unknown) | ||
| self._report(event.warn, WALAEventOperation.Firewall, "An error occurred while verifying the state of the firewall: {0}. Current state:\n{1}", textutil.format_exception(e), self._firewall_manager.get_state()) | ||
| self._emit_event(event.warn, WALAEventOperation.Firewall, "An error occurred while verifying the state of the firewall: {0}. Current state:\n{1}", textutil.format_exception(e), self._firewall_manager.get_state()) | ||
|
|
||
| def _report(self, report_function, operation, message, *args): | ||
| def _emit_event(self, event_function, operation, message, *args): | ||
| if self._should_report: | ||
| report_function(operation, message, *args) | ||
| event_function(operation, message, *args) | ||
|
|
||
| def _update_reporting_state(self): | ||
| if self._next_report_time > datetime.datetime.now(UTC): | ||
| self._should_report = False | ||
| return | ||
| if datetime.datetime.now(UTC) >= self._reporting_period_end: # Reset the all the report counts every time a reporting period has elapsed | ||
| self._report_count = 0 | ||
| self._period_report_count = 0 | ||
| self._reporting_period_end = datetime.datetime.now(UTC) + EnableFirewall._REPORTING_PERIOD | ||
|
|
||
| # if the state of the firewall does not change within a reporting period, we report only once when the state is OK and max 3 times when it is not. | ||
| # if the state changes, we report at most 8 times total. | ||
| self._report_count += 1 | ||
| self._period_report_count += 1 | ||
| max_reports = 1 if self._firewall_state == FirewallState.OK else 3 | ||
| if self._report_count <= max_reports: | ||
| self._should_report = True | ||
| else: | ||
| self._report_count = 0 | ||
| self._next_report_time = datetime.datetime.now(UTC) + self._REPORTING_PERIOD | ||
| self._should_report = False | ||
| self._should_report = self._report_count <= max_reports | ||
|
|
||
| def _update_firewall_state(self, firewall_state): | ||
| if (self._firewall_state == FirewallState.OK) != (firewall_state == FirewallState.OK): # reset the reporting limits if the firewall state goes from OK to not-OK or vice versa | ||
| self._report_count = 0 | ||
| self._next_report_time = datetime.datetime.now(UTC) | ||
| self._should_report = True | ||
| if (self._firewall_state == FirewallState.OK) != (firewall_state == FirewallState.OK): | ||
| # Reset the self._report_count and self._should_report to their initial values if the firewall state goes from OK to not-OK or vice versa. We do this because we want to know about state changes | ||
| # immediately, even if the report count for the current period has already been exceeded. Otherwise, we would need to wait for the entire reporting period to report those changes. | ||
| # Now, if the state of the firewall changes too often, this strategy may emit too many events, so we use self._period_report_count, to set an absolute limit that is only reset when the full | ||
| # period elapses. | ||
| if self._period_report_count <= 8: | ||
| self._report_count = 0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like if we enter this condition, we don't increment self._period_count and self._report_count before we call self._report() on lines 147, 150, 154, 162
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, the limit is per-report (1 iteration == 1 report), rather than per message, as it was 2 agent versions ago. we check the limits at the top of the iteration (line 130) and then update them later, the updated values will take effect on the next iteration i'll add this as a comment in the code |
||
| self._should_report = True | ||
| self._firewall_state = firewall_state | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ | |
| from azurelinuxagent.ga.env import MonitorDhcpClientRestart, EnableFirewall | ||
|
|
||
| from tests.lib.event import get_events_from_mock | ||
| from tests.lib.tools import AgentTestCase, patch, DEFAULT | ||
| from tests.lib.tools import AgentTestCase, patch, DEFAULT, Mock | ||
| from tests.lib.mock_firewall_command import MockIpTables | ||
|
|
||
|
|
||
|
|
@@ -220,8 +220,8 @@ def test_it_should_not_modify_the_firewall_rules_when_the_check_command_is_incon | |
|
|
||
| def test_it_should_log_the_state_of_the_firewall_once_per_reporting_period(self): | ||
| with MockIpTables() as mock_iptables: | ||
| EnableFirewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now EnableFirewall.init() references _REPORTING_PERIOD, so the latter needs to be initialized before instantiating EnableFirewall. |
||
| enable_firewall = EnableFirewall('168.63.129.16') | ||
| enable_firewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500) | ||
|
|
||
| with patch.multiple("azurelinuxagent.ga.firewall_manager.event", info=DEFAULT, warn=DEFAULT, error=DEFAULT) as patches: | ||
| info = patches["info"] | ||
|
|
@@ -262,8 +262,8 @@ def test_it_should_log_errors_thrice_per_reporting_period(self): | |
| with MockIpTables(check_matches_list=False) as mock_iptables: | ||
| mock_iptables.set_return_values("-C", accept_dns=0, accept=0, drop=1, legacy=0) | ||
|
|
||
| EnableFirewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500) | ||
| enable_firewall = EnableFirewall('168.63.129.16') | ||
| enable_firewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500) | ||
|
|
||
| firewall_manager_in_verbose_mode = [] | ||
|
|
||
|
|
@@ -324,3 +324,46 @@ def test_it_should_log_errors_thrice_per_reporting_period(self): | |
|
|
||
| self.assertEqual(0, error.call_count, "No errors should have been reported. Got: {0}". format(error.call_args_list)) | ||
|
|
||
| def test_it_should_set_a_limit_on_the_number_of_reports_when_the_firewall_state_changes(self): | ||
| EnableFirewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500) | ||
| enable_firewall = EnableFirewall('168.63.129.16') | ||
|
|
||
| call_count = [0] | ||
|
|
||
| def mock_check(*_, **__): | ||
| call_count[0] += 1 | ||
| return call_count[0] % 2 == 0 # flip between False (the firewall has not been set up) and True (the firewall is OK) and on each call | ||
|
|
||
| enable_firewall._firewall_manager = Mock() | ||
| enable_firewall._firewall_manager.check = Mock(side_effect=mock_check) | ||
| enable_firewall._firewall_manager.get_state = Mock(return_value='*** mock state***') | ||
|
|
||
| # E0601: Using variable 'add_event_patch' before assignment (used-before-assignment) | ||
| get_firewall_events = lambda: [(kwargs["is_success"], kwargs['message']) for _, kwargs in add_event_patch.call_args_list if 'Firewall' in kwargs["op"]] # pylint: disable=E0601 | ||
|
|
||
| # We expect a maximum of 8 reports per reporting period. | ||
| # The mock for check() flips between False and True; the former produces a report with a WARNING (is_success == False) and an INFO (is_success == True); the latter produces 1 INFO | ||
| expected = 4 * [ | ||
| (False, '[WARNING] The firewall has not been setup. Will set it up.'), (True, 'The firewall was setup successfully:\n*** mock state***'), | ||
| (True, 'The firewall is configured correctly. Current state:\n*** mock state***') | ||
| ] | ||
|
|
||
| # First reporting period | ||
| with patch("azurelinuxagent.common.event.add_event") as add_event_patch: | ||
| for _ in range(0, 12): | ||
| enable_firewall._operation() | ||
|
|
||
| self.assertEqual(12, call_count[0], "Expected 12 calls to FirewallManager.check() during the first reporting period") | ||
| firewall_events = get_firewall_events() | ||
| self.assertEqual(expected, firewall_events, "First reporting period: Expected 1 WARNING (is_success == False) and 1 INFOs (is_success == True), then 1 more INFO, repeated 4 times") | ||
|
|
||
| time.sleep(0.5) | ||
|
|
||
| # Second reporting period | ||
| with patch("azurelinuxagent.common.event.add_event") as add_event_patch: | ||
| for _ in range(0, 10): | ||
| enable_firewall._operation() | ||
|
|
||
| self.assertEqual(22, call_count[0], "Expected a total of 22 calls to FirewallManager.check() after the second reporting period") | ||
| firewall_events = get_firewall_events() | ||
| self.assertEqual(expected, firewall_events, "Second reporting period: Expected 1 WARNING (is_success == False) and 1 INFOs (is_success == True), then 1 more INFO, repeated 4 times") | ||
Uh oh!
There was an error while loading. Please reload this page.