diff --git a/CHANGELOG.md b/CHANGELOG.md index dac3ac20e1..e994ad8882 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,11 +14,13 @@ Please refer to the [NEWS](NEWS.md) for a list of changes which have an affect o -------------------------------- ### Configuration +- New parameter `stop_retry_limit` (PR#2598 by Lukas Heindl). ### Core - Drop support for Python 3.8 (fixes #2616, PR#2617 by Sebastian Wagner). - `intelmq.lib.splitreports`: Handle bot parameter `chunk_size` values empty string, due to missing parameter typing checks (PR#2604 by Sebastian Wagner). - `intelmq.lib.mixins.sql` Add Support for MySQL (PR#2625 by Karl-Johan Karlsson). +- New parameter `stop_retry_limit` to gracefully handle stopping bots which take longer to shut down (PR#2598 by Lukas Heindl, fixes #2595). ### Development diff --git a/docs/admin/configuration/intelmq.md b/docs/admin/configuration/intelmq.md index c82bb08fe5..941722f849 100644 --- a/docs/admin/configuration/intelmq.md +++ b/docs/admin/configuration/intelmq.md @@ -237,6 +237,13 @@ configured to do so. (optional, boolean) Verify the TLS certificate of the server. Defaults to true. +**`stop_retry_limit`** + +(optional, integer) Number of retries when checking the status of a botnet after issuing `intelmqctl stop`. The first check +happens after *0.75s*; each further retry waits another *0.1s* longer, up to a maximum of *5s* per iteration. Only applies when +stopping a bot*net* (not individual bots). +Defaults to 5. + #### Individual Bot Configuration !!!
info diff --git a/intelmq/bin/intelmqctl.py b/intelmq/bin/intelmqctl.py index 91a0ec9420..35d5e8716c 100644 --- a/intelmq/bin/intelmqctl.py +++ b/intelmq/bin/intelmqctl.py @@ -563,12 +563,42 @@ def botnet_stop(self, group=None): for bot_id in bots: self.bot_stop(bot_id, getstatus=False) - retval = 0 - time.sleep(0.75) - for bot_id in bots: - botnet_status[bot_id] = self.bot_status(bot_id)[1] - if botnet_status[bot_id] not in ['stopped', 'disabled']: - retval = 1 + # shallow copy of the list suffices + # only aliasing the list to ease reading the following + stopped_but_still_running_bots = bots + + retries = getattr(self._parameters, 'stop_retry_limit', 5) + + # parameters (default): + # - sleep 0.75 s with an increment of 0.1 + # - at most 5 tries + # => sleep-ing at most 4.75 seconds + sleep_time = 0.75 # in seconds + for _ in range(retries): + # give the bots some time to terminate + time.sleep(sleep_time) + # update the botnet_status + for bot_id in stopped_but_still_running_bots: + botnet_status[bot_id] = self.bot_status(bot_id)[1] + # only keep bots in the list which are not stopped already + stopped_but_still_running_bots = [ + bot_id + for bot_id in stopped_but_still_running_bots + if botnet_status[bot_id] not in ['stopped', 'disabled'] + ] + + # check if all bots are stopped -> no need to wait further + if not stopped_but_still_running_bots: + break + # the longer the bots need to terminate the longer we wait to check + # again to avoid long-term load on the system + # but stop at 5 seconds to avoid waiting too long until rechecking + # the status + sleep_time = min(5, sleep_time + 0.1) + + retval = 1 + if len(stopped_but_still_running_bots) == 0: + retval = 0 self.log_botnet_message('stopped', group) return retval, botnet_status