Skip to content

Commit 2b1ed24

Browse files
authored
Merge pull request #2381 from sebix/CERTUNLP-develop
rsync collector by CERTUNLP merges #2241 fixes #2241
2 parents 5bb8b78 + 2026b59 commit 2b1ed24

File tree

3 files changed

+94
-20
lines changed

3 files changed

+94
-20
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ CHANGELOG
2828

2929
#### Collectors
3030
- `intelmq.bots.collectors.rt`: restrict `python-rt` to be below version 3.0 due to introduced breaking changes.
31+
- `intelmq.bots.collectors.rsync`: Support for optional private key, relative time parsing for the source path, extra rsync parameters and strict host key checking (PR#2241 by Mateo Durante).
3132

3233
#### Parsers
3334
- `intelmq.bots.parsers.shadowserver._config`:

docs/user/bots.rst

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -637,10 +637,13 @@ Requires the rsync executable
637637

638638
**Configuration Parameters**
639639

640-
* **Feed parameters** (see above)
641-
* `file`: Name of downloaded file.
642-
* `file`: The filename to process, combined with `rsync_path`.
643-
* `rsync_path`: Path to file. It can be "/home/username/directory" or "username@remote_host:/home/username/directory"
640+
* `rsync_path`: Rsync server connection and path. It can be "/home/username/directory/" or "username@remote_host:/home/username/directory/". Supports formatting, see below.
641+
* `file`: The filename to process, combined with `rsync_path`. Supports formatting, see below.
642+
* `rsync_file_path_formatting`: Whether `file` and `rsync_path` should be formatted with the current time (default: `false`). Either a boolean or a JSON object, see below. E.g. if the path is `/path/to/file/{time[%Y]}`, then the resulting path is `/path/to/file/2023` for the year 2023. (Python's `Format Specification Mini-Language <https://docs.python.org/3/library/string.html#formatspec>`_ is used for this.) Instead of `true`, you may use a `JSON` object specifying `time-delta <https://docs.python.org/3/library/datetime.html#datetime.timedelta>`_ parameters to shift the current time accordingly. For example, use `{"days": -1}` for yesterday's date; the path `/path/to/file/{time[%Y-%m-%d]}` will then get translated to "/path/to/file/2022-12-31" on the 1st Jan of 2023.
643+
* `extra_params`: A string of extra parameters to pass to rsync, e.g. `"-a --timeout=30"`. It is split into individual arguments using shell-like quoting rules. Optional.
644+
* `private_key`: Private key to use for rsync authentication. Optional.
645+
* `private_key_path`: Path to private key to use for rsync authentication. Optional. (Use `private_key` or `private_key_path`, not both.)
646+
* `strict_host_key_checking`: Boolean if the host key should be checked (default: `false`).
644647
* `temp_directory`: The temporary directory for rsync to use for rsync'd files. Optional. Default: `$VAR_STATE_PATH/rsync_collector`. `$VAR_STATE_PATH` is `/var/run/intelmq/` or `/opt/intelmq/var/run/`.
645648

646649

intelmq/bots/collectors/rsync/collector_rsync.py

Lines changed: 86 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,37 @@
33
# SPDX-License-Identifier: AGPL-3.0-or-later
44

55
# -*- coding: utf-8 -*-
6-
from os import mkdir, path
6+
import stat
7+
import shlex
8+
from os import mkdir, path, chmod
79
from subprocess import run, PIPE
10+
from datetime import datetime, timedelta
811

912
from intelmq import VAR_STATE_PATH
1013
from intelmq.lib.bot import CollectorBot
1114

1215

16+
class Time(object):
    """Expose the (optionally shifted) current time to ``str.format`` templates.

    Indexing an instance with a ``strftime`` pattern returns the formatted
    time, so ``"{time[%Y]}".format(time=Time())`` yields the current year.
    """

    def __init__(self, delta=None):
        """Capture the current local time, shifted by ``delta`` if one is given.

        Args:
            delta: Mapping of ``datetime.timedelta`` keyword arguments, e.g.
                ``{"days": -1}`` for yesterday. ``None`` or a boolean leaves
                the time unshifted.

        Raises:
            TypeError: If ``delta`` is not ``None``, not a boolean and cannot
                be unpacked into valid ``timedelta`` keyword arguments.
        """
        self.time = datetime.now()
        # None (the default) and booleans mean "no shift"; anything else is
        # unpacked into timedelta, which raises TypeError for invalid values.
        if delta is not None and not isinstance(delta, bool):
            self.time += timedelta(**delta)

    def __getitem__(self, timeformat):
        """Return the captured time rendered with the ``strftime`` pattern."""
        return self.time.strftime(timeformat)
25+
26+
1327
class RsyncCollectorBot(CollectorBot):
14-
"Collect data with rsync from any resource rsync supports"
28+
"""Collect data with rsync from any resource rsync supports"""
29+
rsync_path: str = "<path>"
1530
file: str = "<file>"
31+
rsync_file_path_formatting: bool = False
1632
rate_limit: int = 1000
17-
rsync_path: str = "<path>"
33+
extra_params: str = None
34+
private_key: str = None
35+
private_key_path: str = None
36+
strict_host_key_checking: bool = False
1837
temp_directory: str = path.join(VAR_STATE_PATH, "rsync_collector") # TODO: should be pathlib.Path
1938

2039
def init(self):
@@ -23,20 +42,71 @@ def init(self):
2342
except FileExistsError:
2443
pass
2544

45+
if self.extra_params:
46+
self.extra_params = shlex.split(self.extra_params)
47+
else:
48+
self.extra_params = []
49+
50+
if self.private_key and self.private_key_path:
51+
raise Exception("You must define only one of the variables private_key or private_key_path")
52+
53+
if self.private_key:
54+
bot_id = self._Bot__bot_id
55+
self.privkeydir = path.join(VAR_STATE_PATH, f'privkey_dir_{bot_id}')
56+
try:
57+
mkdir(self.privkeydir)
58+
except FileExistsError:
59+
pass
60+
self.private_key_path = path.join(self.privkeydir, 'private.key')
61+
62+
# privkey format parser, support formats with and without headers, breaklines, etc.
63+
if '-----' in self.private_key:
64+
p = self.private_key.split('-----')[2]
65+
else:
66+
p = self.private_key
67+
pb64 = ''.join(p.split())
68+
fullkey = ['-----BEGIN OPENSSH PRIVATE KEY-----']
69+
fullkey += [pb64[i:i + 70] for i in range(0, len(pb64), 70)]
70+
fullkey += ['-----END OPENSSH PRIVATE KEY-----']
71+
fullkey += ['']
72+
final_key = '\n'.join(fullkey)
73+
74+
with open(self.private_key_path, 'w') as f:
75+
f.write(final_key)
76+
chmod(self.private_key_path, stat.S_IRUSR | stat.S_IWUSR)
77+
78+
if self.private_key_path:
79+
self.strict_host_key_checking_str = 'yes' if self.strict_host_key_checking else 'no'
80+
self.extra_params += ['-e', f'ssh -i {self.private_key_path} -o StrictHostKeyChecking={self.strict_host_key_checking_str}']
81+
2682
def process(self):
    """Fetch the configured file with rsync and send its content as a report.

    When ``rsync_file_path_formatting`` is truthy, ``file`` and ``rsync_path``
    are expanded with ``str.format`` using a single ``time`` object, so both
    values are derived from the same instant.

    Raises:
        TypeError: If ``rsync_file_path_formatting`` cannot be used as
            time-delta parameters.
        KeyError: If ``file`` or ``rsync_path`` reference a format field
            other than ``time``.
        ValueError: If rsync exits with a non-zero return code.
    """
    formatting = self.rsync_file_path_formatting
    rsync_file = self.file
    rsync_path = self.rsync_path
    if formatting:
        try:
            # Build the time object once: two separate instances could
            # straddle a date change and yield an inconsistent path/file pair.
            time = Time(formatting)
            rsync_file = rsync_file.format(time=time)
            rsync_path = rsync_path.format(time=time)
        except TypeError:
            self.logger.error(f"Wrongly formatted rsync_file_path_formatting parameter: {formatting}. Should be boolean (False) or a time-delta JSON.")
            raise
        except KeyError:
            self.logger.error(f"Wrongly formatted file '{rsync_file}' or rsync_path '{rsync_path}'. Possible misspell with 'time' on 'formatting' variable.")
            raise
    rsync_full_path = path.join(rsync_path, rsync_file)

    self.logger.info(f"Updating file {rsync_file}.")
    cmd_list = ["rsync"] + self.extra_params + [rsync_full_path, self.temp_directory]
    self.logger.debug(f"Executing command: {cmd_list}.")
    process = run(cmd_list, stderr=PIPE)
    if process.returncode != 0:
        # Implicit string concatenation instead of a backslash continuation
        # inside the literal, which would embed the source indentation in
        # the error message.
        raise ValueError(f"Rsync on file {rsync_file!r} failed with exitcode "
                         f"{process.returncode} and stderr {process.stderr!r}.")

    report = self.new_report()
    with open(path.join(self.temp_directory, rsync_file)) as f:
        report.add("raw", f.read())
    self.send_message(report)
40110

41111

42112
BOT = RsyncCollectorBot

0 commit comments

Comments
 (0)