Skip to content

Commit dbd5606

Browse files
committed
Allow to define rules for media downloads
1 parent 519527c commit dbd5606

File tree

2 files changed

+117
-35
lines changed

2 files changed

+117
-35
lines changed

config.sample.yml

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,28 @@ media:
4242
# Omit or keep empty to disable limit
4343
max_size: 10M
4444

45-
# Optionally specify configuration per media type
46-
types:
47-
# Configuration for media files of type "photo"
48-
photo:
49-
# Choose whether to download photos
50-
enabled: true
51-
52-
# Set max size for photos
45+
# Configure rules to define whether to download media for specific media types, chat types and/or contacts
46+
# If at least one rule is defined, media is only downloaded for messages matching a rule (downloads are disabled for everything else)
47+
# The first matching rule will be used
48+
# You may also specify "download_path", "max_size" and "file_pattern" in a rule to override the global values for that rule only.
49+
rules:
50+
51+
# This rule will enable downloads of photos up to 10 MB for specific contacts
52+
- media_type: photo
53+
chat_type: contact
54+
chats:
55+
- <id 1>
56+
- <id 2>
57+
- ...
5358
max_size: 10M
5459

55-
# Configuration for media files of type "file" (i.e. everything else)
56-
file:
57-
# Choose whether to download files
58-
enabled: true
60+
# This rule will allow any media download regardless of the media type but limit it to 50 MB per media download and put the files in "/downloads/channels"
61+
- chat_type: channel
62+
max_size: 50M
63+
download_path: /downloads/channels
5964

60-
# Set max size for files
61-
max_size: 10M
65+
# This rule will accept any media download with a maximum size of 5 MB
66+
- max_size: 5M
6267

6368
outputs:
6469
# Send messages to Elasticsearch

telegram2elastic.py

Lines changed: 98 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,86 @@ class DownloadedMedia:
123123
filename: str
124124

125125

126+
class MediaConfigurationRule:
127+
def __init__(self, global_config: dict, config_data: dict):
128+
self.global_config = global_config
129+
self.config_data = config_data
130+
131+
def matches_message(self, message, chat):
132+
if isinstance(message.media, types.MessageMediaPhoto):
133+
message_media_type = "photo"
134+
else:
135+
message_media_type = "file"
136+
137+
if not self.matches_media_type(message_media_type):
138+
return False
139+
140+
message_chat_type = ChatType.get_from_chat(chat)
141+
if message_chat_type is None or not self.matches_chat_type(message_chat_type.value):
142+
return False
143+
144+
message_chat_id = message.chat_id
145+
if message_chat_id is None or not self.matches_chat_id(message_chat_id):
146+
return False
147+
148+
return True
149+
150+
def matches_media_type(self, media_type: str):
151+
value = self.config_data.get("media_type")
152+
return value is None or value == media_type
153+
154+
def matches_chat_type(self, chat_type: str):
155+
value = self.config_data.get("chat_type")
156+
return value is None or value == chat_type
157+
158+
def matches_chat_id(self, chat_id: int):
159+
chat_ids = self.config_data.get("chats")
160+
if not chat_ids:
161+
return True
162+
163+
return chat_id in chat_ids
164+
165+
def get_download_path(self) -> str | None:
166+
return self.get_with_fallback("download_path")
167+
168+
def get_filepattern(self) -> str:
169+
return self.get_with_fallback("file_pattern", "{date[year]}-{date[month]}-{date[day]}_{date[hour]}-{date[minute]}-{date[second]}_{message[id]}_{file[name]}.{file[ext]}")
170+
171+
def get_max_size(self) -> str:
172+
return self.get_with_fallback("max_size", "")
173+
174+
def get_with_fallback(self, option_name: str, default_value=None):
175+
value = self.config_data.get(option_name)
176+
if value is not None:
177+
return value
178+
179+
# Fallback to global config
180+
value = self.global_config.get(option_name)
181+
if value is not None:
182+
return value
183+
184+
return default_value
185+
186+
187+
class MediaConfiguration:
188+
def __init__(self, config: dict):
189+
self.config = config
190+
191+
self.rules = []
192+
193+
for rule in config.get("rules", []):
194+
self.rules.append(MediaConfigurationRule(self.config, rule))
195+
196+
# Add default match-all rule if there are no rules configured
197+
if not self.rules:
198+
self.rules.append(MediaConfigurationRule(self.config, {}))
199+
200+
def get_rule(self, message, chat):
201+
for rule in self.rules:
202+
if rule.matches_message(message, chat):
203+
return rule
204+
205+
126206
class OutputWriter(ABC):
127207
def __init__(self, config: dict):
128208
self.config: dict = config
@@ -206,7 +286,7 @@ class OutputHandler:
206286
def __init__(self, media_config: dict):
207287
self.outputs = []
208288
self.imports = {}
209-
self.media_config = media_config
289+
self.media_config = MediaConfiguration(media_config)
210290

211291
def add(self, config: dict):
212292
output_type = config.get("type")
@@ -231,7 +311,7 @@ async def write_message(self, message, is_chat_enabled: callable):
231311
logging.debug("Skipping message {} from chat '{}' as chat type {} is not enabled".format(message.id, chat_display_name, chat_type.value if chat_type else None))
232312
return
233313

234-
if message.file and self.media_config.get("download_path"):
314+
if message.file:
235315
downloaded_media = await self.download_media(message)
236316
else:
237317
downloaded_media = None
@@ -240,16 +320,25 @@ async def write_message(self, message, is_chat_enabled: callable):
240320
await output.write_message(message, downloaded_media)
241321

242322
async def download_media(self, message):
243-
download_path = Path(self.media_config.get("download_path")).expanduser()
244-
download_path.mkdir(parents=True, exist_ok=True)
245-
246323
if message.file.name is None:
247324
original_filename = f"msg{message.chat_id}-{message.id}"
248325
else:
249326
original_filename = Path(message.file.name).stem
250327

251328
full_original_filename = f"{original_filename}{message.file.ext}"
252329

330+
config_rule = self.media_config.get_rule(message, await message.get_chat())
331+
if config_rule is None:
332+
logging.debug(f"Skipping media download for '{full_original_filename}' as no config rule matches")
333+
return
334+
335+
download_path = config_rule.get_download_path()
336+
if download_path is None:
337+
logging.debug(f"Skipping media download for '{full_original_filename}' as no download path has been configured")
338+
return
339+
340+
download_path = Path(download_path).expanduser()
341+
253342
filename_pattern_map = {
254343
"date": {
255344
"year": message.date.year,
@@ -269,32 +358,20 @@ async def download_media(self, message):
269358
}
270359
}
271360

272-
file_pattern = self.media_config.get("file_pattern", "{date[year]}-{date[month]}-{date[day]}_{date[hour]}-{date[minute]}-{date[second]}_{message[id]}_{file[name]}.{file[ext]}")
273-
274-
filename = file_pattern.format_map(filename_pattern_map)
361+
filename = config_rule.get_filepattern().format_map(filename_pattern_map)
275362
filepath = download_path.joinpath(filename)
276363

277-
if isinstance(message.media, types.MessageMediaPhoto):
278-
media_type = "photo"
279-
else:
280-
media_type = "file"
281-
282-
media_type_config = self.media_config.get("types", {}).get(media_type, {})
283-
284-
if not media_type_config.get("enabled", True):
285-
logging.debug(f"Skipping media download for '{full_original_filename}' with type {media_type} as it is disabled")
286-
return
287-
288364
file_size_string = FileSize.bytes_to_human_readable(message.file.size)
289365

290-
max_size = media_type_config.get("max_size", self.media_config.get("max_size", ""))
366+
max_size = config_rule.get_max_size()
291367
if max_size != "":
292368
max_size_bytes = FileSize.human_readable_to_bytes(max_size)
293369
if message.file.size > max_size_bytes:
294370
logging.debug(f"Skipping media download for '{full_original_filename}' as it exceeds the configured max size ({file_size_string} > {max_size})")
295371
return
296372

297-
logging.debug(f"Downloading media file '{full_original_filename}' with type '{media_type}' to {filepath} ({file_size_string})")
373+
logging.debug(f"Downloading media file '{full_original_filename}' to {filepath} ({file_size_string})")
374+
filepath.parent.mkdir(parents=True, exist_ok=True)
298375
await message.download_media(file=filepath)
299376

300377
return DownloadedMedia(filepath=filepath, filename=filename)

0 commit comments

Comments
 (0)