Skip to content

Commit 519527c

Browse files
committed
Allow configuring a maximum size for media downloads
1 parent 06407f3 commit 519527c

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

config.sample.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,28 @@ media:
3838
# You might also use directory separators to create subdirectories
3939
file_pattern: "{date[year]}-{date[month]}-{date[day]}_{date[hour]}-{date[minute]}-{date[second]}_{message[id]}_{file[name]}.{file[ext]}"
4040

41+
# Limit the maximum size for media downloads
42+
# Omit or keep empty to disable limit
43+
max_size: 10M
44+
45+
# Optionally specify configuration per media type
46+
types:
47+
# Configuration for media files of type "photo"
48+
photo:
49+
# Choose whether to download photos
50+
enabled: true
51+
52+
# Set max size for photos
53+
max_size: 10M
54+
55+
# Configuration for media files of type "file" (i.e. everything else)
56+
file:
57+
# Choose whether to download files
58+
enabled: true
59+
60+
# Set max size for files
61+
max_size: 10M
62+
4163
outputs:
4264
# Send messages to Elasticsearch
4365
- type: elasticsearch

telegram2elastic.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,43 @@
1515

1616
from datetime import datetime
1717
from telethon import TelegramClient, events
18+
from telethon.tl import types
1819
from telethon.tl.patched import Message
1920
from telethon.tl.types import User, Chat, Channel
2021
from telethon.utils import get_display_name
2122

2223
LOG_LEVEL_INFO = 35
2324

2425

26+
class FileSize:
    """Convert between byte counts and human-readable size strings.

    All units are binary multiples: the unit at index ``i`` of ``units``
    stands for ``1024 ** (i + 1)`` bytes (``K`` = 1024, ``M`` = 1024 ** 2, ...).
    """

    # Unit prefixes in ascending order of magnitude
    units = ["K", "M", "G", "T", "P"]

    @staticmethod
    def human_readable_to_bytes(size_string: str) -> int:
        """Parse a size such as ``"10M"``, ``"1.5gb"`` or ``"2048"`` into bytes.

        Parsing is case-insensitive, ignores surrounding whitespace and an
        optional trailing ``B``. A plain ``int`` is also accepted and returned
        unchanged, because unquoted numbers in a YAML file are loaded as ``int``
        rather than ``str``.

        :param size_string: Size with an optional unit suffix (K, M, G, T, P)
        :return: Size in bytes, truncated towards zero for fractional results
        :raises ValueError: If the unit is unknown or the numeric part is invalid
        """
        # A bare integer already is a byte count (bool is excluded on purpose)
        if isinstance(size_string, int) and not isinstance(size_string, bool):
            return size_string

        # Normalize: uppercase, drop the "B" suffix (e.g. "mb" or "MB" becomes "M")
        # and any whitespace that was placed around the value or before the "B"
        normalized = size_string.strip().upper().rstrip("B").rstrip()

        # Size is already in bytes
        if normalized.isdigit():
            return int(normalized)

        number, unit = normalized[:-1], normalized[-1:]
        if unit not in FileSize.units:
            raise ValueError(
                f"Invalid size '{size_string}': expected a number optionally "
                f"followed by one of {', '.join(FileSize.units)}"
            )

        try:
            value = float(number)
        except ValueError as err:
            raise ValueError(f"Invalid size '{size_string}': '{number}' is not a number") from err

        exponent = FileSize.units.index(unit) + 1
        return int(value * 1024 ** exponent)

    @staticmethod
    def bytes_to_human_readable(size_bytes: int) -> str:
        """Format a byte count with one decimal and the largest fitting unit.

        Values too large for the biggest known unit are still expressed in that
        unit (e.g. ``1024 ** 6`` bytes is rendered as ``"1024.0PB"``).

        :param size_bytes: Size in bytes
        :return: Formatted size such as ``"500.0B"`` or ``"1.5KB"``
        """
        prefixes = [""] + FileSize.units

        # Scale down through every unit except the last one; the last unit must
        # not be divided again, otherwise the number would no longer match it
        for unit in prefixes[:-1]:
            if abs(size_bytes) < 1024:
                return f"{size_bytes:3.1f}{unit}B"
            size_bytes /= 1024

        return f"{size_bytes:3.1f}{prefixes[-1]}B"
53+
54+
2555
class DottedPathDict(dict):
2656
def get(self, path, default=None):
2757
path = path.split(".", 1)
@@ -218,6 +248,8 @@ async def download_media(self, message):
218248
else:
219249
original_filename = Path(message.file.name).stem
220250

251+
full_original_filename = f"{original_filename}{message.file.ext}"
252+
221253
filename_pattern_map = {
222254
"date": {
223255
"year": message.date.year,
@@ -242,6 +274,27 @@ async def download_media(self, message):
242274
filename = file_pattern.format_map(filename_pattern_map)
243275
filepath = download_path.joinpath(filename)
244276

277+
if isinstance(message.media, types.MessageMediaPhoto):
278+
media_type = "photo"
279+
else:
280+
media_type = "file"
281+
282+
media_type_config = self.media_config.get("types", {}).get(media_type, {})
283+
284+
if not media_type_config.get("enabled", True):
285+
logging.debug(f"Skipping media download for '{full_original_filename}' with type {media_type} as it is disabled")
286+
return
287+
288+
file_size_string = FileSize.bytes_to_human_readable(message.file.size)
289+
290+
max_size = media_type_config.get("max_size", self.media_config.get("max_size", ""))
291+
if max_size != "":
292+
max_size_bytes = FileSize.human_readable_to_bytes(max_size)
293+
if message.file.size > max_size_bytes:
294+
logging.debug(f"Skipping media download for '{full_original_filename}' as it exceeds the configured max size ({file_size_string} > {max_size})")
295+
return
296+
297+
logging.debug(f"Downloading media file '{full_original_filename}' with type '{media_type}' to {filepath} ({file_size_string})")
245298
await message.download_media(file=filepath)
246299

247300
return DownloadedMedia(filepath=filepath, filename=filename)

0 commit comments

Comments
 (0)