Skip to content

Commit fcc1d3e

Browse files
committed
refactor(bot): extract connect flow and resolve code review issues
- Extract connect flow from pyrogram_handlers.py to connect_handler.py - Transcribe all voice messages within a chat's history - Fix auto-reply skip bug by checking _bot_draft_echoes in _is_user_typing - Fix operator precedence bug for awaiting_prompt_input - Add serialize_user_updates deadlock warning in bot_handlers.py - Refactor and deduplicate prompt saving logic into _handle_prompt_save - Change weak bot: object to bot: Bot typing in connect_handler.py - Add missing client: Client type hint to _poll_qr_login - Move clear_pending_input to connect_handler.py to break circular dependency - Update settings_handler.py, styles_handler.py imports - Delete bot sensitive messages properly in system_messages.py and connect_handler.py - Add execution rule against combining commands to CONTRIBUTING.md - Update tests/ to reflect recent architectural changes
1 parent 56886a5 commit fcc1d3e

16 files changed

+1461
-1214
lines changed

CONTRIBUTING.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ feat(bot): add /start command with greeting message
103103
- Update README with /start usage
104104
```
105105

106+
### Execution Rule
107+
❌ It is strictly prohibited to combine commands (e.g. using `;` or `&&`).
108+
✅ Every command must be executed strictly one by one.
109+
106110
### PowerShell Workflow
107111
```powershell
108112
# Write commit message to a file

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Auto-replies work in private chats only. Draft instructions work everywhere.
1515
## Security
1616

1717
- By default, the bot **only writes drafts** and never sends messages on your behalf. Auto-sending is only possible when the auto-reply timer is explicitly enabled in `/settings`.
18-
- **Messages are not stored.** The bot doesn't save conversations — chat history is fetched via Telegram API on each event and is never persisted.
18+
- **Messages are not stored.** The bot doesn't save conversations — chat history is fetched via Telegram API on each event and is never persisted. The context is limited to the last 40 messages or 16,000 characters (whichever comes first); older messages are dropped.
1919
- **Saved Messages** (self-chat) and **Telegram service notifications** are fully ignored — the bot doesn't read, draft, or process messages in them. Additional chats can be excluded via `IGNORED_CHAT_IDS` in `config.py`.
2020
- Telegram sessions are encrypted with `Fernet` (`SESSION_ENCRYPTION_KEY`) before being stored in the database.
2121

@@ -120,7 +120,7 @@ You can combine: `😈 tell her I miss her` — switches the style to Seducer an
120120

121121
### Voice Messages and Stickers
122122

123-
When a voice message is received, the bot automatically transcribes it via Telegram Premium `TranscribeAudio` and generates a draft reply based on the text. Requires Telegram Premium on the connected account (or trial attempts for free users).
123+
All voice messages in the chat history — from both sides (yours and the contact's) — are automatically transcribed via Telegram Premium `TranscribeAudio` and included in the AI context as text. Multiple voice messages are transcribed in parallel. If transcription fails (e.g. no Premium), the message is included as `[voice message]` so the AI still knows a voice was sent. Requires Telegram Premium on the connected account (or trial attempts for free users).
124124

125125
Stickers are processed by emoji — the bot sees the sticker's emoji in the conversation context and generates an appropriate reply.
126126

bot.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,16 @@
2525
from clients import pyrogram_client # noqa: E402
2626
from handlers.bot_handlers import on_start, on_start_connect_callback, on_text # noqa: E402
2727
from handlers.pyrogram_handlers import ( # noqa: E402
28-
on_disconnect, on_connect, on_status, handle_connect_text,
29-
on_connect_qr_callback, on_confirm_phone_callback, on_cancel_phone_callback, on_connect_cancel_callback,
28+
on_disconnect, on_status,
3029
on_disconnect_confirm_callback, on_disconnect_cancel_callback,
3130
on_pyrogram_message, on_pyrogram_draft,
3231
poll_missed_messages,
3332
)
33+
from handlers.connect_handler import ( # noqa: E402
34+
on_connect, handle_connect_text,
35+
on_connect_qr_callback, on_confirm_phone_callback, on_cancel_phone_callback, on_connect_cancel_callback,
36+
)
37+
3438
from handlers.settings_handler import on_settings, on_settings_callback # noqa: E402
3539
from handlers.styles_handler import ( # noqa: E402
3640
on_auto_reply_callback, on_chat_prompt_callback, on_chat_prompt_cancel_callback,

clients/pyrogram_client.py

Lines changed: 88 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pyrogram
99
from pyrogram.handlers import MessageHandler, RawUpdateHandler
1010

11-
from config import PYROGRAM_API_ID, PYROGRAM_API_HASH, MAX_CONTEXT_MESSAGES, DEBUG_PRINT, VOICE_TRANSCRIPTION_TIMEOUT, POLL_MISSED_DIALOGS_LIMIT, STICKER_FALLBACK_EMOJI
11+
from config import PYROGRAM_API_ID, PYROGRAM_API_HASH, MAX_CONTEXT_MESSAGES, MAX_CONTEXT_CHARS, DEBUG_PRINT, VOICE_TRANSCRIPTION_TIMEOUT, POLL_MISSED_DIALOGS_LIMIT, STICKER_FALLBACK_EMOJI
1212
from utils.utils import get_timestamp
1313

1414

@@ -204,6 +204,9 @@ def is_active(user_id: int) -> bool:
204204
async def read_chat_history(user_id: int, chat_id: int, limit: int = MAX_CONTEXT_MESSAGES) -> list[dict]:
205205
"""Читает последние сообщения из чата пользователя.
206206
207+
Голосовые сообщения транскрибируются параллельно через transcribe_voice().
208+
Если транскрипция не удалась — подставляется '[voice message]'.
209+
207210
Args:
208211
user_id: Telegram user ID
209212
chat_id: ID чата для чтения
@@ -217,13 +220,20 @@ async def read_chat_history(user_id: int, chat_id: int, limit: int = MAX_CONTEXT
217220
return []
218221

219222
messages = []
223+
# Индексы сообщений с голосовыми, для которых нужна транскрипция
224+
voice_indices: list[int] = []
225+
voice_msg_ids: list[int] = []
226+
220227
try:
221228
async for msg in client.get_chat_history(chat_id, limit=limit):
222229
text = msg.text
223230
# Стикер → эмодзи как текстовое представление
224231
if not text and msg.sticker:
225232
text = msg.sticker.emoji or STICKER_FALLBACK_EMOJI
226-
if not text:
233+
# Голосовое сообщение → отложим транскрипцию
234+
if not text and msg.voice:
235+
text = None # placeholder, заполним после транскрипции
236+
if not text and not msg.voice:
227237
continue
228238

229239
role = "user" if msg.from_user and msg.from_user.id == user_id else "other"
@@ -234,6 +244,8 @@ async def read_chat_history(user_id: int, chat_id: int, limit: int = MAX_CONTEXT
234244
date = msg.date
235245
if isinstance(date, datetime):
236246
date = date.astimezone(timezone.utc)
247+
248+
idx = len(messages)
237249
messages.append({
238250
"role": role,
239251
"text": text,
@@ -243,9 +255,34 @@ async def read_chat_history(user_id: int, chat_id: int, limit: int = MAX_CONTEXT
243255
"username": sender.username if sender else None,
244256
})
245257

258+
if msg.voice and text is None:
259+
voice_indices.append(idx)
260+
voice_msg_ids.append(msg.id)
261+
262+
# Транскрибируем все голосовые параллельно
263+
if voice_indices:
264+
transcriptions = await asyncio.gather(
265+
*(transcribe_voice(user_id, chat_id, mid) for mid in voice_msg_ids),
266+
)
267+
for idx, transcription in zip(voice_indices, transcriptions):
268+
messages[idx]["text"] = transcription or "[voice message]"
269+
270+
if DEBUG_PRINT:
271+
ok_count = sum(1 for t in transcriptions if t)
272+
print(
273+
f"{get_timestamp()} [PYROGRAM] Transcribed {ok_count}/{len(voice_indices)} "
274+
f"voice messages in chat {chat_id}"
275+
)
276+
246277
# Переворачиваем — от старых к новым
247278
messages.reverse()
248279

280+
# Обрезаем по суммарной длине текста (убираем старые сообщения)
281+
total_chars = sum(len(m["text"] or "") for m in messages)
282+
while messages and total_chars > MAX_CONTEXT_CHARS:
283+
removed = messages.pop(0)
284+
total_chars -= len(removed["text"] or "")
285+
249286
if DEBUG_PRINT:
250287
print(f"{get_timestamp()} [PYROGRAM] Read {len(messages)} messages from chat {chat_id}")
251288

@@ -526,10 +563,18 @@ async def send_message(user_id: int, chat_id: int, text: str) -> bool:
526563
print(f"{get_timestamp()} [PYROGRAM] ERROR sending message in chat {chat_id}: {e}")
527564
return False
528565

566+
# Кэш транскрипций: {user_id: {(chat_id, msg_id): text_or_None}}
567+
# Предотвращает повторные API-вызовы для одних и тех же голосовых.
568+
_transcription_cache: dict[int, dict[tuple[int, int], str | None]] = defaultdict(dict)
569+
_TRANSCRIPTION_CACHE_MAX = 200 # Макс. записей на пользователя
570+
529571

530572
async def transcribe_voice(user_id: int, chat_id: int, msg_id: int) -> str | None:
531573
"""Транскрибирует голосовое сообщение через Telegram Premium TranscribeAudio.
532574
575+
Результат кэшируется — повторные вызовы для того же сообщения
576+
не обращаются к Telegram API.
577+
533578
Args:
534579
user_id: Telegram user ID
535580
chat_id: ID чата
@@ -538,10 +583,18 @@ async def transcribe_voice(user_id: int, chat_id: int, msg_id: int) -> str | Non
538583
Returns:
539584
Текст транскрипции или None при ошибке
540585
"""
586+
# Проверяем кэш
587+
cache_key = (chat_id, msg_id)
588+
user_cache = _transcription_cache[user_id]
589+
if cache_key in user_cache:
590+
return user_cache[cache_key]
591+
541592
client = _active_clients.get(user_id)
542593
if not client:
543594
return None
544595

596+
result_text: str | None = None
597+
545598
try:
546599
peer = await client.resolve_peer(chat_id)
547600
result = await client.invoke(
@@ -555,40 +608,46 @@ async def transcribe_voice(user_id: int, chat_id: int, msg_id: int) -> str | Non
555608
if not result.pending:
556609
if DEBUG_PRINT:
557610
print(f"{get_timestamp()} [PYROGRAM] Transcribed voice in chat {chat_id}: {len(result.text)} chars")
558-
return result.text or None
559-
560-
# Ждём UpdateTranscribedAudio через polling
561-
final_text = result.text or ""
562-
563-
deadline = asyncio.get_event_loop().time() + VOICE_TRANSCRIPTION_TIMEOUT
564-
while asyncio.get_event_loop().time() < deadline:
565-
await asyncio.sleep(1)
566-
# Повторяем запрос — Telegram вернёт обновлённый результат
567-
try:
568-
result = await client.invoke(
569-
raw.functions.messages.TranscribeAudio(
570-
peer=peer,
571-
msg_id=msg_id,
611+
result_text = result.text or None
612+
else:
613+
# Ждём UpdateTranscribedAudio через polling
614+
final_text = result.text or ""
615+
616+
deadline = asyncio.get_event_loop().time() + VOICE_TRANSCRIPTION_TIMEOUT
617+
while asyncio.get_event_loop().time() < deadline:
618+
await asyncio.sleep(1)
619+
# Повторяем запрос — Telegram вернёт обновлённый результат
620+
try:
621+
result = await client.invoke(
622+
raw.functions.messages.TranscribeAudio(
623+
peer=peer,
624+
msg_id=msg_id,
625+
)
572626
)
573-
)
574-
if not result.pending:
575-
final_text = result.text or ""
627+
if not result.pending:
628+
final_text = result.text or ""
629+
break
630+
final_text = result.text or final_text
631+
except Exception:
576632
break
577-
final_text = result.text or final_text
578-
except Exception:
579-
break
580633

581-
if final_text:
582-
if DEBUG_PRINT:
583-
print(f"{get_timestamp()} [PYROGRAM] Transcribed voice in chat {chat_id}: {len(final_text)} chars")
584-
return final_text
585-
586-
return None
634+
if final_text:
635+
if DEBUG_PRINT:
636+
print(f"{get_timestamp()} [PYROGRAM] Transcribed voice in chat {chat_id}: {len(final_text)} chars")
637+
result_text = final_text
587638

588639
except Exception as e:
589640
error_str = str(e)
590641
if "PREMIUM_ACCOUNT_REQUIRED" in error_str:
591642
print(f"{get_timestamp()} [PYROGRAM] WARNING: voice transcription requires Premium in chat {chat_id}")
592643
else:
593644
print(f"{get_timestamp()} [PYROGRAM] ERROR transcribing voice in chat {chat_id}: {e}")
594-
return None
645+
646+
# Сохраняем в кэш (включая None для ошибок)
647+
user_cache[cache_key] = result_text
648+
if len(user_cache) > _TRANSCRIPTION_CACHE_MAX:
649+
# Удаляем самую старую запись
650+
oldest = next(iter(user_cache))
651+
del user_cache[oldest]
652+
653+
return result_text

config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474

7575
# ====== КОНТЕКСТ ======
7676
MAX_CONTEXT_MESSAGES = 40 # Макс. кол-во сообщений из чата для контекста
77+
MAX_CONTEXT_CHARS = 16000 # Макс. суммарная длина текста в истории (символы)
7778

7879
# ====== QR LOGIN ======
7980
QR_LOGIN_TIMEOUT_SECONDS = 120 # Таймаут ожидания сканирования QR-кода (секунды)

handlers/bot_handlers.py

Lines changed: 53 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from utils.telegram_rating import extract_rating_from_chat
1717
from system_messages import get_system_message, SYSTEM_MESSAGES
1818
from clients import pyrogram_client
19-
from handlers.pyrogram_handlers import on_connect
19+
from handlers.connect_handler import on_connect
2020

2121

2222
@serialize_user_updates
@@ -63,7 +63,13 @@ async def on_start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
6363

6464

6565
async def on_start_connect_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
66-
"""Callback кнопки 'Connect' из приветственного сообщения."""
66+
"""Callback кнопки 'Connect' из приветственного сообщения.
67+
68+
ВНИМАНИЕ: Без декоратора @serialize_user_updates!
69+
Функция только убирает кнопку и делегирует вызов в on_connect, который
70+
уже удерживает этот lock. Так как asyncio.Lock не reentrant, добавление
71+
декоратора сюда приведёт к вечной блокировке (deadlock) для пользователя.
72+
"""
6773
query = update.callback_query
6874
await query.answer()
6975

@@ -87,14 +93,45 @@ async def on_text(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
8793
message_text = m.text or ""
8894
awaiting_prompt_input = (
8995
context.user_data.get("awaiting_prompt")
90-
or context.user_data.get("awaiting_chat_prompt") is not None
96+
or (context.user_data.get("awaiting_chat_prompt") is not None)
9197
)
9298
if not message_text.strip() and not awaiting_prompt_input:
9399
return
94100

95101
await _process_text(update, context, u, m, message_text)
96102

97103

104+
async def _handle_prompt_save(
105+
u: User, m: Message, context: ContextTypes.DEFAULT_TYPE, prompt_text: str, max_length: int,
106+
save_coro, msg_saved: str, msg_trunc: str, is_clearing: bool = False, msg_cleared: str | None = None
107+
) -> None:
108+
"""Вспомогательная функция для сохранения промпта и отправки уведомления."""
109+
was_truncated = len(prompt_text) > max_length
110+
if was_truncated:
111+
prompt_text = prompt_text[:max_length]
112+
113+
saved = await save_coro
114+
if not saved:
115+
error_msg = await get_system_message(u.language_code, "error")
116+
await m.reply_text(error_msg)
117+
return
118+
119+
context.user_data.pop("awaiting_chat_prompt", None)
120+
context.user_data.pop("awaiting_prompt", None)
121+
122+
if is_clearing and msg_cleared:
123+
msg_key = msg_cleared
124+
else:
125+
msg_key = msg_trunc if was_truncated else msg_saved
126+
127+
msg = await get_system_message(u.language_code, msg_key)
128+
if was_truncated:
129+
msg = msg.format(max_length=max_length)
130+
await m.reply_text(msg)
131+
if DEBUG_PRINT:
132+
print(f"{get_timestamp()} [BOT] Prompt saved for user {u.id}: {len(prompt_text)} chars")
133+
134+
98135
async def _process_text(
99136
update: Update, context: ContextTypes.DEFAULT_TYPE,
100137
u: User, m: Message, message_text: str,
@@ -105,56 +142,25 @@ async def _process_text(
105142
if chat_prompt_chat_id is not None:
106143
prompt_text = message_text.strip()
107144
is_clearing_prompt = prompt_text == ""
108-
was_truncated = len(prompt_text) > CHAT_PROMPT_MAX_LENGTH
109-
if was_truncated:
110-
prompt_text = prompt_text[:CHAT_PROMPT_MAX_LENGTH]
111-
112-
saved = await update_chat_prompt(
113-
u.id,
114-
chat_prompt_chat_id,
115-
None if is_clearing_prompt else prompt_text,
145+
146+
save_coro = update_chat_prompt(
147+
u.id, chat_prompt_chat_id, None if is_clearing_prompt else prompt_text[:CHAT_PROMPT_MAX_LENGTH]
148+
)
149+
await _handle_prompt_save(
150+
u, m, context, prompt_text, CHAT_PROMPT_MAX_LENGTH, save_coro,
151+
msg_saved="settings_prompt_saved", msg_trunc="settings_prompt_truncated",
152+
is_clearing=is_clearing_prompt, msg_cleared="settings_prompt_cleared"
116153
)
117-
if not saved:
118-
error_msg = await get_system_message(u.language_code, "error")
119-
await m.reply_text(error_msg)
120-
return
121-
122-
context.user_data.pop("awaiting_chat_prompt", None)
123-
context.user_data.pop("awaiting_prompt", None)
124-
if is_clearing_prompt:
125-
message_key = "settings_prompt_cleared"
126-
else:
127-
message_key = "settings_prompt_truncated" if was_truncated else "settings_prompt_saved"
128-
msg = await get_system_message(u.language_code, message_key)
129-
if was_truncated:
130-
msg = msg.format(max_length=CHAT_PROMPT_MAX_LENGTH)
131-
await m.reply_text(msg)
132-
if DEBUG_PRINT:
133-
print(f"{get_timestamp()} [BOT] Chat prompt saved for user {u.id}, chat {chat_prompt_chat_id}: {len(prompt_text)} chars")
134154
return
135155

136156
# Проверяем: пользователь вводит кастомный промпт?
137157
if context.user_data.get("awaiting_prompt"):
138158
prompt_text = message_text.strip()
139-
was_truncated = len(prompt_text) > USER_PROMPT_MAX_LENGTH
140-
if was_truncated:
141-
prompt_text = prompt_text[:USER_PROMPT_MAX_LENGTH]
142-
143-
saved = await update_user_settings(u.id, {"custom_prompt": prompt_text})
144-
if not saved:
145-
error_msg = await get_system_message(u.language_code, "error")
146-
await m.reply_text(error_msg)
147-
return
148-
149-
context.user_data.pop("awaiting_prompt", None)
150-
context.user_data.pop("awaiting_chat_prompt", None)
151-
message_key = "settings_prompt_truncated" if was_truncated else "settings_prompt_saved"
152-
msg = await get_system_message(u.language_code, message_key)
153-
if was_truncated:
154-
msg = msg.format(max_length=USER_PROMPT_MAX_LENGTH)
155-
await m.reply_text(msg)
156-
if DEBUG_PRINT:
157-
print(f"{get_timestamp()} [BOT] Custom prompt saved for user {u.id}: {len(prompt_text)} chars")
159+
save_coro = update_user_settings(u.id, {"custom_prompt": prompt_text[:USER_PROMPT_MAX_LENGTH]})
160+
await _handle_prompt_save(
161+
u, m, context, prompt_text, USER_PROMPT_MAX_LENGTH, save_coro,
162+
msg_saved="settings_prompt_saved", msg_trunc="settings_prompt_truncated",
163+
)
158164
return
159165

160166
try:

0 commit comments

Comments
 (0)