Skip to content

Commit 592abaa

Browse files
umeshma authored and gvanrossum committed
knowpro.py: Email example (#1694)
Email import: * Chunk large message bodies * Gmail testing * Test REPL improvements * Bug fixes
1 parent eff7458 commit 592abaa

File tree

4 files changed

+213
-41
lines changed

4 files changed

+213
-41
lines changed

tools/test_email.py

Lines changed: 159 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,16 @@
66
import asyncio
77
import sys
88
import traceback
9-
from typing import Any, Iterable
9+
from typing import (
10+
Any,
11+
Literal,
12+
Iterable,
13+
Callable,
14+
Awaitable
15+
)
1016
from colorama import Fore
1117
from pathlib import Path
18+
import argparse
1219

1320
import typechat
1421

@@ -33,13 +40,30 @@
3340
from utool import print_result
3441

3542
class EmailContext:
36-
def __init__(self, db_path: str, conversation: EmailMemory) -> None:
37-
self.db_path = db_path
43+
def __init__(self, base_path: Path, db_name: str, conversation: EmailMemory) -> None:
44+
self.base_path = base_path
45+
self.db_path = base_path.joinpath(db_name)
3846
self.conversation = conversation
3947

40-
async def reset(self):
48+
async def load_conversation(self, db_name: str, create_new:bool = False):
    """Switch this context to the index stored at ``base_path / db_name``.

    The storage provider of the current conversation is closed first, then
    ``db_path`` is repointed and the conversation is replaced by the result
    of ``load_or_create_email_index`` for the new path.

    Args:
        db_name: File name of the database, joined onto ``self.base_path``.
        create_new: Forwarded unchanged to ``load_or_create_email_index``.
    """
    provider = self.conversation.settings.conversation_settings.storage_provider
    await provider.close()

    target = self.base_path.joinpath(db_name)
    self.db_path = target
    self.conversation = await load_or_create_email_index(str(target), create_new)
52+
53+
# Delete the current conversation and re-create it
54+
async def restart_conversation(self):
4155
await self.conversation.settings.conversation_settings.storage_provider.close()
42-
self.conversation = await load_or_create_email_index(self.db_path, create_new=True)
56+
self.conversation = await load_or_create_email_index(str(self.db_path), create_new=True)
57+
58+
59+
CommandHandler = Callable[[EmailContext, list[str]], Awaitable[None]]
60+
61+
# Command decorator
62+
def command(parser: argparse.ArgumentParser):
    """Return a decorator that attaches ``parser`` to a command handler.

    The decorated function is returned unchanged apart from gaining a
    ``parser`` attribute holding the given ArgumentParser.
    """
    def attach(func: Callable):
        setattr(func, "parser", parser)
        return func

    return attach
4367

4468
# Just simple test code
4569
# TODO : Once stable, move creation etc to utool.py
@@ -54,40 +78,46 @@ async def main():
5478

5579
db_path = str(base_path.joinpath("pyEmails.db"))
5680
context = EmailContext(
57-
db_path=db_path,
81+
base_path,
82+
"pyEmails.db",
5883
conversation=await load_or_create_email_index(db_path, create_new=False)
5984
)
6085
print(f"Using email memory at: {db_path}")
6186
await print_conversation_stats(context.conversation)
6287

6388
# Command handlers
64-
cmd_handlers = {
89+
cmd_handlers: dict[str, CommandHandler] = {
90+
"@exit": exit_app,
91+
"@quit": exit_app,
6592
"@add_messages": add_messages, # Add messages
93+
"@parse_messages": parse_messages,
94+
"@load_index": load_index,
6695
"@build_index": build_index, # Build index
6796
"@reset_index": reset_index, # Delete index and start over
6897
"@search": search_index, # Search index
6998
"@answer": generate_answer # Question answer
7099
}
100+
default_handler = generate_answer
71101
while True:
72102
line = input("✉>>").strip()
73103
if len(line) == 0:
74104
continue
75-
elif line == "exit":
76-
break
77-
args = shlex.split(line)
78-
if len(args) < 1:
79-
continue
80105
try:
106+
args = shlex.split(line)
107+
if len(args) < 1:
108+
continue
81109
cmd = args[0].lower()
110+
args.pop(0)
82111
if cmd == "@help":
83-
print_commands(cmd_handlers.keys())
112+
help(cmd_handlers, args)
84113
else:
85114
cmd_handler = cmd_handlers.get(cmd)
115+
if cmd_handler is None and not cmd.startswith("@"):
116+
cmd_handler = default_handler
86117
if cmd_handler:
87-
args.pop(0)
88118
await cmd_handler(context, args)
89119
else:
90-
print_commands(cmd_handlers.keys())
120+
print_commands(cmd_handlers)
91121
except Exception as e:
92122
print()
93123
print(Fore.RED, f"Error\n: {e}")
@@ -100,13 +130,26 @@ async def main():
100130
# ==
101131

102132
# Adds messages. Takes a path either to a file or to a directory
133+
def _add_messages_def() -> argparse.ArgumentParser:
134+
cmd = argparse.ArgumentParser(
135+
description="Add messages to index"
136+
)
137+
cmd.add_argument(
138+
"--path",
139+
default="",
140+
help="Path to an .eml file or to a directory with .eml files"
141+
)
142+
return cmd
143+
144+
@command(_add_messages_def())
103145
async def add_messages(context: EmailContext, args: list[str]):
104-
if len(args) < 1:
105-
print_error("No path provided")
146+
named_args = _add_messages_def().parse_args(args)
147+
if named_args.path is None:
148+
print("No path provided")
106149
return
107-
150+
108151
# Get the path to the email file or directory of emails to ingest
109-
src_path = Path(args[0])
152+
src_path = Path(named_args.path)
110153
emails: list[EmailMessage]
111154
if src_path.is_file():
112155
emails = [import_email_from_file(str(src_path))]
@@ -173,8 +216,79 @@ async def generate_answer(context: EmailContext, args:list[str]):
173216

174217
async def reset_index(context: EmailContext, args: list[str]):
175218
print(f"Deleting {context.db_path}")
176-
await context.reset()
219+
await context.restart_conversation()
177220
await print_conversation_stats(context.conversation)
221+
222+
223+
def _load_index_def() -> argparse.ArgumentParser:
224+
cmdDef = argparse.ArgumentParser(
225+
description="Load index at given db path"
226+
)
227+
cmdDef.add_argument("--name", type=str, default="", help="Name of the index to load")
228+
cmdDef.add_argument("--new", type=bool, default=False)
229+
return cmdDef
230+
231+
@command(_load_index_def())
async def load_index(context: EmailContext, args: list[str]):
    """Handle the load-index command.

    Parses ``args`` with the parser from ``_load_index_def``. When no name
    is given the command does nothing. Otherwise ".db" is appended to the
    name if it is missing, the resulting file name is printed, and the
    context is asked to load that conversation.
    """
    options = _load_index_def().parse_args(args)

    index_name: str = options.name
    if not index_name:
        return

    has_suffix = index_name.endswith(".db")
    db_file = index_name if has_suffix else index_name + ".db"
    print(db_file)
    await context.load_conversation(db_file, options.new)
243+
244+
def _parse_messages_def() -> argparse.ArgumentParser:
245+
cmdDef = argparse.ArgumentParser(
246+
description="Parse messages in the given path"
247+
)
248+
cmdDef.add_argument("--path", type=str, default="")
249+
cmdDef.add_argument("--verbose", type=bool, default=False)
250+
return cmdDef
251+
252+
@command(_parse_messages_def())
async def parse_messages(context: EmailContext, args: list[str]):
    """Handle the parse-messages command.

    Parses the file at ``--path``, or every regular file directly inside it
    when it is not a file, and prints the path of each file that imports
    successfully. With ``--verbose`` the email and its knowledge are
    printed as well. A failure on one file is reported through
    ``print_error`` and does not stop the remaining files.
    """
    options = _parse_messages_def().parse_args(args)
    root = Path(options.path)

    targets: list[str]
    if root.is_file():
        targets = [str(root)]
    else:
        targets = []
        for entry in root.iterdir():
            if entry.is_file():
                targets.append(str(entry))

    print(f"Parsing {len(targets)} messages")
    for target in targets:
        try:
            message = import_email_from_file(target)
            print(target)
            if not options.verbose:
                continue
            print("####################")
            print_email(message)
            print_knowledge(message.get_knowledge())
            print("####################")
        except Exception as err:
            print_error(target)
            print_error(str(err))
276+
277+
async def exit_app(context: EmailContext, args: list[str]):
    """Print a farewell message and terminate the process with exit status 0.

    Both parameters are unused; they are present so the function matches
    the command-handler signature.
    """
    print("Goodbye")
    raise SystemExit(0)
280+
281+
def help(handlers: dict[str, CommandHandler], args: list[str]):
    """Print help for one command, or the list of all commands.

    When ``args[0]`` names a registered handler, that handler's help is
    printed. In every other case (no argument, or an unknown name) the
    command list is printed followed by a usage hint.
    """
    handler = handlers.get(args[0]) if len(args) > 0 else None
    if handler is None:
        print_commands(handlers)
        print("@help <commandName> for details")
    else:
        print_help(handler)
290+
291+
178292
#
179293
# Utilities
180294
#
@@ -203,11 +317,22 @@ def delete_sqlite_db(db_path: str):
203317
if os.path.exists(wal_path):
204318
os.remove(wal_path)
205319

206-
320+
#=========================
207321
#
208322
# Printing
209323
#
324+
#=========================
325+
326+
def print_help(handler: CommandHandler):
    """Print the argparse help text attached to a command handler.

    Handlers carry their parser in a ``parser`` attribute. A handler
    without one (or with ``parser`` set to None) prints nothing.

    Args:
        handler: The command handler whose help should be shown.
    """
    # Annotate the local instead of chain-assigning: the previous
    # ``parser = argparse.ArgumentParser = handler.parser`` rebound the
    # ``argparse.ArgumentParser`` class itself to a parser instance.
    parser: argparse.ArgumentParser | None = getattr(handler, "parser", None)
    if parser is None:
        return
    print(parser.format_help())
    print()
210331

332+
def print_commands(commands: dict[str, CommandHandler]):
    """Print the registered command names in sorted order as a green bullet list."""
    print_list(Fore.GREEN, sorted(commands), "COMMANDS", "ul")
335+
211336
def print_email(email: EmailMessage):
212337
print("From:", email.metadata.sender)
213338
print("To:", ", ".join(email.metadata.recipients))
@@ -234,15 +359,20 @@ def print_knowledge(knowledge: kplib.KnowledgeResponse):
234359
print()
235360
print(Fore.RESET)
236361

237-
def print_commands(names: Iterable[str]):
238-
print_list(Fore.GREEN, sorted(names), "Commands")
239-
240-
def print_list(color, list: Iterable[Any], title: str):
362+
def print_list(color, list: Iterable[Any], title: str, type: Literal["plain", "ol", "ul"] = "plain"):
363+
print(color)
241364
if title:
242-
print(color + f"# {title}")
243-
print()
244-
for item in list:
245-
print(color + " -", item)
365+
print(f"# {title}\n")
366+
if type == "plain":
367+
for item in list:
368+
print(item)
369+
elif type == "ul":
370+
for item in list:
371+
print(f"- {item}")
372+
elif type == "ol":
373+
for i, item in enumerate(list):
374+
print(f"{i + 1}. {item}")
375+
print(Fore.RESET)
246376

247377
def print_error(msg: str):
248378
print(Fore.RED + msg)

typeagent/emails/email_import.py

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,41 +3,42 @@
33

44
import re
55
from pathlib import Path
6-
from datetime import datetime
6+
from typing import Iterable
77

88
from email import message_from_string
99
from email.utils import parsedate_to_datetime
1010
from email.message import Message
1111

1212
from .email_message import EmailMessage, EmailMessageMeta
1313

14-
def import_emails_from_dir(dir_path: str) -> list[EmailMessage]:
14+
def import_emails_from_dir(dir_path: str, max_chunk_length: int = 4096) -> list[EmailMessage]:
1515
messages: list[EmailMessage] = []
1616
for file_path in Path(dir_path).iterdir():
17-
messages.append(import_email_from_file(str(file_path.resolve())));
17+
if file_path.is_file():
18+
messages.append(import_email_from_file(str(file_path.resolve()), max_chunk_length))
1819
return messages
1920

2021
# Imports an email file (.eml) and returns a single EmailMessage object
21-
def import_email_from_file(file_path: str) -> EmailMessage:
22+
def import_email_from_file(file_path: str, max_chunk_length: int = 4096) -> EmailMessage:
2223
email_string: str = ""
2324
with open(file_path, "r") as f:
2425
email_string = f.read()
2526

26-
return import_email_string(email_string)
27+
return import_email_string(email_string, max_chunk_length)
2728

2829
# Imports a single email MIME string and returns an EmailMessage object
29-
def import_email_string(email_string: str) -> EmailMessage:
30+
def import_email_string(email_string: str, max_chunk_length: int = 4096) -> EmailMessage:
3031
msg: Message = message_from_string(email_string)
31-
email: EmailMessage = import_email_message(msg)
32+
email: EmailMessage = import_email_message(msg, max_chunk_length)
3233
return email
3334

34-
def import_forwarded_email_string(email_string: str) -> list[EmailMessage]:
35+
def import_forwarded_email_string(email_string: str, max_chunk_length: int = 4096) -> list[EmailMessage]:
3536
msg_parts = get_forwarded_email_parts(email_string)
36-
return [import_email_string(part) for part in msg_parts if len(part) > 0]
37+
return [import_email_string(part, max_chunk_length) for part in msg_parts if len(part) > 0]
3738

3839
# Imports an email.message.Message object and returns an EmailMessage object
3940
# If the message is a reply, returns only the latest response.
40-
def import_email_message(msg: Message) -> EmailMessage:
41+
def import_email_message(msg: Message, max_chunk_length: int) -> EmailMessage:
4142
# Extract metadata from
4243
email_meta = EmailMessageMeta(
4344
sender = msg.get("From", ""),
@@ -61,9 +62,10 @@ def import_email_message(msg: Message) -> EmailMessage:
6162
if email_meta.subject is not None:
6263
body = email_meta.subject + "\n\n" + body
6364

65+
body_chunks = _text_to_chunks(body, max_chunk_length)
6466
email: EmailMessage = EmailMessage(
6567
metadata=email_meta,
66-
text_chunks=[body],
68+
text_chunks=body_chunks,
6769
timestamp=timestamp
6870
)
6971
return email
@@ -159,3 +161,35 @@ def _remove_empty(strings: list[str]) -> list[str]:
159161
if len(s) > 0:
160162
non_empty.append(s)
161163
return non_empty
164+
165+
def _text_to_chunks(text: str, max_chunk_length: int) -> list[str]:
    """Split ``text`` into chunks of at most ``max_chunk_length`` characters.

    Text that already fits is returned as a single-element list. Longer
    text is split into paragraphs, which are then merged back together
    (joined by a blank line) by ``_merge_chunks``.

    Args:
        text: The text to chunk.
        max_chunk_length: Upper bound on the length of each chunk.

    Returns:
        The list of chunks, in original order.
    """
    # Use <= so a text of exactly max_chunk_length is kept whole rather
    # than being sent through the paragraph splitter.
    if len(text) <= max_chunk_length:
        return [text]

    paragraphs = _splitIntoParagraphs(text)
    return list(_merge_chunks(paragraphs, "\n\n", max_chunk_length))
171+
172+
def _splitIntoParagraphs(text: str) -> list[str]:
    """Split ``text`` on runs of two or more newlines and pass the pieces through ``_remove_empty``."""
    pieces = re.split(r'\n{2,}', text)
    return _remove_empty(pieces)
174+
175+
def _merge_chunks(chunks: Iterable[str], separator: str, max_chunk_length: int) -> Iterable[str]:
176+
sep_length = len(separator)
177+
cur_chunk:str = ""
178+
for new_chunk in chunks:
179+
cur_length = len(cur_chunk)
180+
new_length = len(new_chunk)
181+
if new_length > max_chunk_length:
182+
# Truncate
183+
new_chunk = new_chunk[0:max_chunk_length]
184+
new_length = len(new_chunk)
185+
186+
if (cur_length + (new_length + sep_length) > max_chunk_length):
187+
if cur_length > 0:
188+
yield cur_chunk
189+
cur_chunk = new_chunk
190+
else:
191+
cur_chunk += separator
192+
cur_chunk += new_chunk
193+
194+
if (len(cur_chunk)) > 0:
195+
yield cur_chunk

typeagent/emails/email_message.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ class EmailMessageMeta(IKnowledgeSource, IMessageMetadata):
2727
bcc: list[str] = Field(default_factory=list)
2828
subject: str | None = None
2929

30+
@property
31+
def source(self) -> str | None: # type: ignore[reportIncompatibleVariableOverride]
32+
return self.sender
33+
34+
@property
35+
def dest(self) -> str | list[str] | None: # type: ignore[reportIncompatibleVariableOverride]
36+
return self.recipients
37+
3038
def get_knowledge(self) -> kplib.KnowledgeResponse:
3139
return kplib.KnowledgeResponse(
3240
entities=self.to_entities(),

0 commit comments

Comments
 (0)