 from __future__ import annotations
 
-import functools
 import time
-
-from google import generativeai
-from google.generativeai.types.content_types import (
-    add_object_type,
-    convert_to_nullable,
-    strip_titles,
-    unpack_defs,
+from functools import lru_cache
+from pathlib import Path
+
+from google import genai
+from google.genai.types import (
+    CountTokensConfig,
+    File,
+    GenerateContentConfig,
+    GenerateContentResponse,
+    Model,
 )
-from google.generativeai.types.generation_types import GenerateContentResponse
-from google.generativeai.types.model_types import Model
 from openai.types import CompletionUsage
 from openai.types.chat import (
     ChatCompletionMessage,
     ChatCompletionMessageParam,
     ChatCompletionToolChoiceOptionParam,
     completion_create_params,
 )
 from openai.types.chat.chat_completion import ChatCompletion, Choice
-from typing_extensions import Any, Dict, Iterable, List, Optional, Union
+from pydantic import BaseModel
+from typing_extensions import Any, Dict, Iterable, List, Optional, Type, Union
 
 from patchwork.common.client.llm.protocol import NOT_GIVEN, LlmClient, NotGiven
 from patchwork.common.client.llm.utils import json_schema_to_model
 
 
-@functools.lru_cache
-def _cached_list_model_from_google() -> list[Model]:
-    return list(generativeai.list_models())
-
-
 class GoogleLlmClient(LlmClient):
     __SAFETY_SETTINGS = [
         dict(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_NONE"),
@@ -43,20 +39,45 @@ class GoogleLlmClient(LlmClient):
 
     def __init__(self, api_key: str):
         self.__api_key = api_key
-        generativeai.configure(api_key=api_key)
+        self.client = genai.Client(api_key=api_key)
+
+    @lru_cache(maxsize=1)
+    def __get_models_info(self) -> list[Model]:
+        return list(self.client.models.list())
 
     def __get_model_limits(self, model: str) -> int:
-        for model_info in _cached_list_model_from_google():
-            if model_info.name == f"{self.__MODEL_PREFIX}{model}":
+        for model_info in self.__get_models_info():
+            if model_info.name == f"{self.__MODEL_PREFIX}{model}" and model_info.input_token_limit is not None:
                 return model_info.input_token_limit
         return 1_000_000
 
+    @lru_cache
     def get_models(self) -> set[str]:
-        return {model.name.removeprefix(self.__MODEL_PREFIX) for model in _cached_list_model_from_google()}
+        return {model_info.name.removeprefix(self.__MODEL_PREFIX) for model_info in self.__get_models_info()}
 
     def is_model_supported(self, model: str) -> bool:
         return model in self.get_models()
 
+    def __upload(self, file: Path | NotGiven) -> File | None:
+        if file is NOT_GIVEN:  # compare against the sentinel instance, not the NotGiven type
+            return None
+
+        # Reuse a previously uploaded file if the service still has it.
+        try:
+            file_ref = self.client.files.get(name=file.name)
+            if file_ref.error is None:
+                return file_ref
+        except Exception:
+            pass
+
+        # Otherwise upload it fresh.
+        try:
+            file_ref = self.client.files.upload(file=file)
+            if file_ref.error is None:
+                return file_ref
+        except Exception:
+            pass
+
+        return None
+
     def is_prompt_supported(
         self,
         messages: Iterable[ChatCompletionMessageParam],
@@ -74,11 +95,23 @@ def is_prompt_supported(
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        file: Path | NotGiven = NOT_GIVEN,
     ) -> int:
         system, chat = self.__openai_messages_to_google_messages(messages)
-        gen_model = generativeai.GenerativeModel(model_name=model, system_instruction=system)
+
+        file_ref = self.__upload(file)
+        if file_ref is not None:
+            chat.append(file_ref)
+
         try:
-            token_count = gen_model.count_tokens(chat).total_tokens
+            token_response = self.client.models.count_tokens(
+                model=model,
+                contents=chat,
+                config=CountTokensConfig(
+                    system_instruction=system,
+                ),
+            )
+            token_count = token_response.total_tokens
         except Exception as e:
             return -1
         model_limit = self.__get_model_limits(model)
@@ -142,13 +175,15 @@ def chat_completion(
 
         system_content, contents = self.__openai_messages_to_google_messages(messages)
 
-        model_client = generativeai.GenerativeModel(
-            model_name=model,
-            safety_settings=self.__SAFETY_SETTINGS,
-            generation_config=NOT_GIVEN.remove_not_given(generation_dict),
-            system_instruction=system_content,
+        response = self.client.models.generate_content(
+            model=model,
+            contents=contents,
+            config=GenerateContentConfig(
+                system_instruction=system_content,
+                safety_settings=self.__SAFETY_SETTINGS,
+                **generation_dict,
+            ),
         )
-        response = model_client.generate_content(contents=contents)
         return self.__google_response_to_openai_response(response, model)
 
     @staticmethod
@@ -191,18 +226,9 @@ def __google_response_to_openai_response(google_response: GenerateContentRespons
         )
 
     @staticmethod
-    def json_schema_to_google_schema(json_schema: dict[str, Any] | None) -> dict[str, Any] | None:
+    def json_schema_to_google_schema(json_schema: dict[str, Any] | None) -> Type[BaseModel] | None:
         if json_schema is None:
             return None
 
         model = json_schema_to_model(json_schema)
-        parameters = model.model_json_schema()
-        defs = parameters.pop("$defs", {})
-
-        for name, value in defs.items():
-            unpack_defs(value, defs)
-        unpack_defs(parameters, defs)
-        convert_to_nullable(parameters)
-        add_object_type(parameters)
-        strip_titles(parameters)
-        return parameters
+        return model
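
Taken together, the change swaps the deprecated module-level `google.generativeai` API for an instance-scoped `genai.Client`, while keeping the OpenAI-shaped `LlmClient` surface. A minimal usage sketch follows; the import path, model name, and API key are placeholders, and a positive return from `is_prompt_supported` is assumed to mean the prompt fits the model's input limit:

```python
# Sketch only: import path, model name, and key are illustrative assumptions.
from patchwork.common.client.llm.google_ import GoogleLlmClient

client = GoogleLlmClient(api_key="YOUR_GEMINI_API_KEY")

model = "gemini-1.5-flash"  # example; any name from client.get_models() works
messages = [{"role": "user", "content": "Say hello in one sentence."}]

# is_prompt_supported returns -1 when token counting fails; a positive value
# is assumed here to mean the prompt fits within the model's input limit.
if client.is_model_supported(model) and client.is_prompt_supported(messages=messages, model=model) > 0:
    completion = client.chat_completion(model=model, messages=messages)
    print(completion.choices[0].message.content)
```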
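One design note on the new caching: `functools.lru_cache` on the bound methods `__get_models_info` and `get_models` keys a module-level cache on `self`, so every `GoogleLlmClient` instance stays referenced for the life of the process. A per-instance alternative, sketched with only the standard library (the `ModelCatalog` name is illustrative, not part of the diff):

```python
from functools import cached_property


class ModelCatalog:
    """Illustrative stand-in for the client wrapper above."""

    def __init__(self, client):
        self.client = client  # e.g. a genai.Client

    @cached_property
    def models_info(self) -> list:
        # Computed once, then stored on the instance itself, so the cached
        # listing is released when the object is garbage-collected.
        return list(self.client.models.list())
```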