diff --git a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_3_reduce.py b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_3_reduce.py index 7b7e04b4172..b87a7120e8c 100644 --- a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_3_reduce.py +++ b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_3_reduce.py @@ -5,12 +5,12 @@ from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState from onyx.agents.agent_search.dr.sub_agents.states import SubAgentUpdate -from onyx.agents.agent_search.dr.utils import chunks_or_sections_to_search_docs from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, ) from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event from onyx.context.search.models import SavedSearchDoc +from onyx.context.search.models import SearchDoc from onyx.server.query_and_chat.streaming_models import SectionEnd from onyx.utils.logger import setup_logger @@ -47,7 +47,7 @@ def is_reducer( doc_list.append(x) # Convert InferenceSections to SavedSearchDocs - search_docs = chunks_or_sections_to_search_docs(doc_list) + search_docs = SearchDoc.chunks_or_sections_to_search_docs(doc_list) retrieved_saved_search_docs = [ SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0) for search_doc in search_docs diff --git a/backend/onyx/agents/agent_search/dr/utils.py b/backend/onyx/agents/agent_search/dr/utils.py index b0e86d9b52d..817b6e19dde 100644 --- a/backend/onyx/agents/agent_search/dr/utils.py +++ b/backend/onyx/agents/agent_search/dr/utils.py @@ -13,7 +13,7 @@ ) from onyx.context.search.models import InferenceSection from onyx.context.search.models import SavedSearchDoc -from onyx.context.search.utils import chunks_or_sections_to_search_docs +from onyx.context.search.models import SearchDoc from onyx.tools.tool_implementations.web_search.web_search_tool import ( WebSearchTool, ) @@ -266,7 +266,7 @@ def convert_inference_sections_to_search_docs( is_internet: bool = False, ) -> list[SavedSearchDoc]: # Convert InferenceSections to SavedSearchDocs - search_docs = chunks_or_sections_to_search_docs(inference_sections) + search_docs = SearchDoc.chunks_or_sections_to_search_docs(inference_sections) for search_doc in search_docs: search_doc.is_internet = is_internet diff --git a/backend/onyx/agents/agent_search/orchestration/states.py b/backend/onyx/agents/agent_search/orchestration/states.py index 80cc9f8dbfe..725b399b219 100644 --- a/backend/onyx/agents/agent_search/orchestration/states.py +++ b/backend/onyx/agents/agent_search/orchestration/states.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot +from onyx.chat.prompt_builder.schemas import PromptSnapshot from onyx.tools.message import ToolCallSummary from onyx.tools.models import SearchToolOverrideKwargs from onyx.tools.models import ToolCallFinalResult diff --git a/backend/onyx/background/celery/tasks/docprocessing/tasks.py b/backend/onyx/background/celery/tasks/docprocessing/tasks.py index 11afc53aade..b376b236539 100644 --- a/backend/onyx/background/celery/tasks/docprocessing/tasks.py +++ b/backend/onyx/background/celery/tasks/docprocessing/tasks.py @@ -86,7 +86,6 @@ from onyx.file_store.document_batch_storage import get_document_batch_storage from onyx.httpx.httpx_pool import HttpxPool from onyx.indexing.embedder import DefaultIndexingEmbedder -from onyx.indexing.indexing_pipeline import run_indexing_pipeline from onyx.natural_language_processing.search_nlp_models import EmbeddingModel from onyx.natural_language_processing.search_nlp_models import ( InformationContentClassificationModel, @@ -1268,6 +1267,8 @@ def _docprocessing_task( tenant_id: str, batch_num: int, ) -> None: + from onyx.indexing.indexing_pipeline import run_indexing_pipeline + start_time = time.monotonic() if tenant_id: diff --git a/backend/onyx/background/indexing/run_docfetching.py b/backend/onyx/background/indexing/run_docfetching.py index 70502f1f895..955c8d9fac7 100644 --- a/backend/onyx/background/indexing/run_docfetching.py +++ b/backend/onyx/background/indexing/run_docfetching.py @@ -28,7 +28,6 @@ from onyx.connectors.connector_runner import ConnectorRunner from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.exceptions import UnexpectedValidationError -from onyx.connectors.factory import instantiate_connector from onyx.connectors.interfaces import CheckpointedConnector from onyx.connectors.models import ConnectorFailure from onyx.connectors.models import ConnectorStopSignal @@ -66,7 +65,6 @@ from onyx.httpx.httpx_pool import HttpxPool from onyx.indexing.embedder import DefaultIndexingEmbedder from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface -from onyx.indexing.indexing_pipeline import run_indexing_pipeline from onyx.natural_language_processing.search_nlp_models import ( InformationContentClassificationModel, ) @@ -100,6 +98,8 @@ def _get_connector_runner( are the complete list of existing documents of the connector. If the task of type LOAD_STATE, the list will be considered complete and otherwise incomplete. """ + from onyx.connectors.factory import instantiate_connector + task = attempt.connector_credential_pair.connector.input_type try: @@ -283,6 +283,8 @@ def _run_indexing( 2. Embed and index these documents into the chosen datastore (vespa) 3. Updates Postgres to record the indexed documents + the outcome of this run """ + from onyx.indexing.indexing_pipeline import run_indexing_pipeline + start_time = time.monotonic() # jsut used for logging with get_session_with_current_tenant() as db_session_temp: diff --git a/backend/onyx/chat/prompt_builder/answer_prompt_builder.py b/backend/onyx/chat/prompt_builder/answer_prompt_builder.py index 0c3ee865b5a..f3573c5e05f 100644 --- a/backend/onyx/chat/prompt_builder/answer_prompt_builder.py +++ b/backend/onyx/chat/prompt_builder/answer_prompt_builder.py @@ -4,7 +4,6 @@ from langchain_core.messages import BaseMessage from langchain_core.messages import HumanMessage from langchain_core.messages import SystemMessage -from pydantic import BaseModel from pydantic.v1 import BaseModel as BaseModel__v1 from onyx.chat.models import PromptConfig @@ -196,10 +195,6 @@ def build(self) -> list[BaseMessage]: # Stores some parts of a prompt builder as needed for tool calls -class PromptSnapshot(BaseModel): - raw_message_history: list[PreviousMessage] - raw_user_query: str - built_prompt: list[BaseMessage] # TODO: rename this? AnswerConfig maybe? diff --git a/backend/onyx/chat/prompt_builder/schemas.py b/backend/onyx/chat/prompt_builder/schemas.py new file mode 100644 index 00000000000..461a76f45ef --- /dev/null +++ b/backend/onyx/chat/prompt_builder/schemas.py @@ -0,0 +1,10 @@ +from langchain_core.messages import BaseMessage +from pydantic import BaseModel + +from onyx.llm.models import PreviousMessage + + +class PromptSnapshot(BaseModel): + raw_message_history: list[PreviousMessage] + raw_user_query: str + built_prompt: list[BaseMessage] diff --git a/backend/onyx/chat/tool_handling/tool_response_handler.py b/backend/onyx/chat/tool_handling/tool_response_handler.py index 7531d9edf8a..613976b1ac6 100644 --- a/backend/onyx/chat/tool_handling/tool_response_handler.py +++ b/backend/onyx/chat/tool_handling/tool_response_handler.py @@ -7,7 +7,7 @@ from onyx.chat.models import ResponsePart from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder from onyx.chat.prompt_builder.answer_prompt_builder import LLMCall -from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot +from onyx.chat.prompt_builder.schemas import PromptSnapshot from onyx.llm.interfaces import LLM from onyx.tools.force import ForceUseTool from onyx.tools.message import build_tool_message diff --git a/backend/onyx/context/search/models.py b/backend/onyx/context/search/models.py index 14e7c5bcb40..a1b062f57ca 100644 --- a/backend/onyx/context/search/models.py +++ b/backend/onyx/context/search/models.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from datetime import datetime from typing import Any @@ -355,6 +356,44 @@ class SearchDoc(BaseModel): secondary_owners: list[str] | None = None is_internet: bool = False + @classmethod + def chunks_or_sections_to_search_docs( + cls, + items: "Sequence[InferenceChunk | InferenceSection] | None", + ) -> list["SearchDoc"]: + """Convert a sequence of InferenceChunk or InferenceSection objects to SearchDoc objects.""" + if not items: + return [] + + search_docs = [ + cls( + document_id=( + chunk := ( + item.center_chunk + if isinstance(item, InferenceSection) + else item + ) + ).document_id, + chunk_ind=chunk.chunk_id, + semantic_identifier=chunk.semantic_identifier or "Unknown", + link=chunk.source_links[0] if chunk.source_links else None, + blurb=chunk.blurb, + source_type=chunk.source_type, + boost=chunk.boost, + hidden=chunk.hidden, + metadata=chunk.metadata, + score=chunk.score, + match_highlights=chunk.match_highlights, + updated_at=chunk.updated_at, + primary_owners=chunk.primary_owners, + secondary_owners=chunk.secondary_owners, + is_internet=False, + ) + for item in items + ] + + return search_docs + def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore initial_dict = super().model_dump(*args, **kwargs) # type: ignore initial_dict["updated_at"] = ( diff --git a/backend/onyx/context/search/utils.py b/backend/onyx/context/search/utils.py index 607eb877fff..8edd75f7e51 100644 --- a/backend/onyx/context/search/utils.py +++ b/backend/onyx/context/search/utils.py @@ -118,40 +118,6 @@ def inference_section_from_chunks( ) -def chunks_or_sections_to_search_docs( - items: Sequence[InferenceChunk | InferenceSection] | None, -) -> list[SearchDoc]: - if not items: - return [] - - search_docs = [ - SearchDoc( - document_id=( - chunk := ( - item.center_chunk if isinstance(item, InferenceSection) else item - ) - ).document_id, - chunk_ind=chunk.chunk_id, - semantic_identifier=chunk.semantic_identifier or "Unknown", - link=chunk.source_links[0] if chunk.source_links else None, - blurb=chunk.blurb, - source_type=chunk.source_type, - boost=chunk.boost, - hidden=chunk.hidden, - metadata=chunk.metadata, - score=chunk.score, - match_highlights=chunk.match_highlights, - updated_at=chunk.updated_at, - primary_owners=chunk.primary_owners, - secondary_owners=chunk.secondary_owners, - is_internet=False, - ) - for item in items - ] - - return search_docs - - def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]: try: # Re-tokenize using the NLTK tokenizer for better matching diff --git a/backend/onyx/db/chat.py b/backend/onyx/db/chat.py index 440d23c28b4..559206a1377 100644 --- a/backend/onyx/db/chat.py +++ b/backend/onyx/db/chat.py @@ -34,7 +34,6 @@ from onyx.context.search.models import RetrievalDocs from onyx.context.search.models import SavedSearchDoc from onyx.context.search.models import SearchDoc as ServerSearchDoc -from onyx.context.search.utils import chunks_or_sections_to_search_docs from onyx.db.models import AgentSearchMetrics from onyx.db.models import AgentSubQuery from onyx.db.models import AgentSubQuestion @@ -57,7 +56,7 @@ from onyx.server.query_and_chat.models import ChatMessageDetail from onyx.server.query_and_chat.models import SubQueryDetail from onyx.server.query_and_chat.models import SubQuestionDetail -from onyx.tools.tool_runner import ToolCallFinalResult +from onyx.tools.models import ToolCallFinalResult from onyx.utils.logger import setup_logger from onyx.utils.special_types import JSON_ro @@ -1147,7 +1146,7 @@ def log_agent_sub_question_results( db_session.add(sub_query_object) db_session.commit() - search_docs = chunks_or_sections_to_search_docs( + search_docs = ServerSearchDoc.chunks_or_sections_to_search_docs( sub_query.retrieved_documents ) for doc in search_docs: diff --git a/backend/onyx/file_processing/extract_file_text.py b/backend/onyx/file_processing/extract_file_text.py index 793ee23901c..405d15daa4b 100644 --- a/backend/onyx/file_processing/extract_file_text.py +++ b/backend/onyx/file_processing/extract_file_text.py @@ -15,14 +15,12 @@ from typing import Any from typing import IO from typing import NamedTuple +from typing import Optional +from typing import TYPE_CHECKING from zipfile import BadZipFile import chardet import openpyxl -from markitdown import FileConversionException -from markitdown import MarkItDown -from markitdown import StreamInfo -from markitdown import UnsupportedFormatException from PIL import Image from pypdf import PdfReader from pypdf.errors import PdfStreamError @@ -37,6 +35,8 @@ from onyx.utils.file_types import WORD_PROCESSING_MIME_TYPE from onyx.utils.logger import setup_logger +if TYPE_CHECKING: + from markitdown import MarkItDown logger = setup_logger() # NOTE(rkuo): Unify this with upload_files_for_chat and file_valiation.py @@ -85,7 +85,7 @@ "image/webp", ] -_MARKITDOWN_CONVERTER: MarkItDown | None = None +_MARKITDOWN_CONVERTER: Optional["MarkItDown"] = None KNOWN_OPENPYXL_BUGS = [ "Value must be either numerical or a string containing a wildcard", @@ -93,8 +93,10 @@ ] -def get_markitdown_converter() -> MarkItDown: +def get_markitdown_converter() -> "MarkItDown": global _MARKITDOWN_CONVERTER + from markitdown import MarkItDown + if _MARKITDOWN_CONVERTER is None: _MARKITDOWN_CONVERTER = MarkItDown(enable_plugins=False) return _MARKITDOWN_CONVERTER @@ -358,6 +360,12 @@ def docx_to_text_and_images( The images list returned is empty in this case. """ md = get_markitdown_converter() + from markitdown import ( + StreamInfo, + FileConversionException, + UnsupportedFormatException, + ) + try: doc = md.convert( to_bytesio(file), stream_info=StreamInfo(mimetype=WORD_PROCESSING_MIME_TYPE) @@ -394,6 +402,12 @@ def docx_to_text_and_images( def pptx_to_text(file: IO[Any], file_name: str = "") -> str: md = get_markitdown_converter() + from markitdown import ( + StreamInfo, + FileConversionException, + UnsupportedFormatException, + ) + stream_info = StreamInfo( mimetype=PRESENTATION_MIME_TYPE, filename=file_name or None, extension=".pptx" ) diff --git a/backend/onyx/llm/utils.py b/backend/onyx/llm/utils.py index dd5af68e879..24810aaf852 100644 --- a/backend/onyx/llm/utils.py +++ b/backend/onyx/llm/utils.py @@ -8,8 +8,6 @@ from typing import cast from typing import TYPE_CHECKING -import litellm # type: ignore -import tiktoken from langchain.prompts.base import StringPromptValue from langchain.prompts.chat import ChatPromptValue from langchain.schema import PromptValue @@ -18,18 +16,6 @@ from langchain.schema.messages import BaseMessage from langchain.schema.messages import HumanMessage from langchain.schema.messages import SystemMessage -from litellm.exceptions import APIConnectionError # type: ignore -from litellm.exceptions import APIError # type: ignore -from litellm.exceptions import AuthenticationError # type: ignore -from litellm.exceptions import BadRequestError # type: ignore -from litellm.exceptions import BudgetExceededError # type: ignore -from litellm.exceptions import ContentPolicyViolationError # type: ignore -from litellm.exceptions import ContextWindowExceededError # type: ignore -from litellm.exceptions import NotFoundError # type: ignore -from litellm.exceptions import PermissionDeniedError # type: ignore -from litellm.exceptions import RateLimitError # type: ignore -from litellm.exceptions import Timeout # type: ignore -from litellm.exceptions import UnprocessableEntityError # type: ignore from onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION @@ -40,7 +26,6 @@ from onyx.configs.model_configs import GEN_AI_MAX_TOKENS from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS from onyx.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS -from onyx.file_processing.extract_file_text import read_pdf_file from onyx.file_store.models import ChatFileType from onyx.file_store.models import InMemoryChatFile from onyx.llm.interfaces import LLM @@ -72,6 +57,19 @@ def litellm_exception_to_error_msg( dict[str, str] | None ) = LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS, ) -> str: + from litellm.exceptions import BadRequestError + from litellm.exceptions import AuthenticationError + from litellm.exceptions import PermissionDeniedError + from litellm.exceptions import NotFoundError + from litellm.exceptions import UnprocessableEntityError + from litellm.exceptions import RateLimitError + from litellm.exceptions import ContextWindowExceededError + from litellm.exceptions import APIConnectionError + from litellm.exceptions import APIError + from litellm.exceptions import Timeout + from litellm.exceptions import ContentPolicyViolationError + from litellm.exceptions import BudgetExceededError + error_msg = str(e) if custom_error_msg_mappings: @@ -133,6 +131,8 @@ def _build_content( files: list[InMemoryChatFile] | None = None, ) -> str: """Applies all non-image files.""" + from onyx.file_processing.extract_file_text import read_pdf_file + if not files: return message @@ -355,6 +355,7 @@ def check_number_of_tokens( function. If none is provided, default to the tiktoken encoder used by GPT-3.5 and GPT-4. """ + import tiktoken if encode_fn is None: encode_fn = tiktoken.get_encoding("cl100k_base").encode @@ -378,6 +379,8 @@ def test_llm(llm: LLM) -> str | None: @lru_cache(maxsize=1) # the copy.deepcopy is expensive, so we cache the result def get_model_map() -> dict: + import litellm + starting_map = copy.deepcopy(cast(dict, litellm.model_cost)) # NOTE: we could add additional models here in the future, @@ -457,6 +460,7 @@ def get_llm_contextual_cost( this does not account for the cost of documents that fit within a single chunk which do not get contextualized. """ + import litellm # calculate input costs num_tokens = ONE_MILLION @@ -655,6 +659,8 @@ def model_supports_image_input(model_name: str, model_provider: str) -> bool: def model_is_reasoning_model(model_name: str, model_provider: str) -> bool: + import litellm + model_map = get_model_map() try: model_obj = find_model_obj( diff --git a/backend/onyx/natural_language_processing/search_nlp_models.py b/backend/onyx/natural_language_processing/search_nlp_models.py index 96712acccac..ae4cd8a75f1 100644 --- a/backend/onyx/natural_language_processing/search_nlp_models.py +++ b/backend/onyx/natural_language_processing/search_nlp_models.py @@ -13,13 +13,11 @@ import aioboto3 # type: ignore import httpx -import openai import requests import voyageai # type: ignore from cohere import AsyncClient as CohereAsyncClient from google.oauth2 import service_account # type: ignore from httpx import HTTPError -from litellm import aembedding from requests import JSONDecodeError from requests import RequestException from requests import Response @@ -186,6 +184,8 @@ def __init__( async def _embed_openai( self, texts: list[str], model: str | None, reduced_dimension: int | None ) -> list[Embedding]: + import openai + if not model: model = DEFAULT_OPENAI_MODEL @@ -249,6 +249,8 @@ async def _embed_voyage( async def _embed_azure( self, texts: list[str], model: str | None ) -> list[Embedding]: + from litellm import aembedding + response = await aembedding( model=model, input=texts, @@ -331,6 +333,8 @@ async def embed( deployment_name: str | None = None, reduced_dimension: int | None = None, ) -> list[Embedding]: + import openai + try: if self.provider == EmbeddingProvider.OPENAI: return await self._embed_openai(texts, model_name, reduced_dimension) diff --git a/backend/onyx/server/query_and_chat/query_backend.py b/backend/onyx/server/query_and_chat/query_backend.py index dc54056fd01..9cbe9584754 100644 --- a/backend/onyx/server/query_and_chat/query_backend.py +++ b/backend/onyx/server/query_and_chat/query_backend.py @@ -14,7 +14,6 @@ from onyx.context.search.preprocessing.access_filters import ( build_access_filters_for_user, ) -from onyx.context.search.utils import chunks_or_sections_to_search_docs from onyx.db.chat import get_chat_messages_by_session from onyx.db.chat import get_chat_session_by_id from onyx.db.chat import get_chat_sessions_by_user @@ -74,7 +73,7 @@ def admin_search( ) matching_chunks = document_index.admin_retrieval(query=query, filters=final_filters) - documents = chunks_or_sections_to_search_docs(matching_chunks) + documents = SearchDoc.chunks_or_sections_to_search_docs(matching_chunks) # Deduplicate documents by id deduplicated_documents: list[SearchDoc] = []