31 changes: 26 additions & 5 deletions application/agents/base.py
@@ -42,6 +42,7 @@ def __init__(
llm_handler=None,
tool_executor: Optional[ToolExecutor] = None,
backup_models: Optional[List[str]] = None,
model_user_id: Optional[str] = None,
):
self.endpoint = endpoint
self.llm_name = llm_name
@@ -52,10 +53,13 @@ def __init__(
self.prompt = prompt
self.decoded_token = decoded_token or {}
self.user: str = self.decoded_token.get("sub")
# BYOM-resolution scope: owner for shared agents, caller for
# caller-owned BYOM, None for built-ins. Falls back to self.user
# for worker/legacy callers that don't thread model_user_id.
self.model_user_id = model_user_id
self.tools: List[Dict] = []
self.chat_history: List[Dict] = chat_history if chat_history is not None else []

# Dependency injection for LLM; fall back to creating one if not provided
if llm is not None:
self.llm = llm
else:
@@ -67,8 +71,16 @@ def __init__(
model_id=model_id,
agent_id=agent_id,
backup_models=backup_models,
model_user_id=model_user_id,
)

# For BYOM, the registry id (a UUID) differs from the upstream model
# id (e.g. ``mistral-large-latest``). LLMCreator resolves the upstream
# id onto the LLM instance; cache it here for subsequent gen calls.
self.upstream_model_id = (
getattr(self.llm, "model_id", None) or model_id
)

self.retrieved_docs = retrieved_docs or []

if llm_handler is not None:
@@ -306,7 +318,9 @@ def _check_context_limit(self, messages: List[Dict]) -> bool:
try:
current_tokens = self._calculate_current_context_tokens(messages)
self.current_token_count = current_tokens
context_limit = get_token_limit(self.model_id)
context_limit = get_token_limit(
self.model_id, user_id=self.model_user_id or self.user
)
threshold = int(context_limit * settings.COMPRESSION_THRESHOLD_PERCENTAGE)

if current_tokens >= threshold:
@@ -325,7 +339,9 @@ def _validate_context_size(self, messages: List[Dict]) -> None:

current_tokens = self._calculate_current_context_tokens(messages)
self.current_token_count = current_tokens
context_limit = get_token_limit(self.model_id)
context_limit = get_token_limit(
self.model_id, user_id=self.model_user_id or self.user
)
percentage = (current_tokens / context_limit) * 100

if current_tokens >= context_limit:
@@ -387,7 +403,9 @@ def _build_messages(
)
system_prompt = system_prompt + compression_context

context_limit = get_token_limit(self.model_id)
context_limit = get_token_limit(
self.model_id, user_id=self.model_user_id or self.user
)
system_tokens = num_tokens_from_string(system_prompt)

safety_buffer = int(context_limit * 0.1)
@@ -497,7 +515,10 @@ def _truncate_history_to_fit(
def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
self._validate_context_size(messages)

gen_kwargs = {"model": self.model_id, "messages": messages}
# Use the upstream id resolved by LLMCreator (see __init__).
# Built-in models: same as self.model_id. BYOM: the user's
# typed model name, not the internal UUID.
gen_kwargs = {"model": self.upstream_model_id, "messages": messages}
if self.attachments:
gen_kwargs["_usage_attachments"] = self.attachments

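The self.model_user_id or self.user fallback now appears at each get_token_limit call site above. A minimal sketch of the resolution order that scoping implies (hypothetical stand-ins, not the project's actual get_token_limit; table contents are illustrative):

```python
# Hypothetical sketch of the lookup order the call sites above imply.
# The real get_token_limit is defined elsewhere in the codebase and may differ.
from typing import Dict, Optional, Tuple

BUILTIN_LIMITS: Dict[str, int] = {"docsgpt-local": 8192}  # assumed values
USER_CUSTOM_MODELS: Dict[Tuple[str, str], Dict] = {}  # (user_id, registry_id) -> row
DEFAULT_TOKEN_LIMIT = 8192  # assumed fallback


def get_token_limit(model_id: str, user_id: Optional[str] = None) -> int:
    # A per-user BYOM registration wins: the registry id (a UUID) only
    # resolves under the owning user's scope, which is why the agent
    # passes model_user_id (the owner) before self.user (the caller).
    if user_id is not None:
        row = USER_CUSTOM_MODELS.get((user_id, model_id))
        if row is not None:
            caps = row.get("capabilities", {})
            return int(caps.get("context_window", DEFAULT_TOKEN_LIMIT))
    # Built-in models fall through to the deployment-wide table.
    return BUILTIN_LIMITS.get(model_id, DEFAULT_TOKEN_LIMIT)
```
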
10 changes: 5 additions & 5 deletions application/agents/research_agent.py
@@ -312,7 +312,7 @@ def _clarification_phase(self, question: str) -> Optional[str]:

try:
response = self.llm.gen(
model=self.model_id,
model=self.upstream_model_id,
messages=messages,
tools=None,
response_format={"type": "json_object"},
@@ -390,7 +390,7 @@ def _planning_phase(self, question: str) -> tuple[List[Dict], str]:

try:
response = self.llm.gen(
model=self.model_id,
model=self.upstream_model_id,
messages=messages,
tools=None,
response_format={"type": "json_object"},
@@ -506,7 +506,7 @@ def _research_step_with_executor(

try:
response = self.llm.gen(
model=self.model_id,
model=self.upstream_model_id,
messages=messages,
tools=self.tools if self.tools else None,
)
@@ -537,7 +537,7 @@ def _research_step_with_executor(
)
try:
response = self.llm.gen(
model=self.model_id, messages=messages, tools=None
model=self.upstream_model_id, messages=messages, tools=None
)
self._track_tokens(self._snapshot_llm_tokens())
text = self._extract_text(response)
@@ -664,7 +664,7 @@ def _synthesis_phase(
]

llm_response = self.llm.gen_stream(
model=self.model_id, messages=messages, tools=None
model=self.upstream_model_id, messages=messages, tools=None
)

if log_context:
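Every gen and gen_stream call above swaps self.model_id for self.upstream_model_id, cached in base.py's __init__. A short illustration of why the distinction matters for BYOM (all values below are made up):

```python
# Illustrative values only: a BYOM registration pairs an internal
# registry UUID with the model name the upstream endpoint accepts.
registry_id = "3f2c9b1e-8d4a-4f6e-9c1d-2a7b5e8f0c3d"  # stored by DocsGPT
upstream_id = "mistral-large-latest"                   # known to the provider

# What actually goes over the wire to the OpenAI-compatible upstream.
payload = {
    "model": upstream_id,  # the provider validates this field server-side
    "messages": [{"role": "user", "content": "hello"}],
}
# Sending the registry UUID instead would be rejected by the upstream,
# since it is meaningful only inside DocsGPT's user_custom_models table.
```
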
3 changes: 3 additions & 0 deletions application/agents/tools/internal_search.py
@@ -39,6 +39,7 @@ def _get_retriever(self):
chunks=int(self.config.get("chunks", 2)),
doc_token_limit=int(self.config.get("doc_token_limit", 50000)),
model_id=self.config.get("model_id", "docsgpt-local"),
model_user_id=self.config.get("model_user_id"),
user_api_key=self.config.get("user_api_key"),
agent_id=self.config.get("agent_id"),
llm_name=self.config.get("llm_name", settings.LLM_PROVIDER),
@@ -435,6 +436,7 @@ def build_internal_tool_config(
chunks: int = 2,
doc_token_limit: int = 50000,
model_id: str = "docsgpt-local",
model_user_id: Optional[str] = None,
user_api_key: Optional[str] = None,
agent_id: Optional[str] = None,
llm_name: str = None,
@@ -449,6 +451,7 @@
"chunks": chunks,
"doc_token_limit": doc_token_limit,
"model_id": model_id,
"model_user_id": model_user_id,
"user_api_key": user_api_key,
"agent_id": agent_id,
"llm_name": llm_name or settings.LLM_PROVIDER,
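For reference, the shape of the dict the builder returns with the new field in place, as assembled in the diff above (ids and values are placeholders):

```python
# Sketch of the config dict build_internal_tool_config produces; the
# fields are taken from the diff above, the values are illustrative.
config = {
    "chunks": 2,
    "doc_token_limit": 50000,
    "model_id": "3f2c9b1e-8d4a-4f6e-9c1d-2a7b5e8f0c3d",  # BYOM registry UUID
    "model_user_id": "auth0|owner-123",  # owner scope, so the UUID resolves
    "user_api_key": None,
    "agent_id": "agent-42",
    "llm_name": "openai",  # falls back to settings.LLM_PROVIDER when unset
}
```
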
16 changes: 14 additions & 2 deletions application/agents/workflows/workflow_engine.py
@@ -211,15 +211,26 @@ def _execute_agent_node(
node_config.json_schema, node.title
)
node_model_id = node_config.model_id or self.agent.model_id
# Inherit BYOM scope from parent agent so owner-stored BYOM
# resolves on shared workflows.
node_user_id = getattr(self.agent, "model_user_id", None) or (
self.agent.decoded_token.get("sub")
if isinstance(self.agent.decoded_token, dict)
else None
)
node_llm_name = (
node_config.llm_name
or get_provider_from_model_id(node_model_id or "")
or get_provider_from_model_id(
node_model_id or "", user_id=node_user_id
)
or self.agent.llm_name
)
node_api_key = get_api_key_for_provider(node_llm_name) or self.agent.api_key

if node_json_schema and node_model_id:
model_capabilities = get_model_capabilities(node_model_id)
model_capabilities = get_model_capabilities(
node_model_id, user_id=node_user_id
)
if model_capabilities and not model_capabilities.get(
"supports_structured_output", False
):
@@ -232,6 +243,7 @@ def _execute_agent_node(
"endpoint": self.agent.endpoint,
"llm_name": node_llm_name,
"model_id": node_model_id,
"model_user_id": getattr(self.agent, "model_user_id", None),
"api_key": node_api_key,
"tool_ids": node_config.tools,
"prompt": node_config.system_prompt,
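The node_user_id chain above condenses to a two-step fallback. A standalone restatement under an illustrative helper name, not part of the PR:

```python
from typing import Optional


def resolve_node_scope(agent) -> Optional[str]:
    # Prefer the BYOM scope already resolved on the parent agent, so a
    # shared workflow keeps running under the owner's registrations.
    scope = getattr(agent, "model_user_id", None)
    if scope:
        return scope
    # Otherwise fall back to the token subject (the caller).
    token = getattr(agent, "decoded_token", None)
    return token.get("sub") if isinstance(token, dict) else None
```
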
65 changes: 65 additions & 0 deletions application/alembic/versions/0003_user_custom_models.py
@@ -0,0 +1,65 @@
"""0003 user_custom_models — per-user OpenAI-compatible model registrations.

Revision ID: 0003_user_custom_models
Revises: 0002_app_metadata
"""

from typing import Sequence, Union

from alembic import op


revision: str = "0003_user_custom_models"
down_revision: Union[str, None] = "0002_app_metadata"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
op.execute(
"""
CREATE TABLE user_custom_models (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id TEXT NOT NULL,
upstream_model_id TEXT NOT NULL,
display_name TEXT NOT NULL,
description TEXT NOT NULL DEFAULT '',
base_url TEXT NOT NULL,
api_key_encrypted TEXT NOT NULL,
capabilities JSONB NOT NULL DEFAULT '{}'::jsonb,
enabled BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
"""
)
op.execute(
"CREATE INDEX user_custom_models_user_id_idx "
"ON user_custom_models (user_id);"
)

# Mirror the project-wide invariants set up in 0001_initial:
# * user_id FK with ON DELETE RESTRICT (deferrable),
# * ensure_user_exists() trigger so the parent users row is auto-created,
# * set_updated_at() trigger.
op.execute(
"ALTER TABLE user_custom_models "
"ADD CONSTRAINT user_custom_models_user_id_fk "
"FOREIGN KEY (user_id) REFERENCES users(user_id) "
"ON DELETE RESTRICT DEFERRABLE INITIALLY IMMEDIATE;"
)
op.execute(
"CREATE TRIGGER user_custom_models_ensure_user "
"BEFORE INSERT OR UPDATE OF user_id ON user_custom_models "
"FOR EACH ROW EXECUTE FUNCTION ensure_user_exists();"
)
op.execute(
"CREATE TRIGGER user_custom_models_set_updated_at "
"BEFORE UPDATE ON user_custom_models "
"FOR EACH ROW WHEN (OLD.* IS DISTINCT FROM NEW.*) "
"EXECUTE FUNCTION set_updated_at();"
)


def downgrade() -> None:
op.execute("DROP TABLE IF EXISTS user_custom_models;")
52 changes: 48 additions & 4 deletions application/api/answer/routes/base.py
@@ -177,6 +177,7 @@ def complete_stream(
is_shared_usage: bool = False,
shared_token: Optional[str] = None,
model_id: Optional[str] = None,
model_user_id: Optional[str] = None,
_continuation: Optional[Dict] = None,
) -> Generator[str, None, None]:
"""
@@ -289,8 +290,18 @@ def complete_stream(
# conversation if this is the first turn.
if not conversation_id and should_save_conversation:
try:
# Use model-owner scope so shared-agent
# owner-BYOM resolves to its registered plugin.
provider = (
get_provider_from_model_id(model_id)
get_provider_from_model_id(
model_id,
user_id=model_user_id
or (
decoded_token.get("sub")
if decoded_token
else None
),
)
if model_id
else settings.LLM_PROVIDER
)
@@ -304,6 +315,7 @@ def complete_stream(
decoded_token=decoded_token,
model_id=model_id,
agent_id=agent_id,
model_user_id=model_user_id,
)
conversation_id = (
self.conversation_service.save_conversation(
@@ -340,6 +352,9 @@ def complete_stream(
tool_schemas=getattr(agent, "tools", []),
agent_config={
"model_id": model_id or self.default_model_id,
# Persist BYOM scope so a resumed stream
# doesn't fall back to the caller's scope.
"model_user_id": model_user_id,
"llm_name": getattr(agent, "llm_name", settings.LLM_PROVIDER),
"api_key": getattr(agent, "api_key", None),
"user_api_key": user_api_key,
@@ -370,8 +385,14 @@ def complete_stream(
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
# Run under model-owner scope so the title-gen LLM inside
# save_conversation uses the owner's BYOM provider/key.
provider = (
get_provider_from_model_id(model_id)
get_provider_from_model_id(
model_id,
user_id=model_user_id
or (decoded_token.get("sub") if decoded_token else None),
)
if model_id
else settings.LLM_PROVIDER
)
@@ -384,6 +405,7 @@ def complete_stream(
decoded_token=decoded_token,
model_id=model_id,
agent_id=agent_id,
model_user_id=model_user_id,
)

if should_save_conversation:
@@ -481,12 +503,34 @@ def complete_stream(
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
# Mirror the normal-path provider resolution so the
# partial-save title LLM uses the model-owner's BYOM
# registration (shared-agent dispatch) rather than
# the deployment default with the instance api key.
provider = (
get_provider_from_model_id(
model_id,
user_id=model_user_id
or (
decoded_token.get("sub")
if decoded_token
else None
),
)
if model_id
else settings.LLM_PROVIDER
)
sys_api_key = get_api_key_for_provider(
provider or settings.LLM_PROVIDER
)
llm = LLMCreator.create_llm(
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
provider or settings.LLM_PROVIDER,
api_key=sys_api_key,
user_api_key=user_api_key,
decoded_token=decoded_token,
model_id=model_id,
agent_id=agent_id,
model_user_id=model_user_id,
)
self.conversation_service.save_conversation(
conversation_id,
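The owner-first scope expression is now repeated at three call sites in this file. A minimal helper capturing the pattern (hypothetical; the PR leaves the expression inline):

```python
from typing import Dict, Optional


def resolve_model_scope(
    model_user_id: Optional[str], decoded_token: Optional[Dict]
) -> Optional[str]:
    # Owner scope wins: shared agents must resolve against the model
    # owner's BYOM registrations. Fall back to the caller's JWT subject,
    # and finally to None so built-in resolution applies.
    if model_user_id:
        return model_user_id
    if decoded_token:
        return decoded_token.get("sub")
    return None
```
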
2 changes: 2 additions & 0 deletions application/api/answer/routes/stream.py
@@ -109,6 +109,7 @@ def post(self):
decoded_token=processor.decoded_token,
agent_id=processor.agent_id,
model_id=processor.model_id,
model_user_id=processor.model_user_id,
_continuation={
"messages": messages,
"tools_dict": tools_dict,
@@ -145,6 +146,7 @@ def post(self):
is_shared_usage=processor.is_shared_usage,
shared_token=processor.shared_token,
model_id=processor.model_id,
model_user_id=processor.model_user_id,
),
mimetype="text/event-stream",
)