Skip to content

Commit 392a887

Browse files
committed
[owl] Fix byok check on actual deployment provider (#914)
* Fixes EmbeddedLLM/JAM.ai.dev#913.
* Billing is now deployment-provider-aware.
* Quota checks now depend on the (deployment provider, org key) pair.
* UI model selection now checks the (deployment provider, org key) pair.
* In the BYOK case, the BYOK deployment is prioritized even if the org has credit/quota for other deployments, but requests can still fall back to those deployments.
1 parent 299cb1f commit 392a887

File tree

7 files changed

+936
-38
lines changed

7 files changed

+936
-38
lines changed

services/api/src/owl/routers/serving.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,9 @@ async def chat_completion(
160160
) -> Response:
161161
# Check quota
162162
billing: BillingManager = request.state.billing
163-
billing.has_llm_quota(body.model)
164163
billing.has_egress_quota()
164+
# Reject obvious no-path requests before doing endpoint setup work.
165+
billing.has_model_preflight_access(body.model)
165166
_, project, org = auth_info
166167
body.id = request.state.id
167168
llm = LMEngine(organization=org, project=project, request=request)
@@ -231,8 +232,9 @@ async def generate_embeddings(
231232
) -> EmbeddingResponse:
232233
# Check quota
233234
billing: BillingManager = request.state.billing
234-
billing.has_embedding_quota(body.model)
235235
billing.has_egress_quota()
236+
# Reject obvious no-path requests before doing endpoint setup work.
237+
billing.has_model_preflight_access(body.model)
236238
_, project, org = auth_info
237239
embedder = LMEngine(organization=org, project=project, request=request)
238240
if isinstance(body.input, str):
@@ -264,8 +266,9 @@ async def generate_rankings(
264266
) -> RerankingResponse:
265267
# Check quota
266268
billing: BillingManager = request.state.billing
267-
billing.has_reranker_quota(body.model)
268269
billing.has_egress_quota()
270+
# Reject obvious no-path requests before doing endpoint setup work.
271+
billing.has_model_preflight_access(body.model)
269272
_, project, org = auth_info
270273
reranker = LMEngine(organization=org, project=project, request=request)
271274
return await reranker.rerank_documents(**body.model_dump())

services/api/src/owl/utils/billing/oss.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -544,16 +544,38 @@ def _cloud_event(
544544
def has_gen_table_quota(self, table: GenerativeTableCore) -> bool:
545545
return True
546546

547+
def model_has_byok_deployment(self, model: ModelConfigRead) -> bool:
548+
return False
549+
550+
def get_byok_key(self, provider: str) -> str:
551+
return ""
552+
553+
def has_model_preflight_access(self, model_id: str) -> bool:
554+
return True
555+
547556
# --- LLM Usage --- #
548557

549-
def has_llm_quota(self, model_id: str) -> bool:
558+
def has_llm_quota(
559+
self,
560+
model_id: str,
561+
is_byok: bool = False,
562+
) -> bool:
550563
return True
551564

565+
def has_image_gen_quota(
566+
self,
567+
model_id: str,
568+
is_byok: bool = False,
569+
) -> bool:
570+
return self.has_llm_quota(model_id, is_byok=is_byok)
571+
552572
def create_llm_events(
553573
self,
554574
model_id: str,
555575
input_tokens: int,
556576
output_tokens: int,
577+
model_provider: str = "",
578+
is_byok: bool = False,
557579
*,
558580
create_usage: bool = True,
559581
) -> None:
@@ -598,6 +620,8 @@ def create_image_gen_events(
598620
text_output_token: int,
599621
image_input_token: int,
600622
image_output_token: int,
623+
model_provider: str = "",
624+
is_byok: bool = False,
601625
create_usage: bool = True,
602626
) -> None:
603627
text_input_token = int(text_input_token)
@@ -651,13 +675,19 @@ def create_image_gen_events(
651675

652676
# --- Embedding Usage --- #
653677

654-
def has_embedding_quota(self, model_id: str) -> bool:
678+
def has_embedding_quota(
679+
self,
680+
model_id: str,
681+
is_byok: bool = False,
682+
) -> bool:
655683
return True
656684

657685
def create_embedding_events(
658686
self,
659687
model_id: str,
660688
token_usage: int,
689+
model_provider: str = "",
690+
is_byok: bool = False,
661691
*,
662692
create_usage: bool = True,
663693
) -> None:
@@ -690,13 +720,19 @@ def create_embedding_events(
690720

691721
# --- Reranker Usage --- #
692722

693-
def has_reranker_quota(self, model_id: str) -> bool:
723+
def has_reranker_quota(
724+
self,
725+
model_id: str,
726+
is_byok: bool = False,
727+
) -> bool:
694728
return True
695729

696730
def create_reranker_events(
697731
self,
698732
model_id: str,
699733
num_searches: int,
734+
model_provider: str = "",
735+
is_byok: bool = False,
700736
*,
701737
create_usage: bool = True,
702738
) -> None:

0 commit comments

Comments
 (0)