EmbeddedLLM
diff --git a/services/api/src/owl/routers/serving.py b/services/api/src/owl/routers/serving.py
Lines changed: 6 additions & 3 deletions
diff --git a/services/api/src/owl/utils/billing/oss.py b/services/api/src/owl/utils/billing/oss.py
Lines changed: 39 additions & 3 deletions
@@ -160,8 +160,9 @@ async def chat_completion(
 ) -> Response:
     # Check quota
     billing: BillingManager = request.state.billing
-    billing.has_llm_quota(body.model)
     billing.has_egress_quota()
+    # Reject obvious no-path requests before doing endpoint setup work.
+    billing.has_model_preflight_access(body.model)
     _, project, org = auth_info
     body.id = request.state.id
     llm = LMEngine(organization=org, project=project, request=request)
@@ -231,8 +232,9 @@ async def generate_embeddings(
 ) -> EmbeddingResponse:
     # Check quota
     billing: BillingManager = request.state.billing
-    billing.has_embedding_quota(body.model)
     billing.has_egress_quota()
+    # Reject obvious no-path requests before doing endpoint setup work.
+    billing.has_model_preflight_access(body.model)
     _, project, org = auth_info
     embedder = LMEngine(organization=org, project=project, request=request)
     if isinstance(body.input, str):
@@ -264,8 +266,9 @@ async def generate_rankings(
 ) -> RerankingResponse:
     # Check quota
     billing: BillingManager = request.state.billing
-    billing.has_reranker_quota(body.model)
     billing.has_egress_quota()
+    # Reject obvious no-path requests before doing endpoint setup work.
+    billing.has_model_preflight_access(body.model)
     _, project, org = auth_info
     reranker = LMEngine(organization=org, project=project, request=request)
     return await reranker.rerank_documents(**body.model_dump())
@@ -544,16 +544,38 @@ def _cloud_event(
     def has_gen_table_quota(self, table: GenerativeTableCore) -> bool:
         return True
 
+    def model_has_byok_deployment(self, model: ModelConfigRead) -> bool:
+        return False
+
+    def get_byok_key(self, provider: str) -> str:
+        return ""
+
+    def has_model_preflight_access(self, model_id: str) -> bool:
+        return True
+
     # --- LLM Usage --- #
 
-    def has_llm_quota(self, model_id: str) -> bool:
+    def has_llm_quota(
+        self,
+        model_id: str,
+        is_byok: bool = False,
+    ) -> bool:
         return True
 
+    def has_image_gen_quota(
+        self,
+        model_id: str,
+        is_byok: bool = False,
+    ) -> bool:
+        return self.has_llm_quota(model_id, is_byok=is_byok)
+
     def create_llm_events(
         self,
         model_id: str,
         input_tokens: int,
         output_tokens: int,
+        model_provider: str = "",
+        is_byok: bool = False,
         *,
         create_usage: bool = True,
     ) -> None:
@@ -598,6 +620,8 @@ def create_image_gen_events(
         text_output_token: int,
         image_input_token: int,
         image_output_token: int,
+        model_provider: str = "",
+        is_byok: bool = False,
         create_usage: bool = True,
     ) -> None:
         text_input_token = int(text_input_token)
@@ -651,13 +675,19 @@ def create_image_gen_events(
 
     # --- Embedding Usage --- #
 
-    def has_embedding_quota(self, model_id: str) -> bool:
+    def has_embedding_quota(
+        self,
+        model_id: str,
+        is_byok: bool = False,
+    ) -> bool:
         return True
 
     def create_embedding_events(
         self,
         model_id: str,
         token_usage: int,
+        model_provider: str = "",
+        is_byok: bool = False,
         *,
         create_usage: bool = True,
     ) -> None:
@@ -690,13 +720,19 @@ def create_embedding_events(
 
     # --- Reranker Usage --- #
 
-    def has_reranker_quota(self, model_id: str) -> bool:
+    def has_reranker_quota(
+        self,
+        model_id: str,
+        is_byok: bool = False,
+    ) -> bool:
         return True
 
     def create_reranker_events(
         self,
         model_id: str,
         num_searches: int,
+        model_provider: str = "",
+        is_byok: bool = False,
         *,
         create_usage: bool = True,
     ) -> None: