@@ -1,129 +1,56 @@
+import { BYTEZ } from '../../globals';
 import { ProviderConfigs } from '../types';
+import { generateErrorResponse } from '../utils';
 import BytezInferenceAPI from './api';
 import { BytezInferenceChatCompleteConfig } from './chatComplete';
-import { bodyAdapter, LRUCache } from './utils';
 import { BytezResponse } from './types';
 
-const BASE_URL = 'https://api.bytez.com/models/v2';
-
-const IS_CHAT_MODEL_CACHE = new LRUCache({ size: 100 });
-
 const BytezInferenceAPIConfig: ProviderConfigs = {
   api: BytezInferenceAPI,
   chatComplete: BytezInferenceChatCompleteConfig,
-  requestHandlers: {
-    chatComplete: async ({ providerOptions, requestBody }) => {
-      try {
-        const { model: modelId } = requestBody;
-
-        const adaptedBody = bodyAdapter(requestBody);
-
-        const headers = {
-          'Content-Type': 'application/json',
-          Authorization: `Key ${providerOptions.apiKey}`,
-        };
-
-        const isChatModel = await validateModelIsChat(modelId, headers);
-
-        if (!isChatModel) {
-          return constructFailureResponse(
-            'Bytez only supports chat models on PortKey',
-            { status: 400 }
-          );
-        }
-
-        const url = `${BASE_URL}/${modelId}`;
-
-        const response = await fetch(url, {
-          method: 'POST',
-          headers,
-          body: JSON.stringify(adaptedBody),
-        });
-
-        if (adaptedBody.stream) {
-          return new Response(response.body, response);
-        }
-
-        const { error, output }: BytezResponse = await response.json();
-
-        if (error) {
-          return constructFailureResponse(error, response);
-        }
-
-        return new Response(
-          JSON.stringify({
-            id: crypto.randomUUID(),
-            object: 'chat.completion',
-            created: Date.now(),
-            model: modelId,
-            choices: [
-              {
-                index: 0,
-                message: output,
-                logprobs: null,
-                finish_reason: 'stop',
-              },
-            ],
-            usage: {
-              inferenceTime: response.headers.get('inference-time'),
-              modelSize: response.headers.get('inference-meter'),
-            },
-          }),
-          response
+  responseTransforms: {
+    chatComplete: (
+      response: BytezResponse,
+      responseStatus: number,
+      responseHeaders: any,
+      strictOpenAiCompliance: boolean,
+      endpoint: string,
+      requestBody: any
+    ) => {
+      const { error, output } = response;
+
+      if (error) {
+        return generateErrorResponse(
+          {
+            message: error,
+            type: String(responseStatus),
+            param: null,
+            code: null,
+          },
+          BYTEZ
         );
-      } catch (error: any) {
-        return constructFailureResponse(error.message);
       }
+
+      return {
+        id: crypto.randomUUID(),
+        object: 'chat.completion',
+        created: Date.now(),
+        model: requestBody.model,
+        choices: [
+          {
+            index: 0,
+            message: output,
+            logprobs: null,
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          inferenceTime: responseHeaders.get('inference-time'),
+          modelSize: responseHeaders.get('inference-meter'),
+        },
+      };
     },
   },
 };
 
-async function validateModelIsChat(
-  modelId: string,
-  headers: Record<string, any>
-) {
-  // return from cache if already validated
-  if (IS_CHAT_MODEL_CACHE.has(modelId)) {
-    return IS_CHAT_MODEL_CACHE.get(modelId);
-  }
-
-  const url = `${BASE_URL}/list/models?modelId=${modelId}`;
-
-  const response = await fetch(url, {
-    headers,
-  });
-
-  const {
-    error,
-    output: [model],
-  }: BytezResponse = await response.json();
-
-  if (error) {
-    throw new Error(error);
-  }
-
-  const isChatModel = model.task === 'chat';
-
-  IS_CHAT_MODEL_CACHE.set(modelId, isChatModel);
-
-  return isChatModel;
-}
-
-function constructFailureResponse(message: string, response?: object) {
-  return new Response(
-    JSON.stringify({
-      status: 'failure',
-      message,
-    }),
-    {
-      status: 500,
-      headers: {
-        'content-type': 'application/json',
-      },
-      // override defaults if desired
-      ...response,
-    }
-  );
-}
-
 export default BytezInferenceAPIConfig;
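For orientation, a minimal usage sketch of the responseTransforms.chatComplete hook added above. Only the transform's signature and the { error, output } shape of BytezResponse are taken from this diff; the import path, the sample model id, and the header values are assumptions for illustration, and the gateway's real invocation path is not part of this change.

import BytezInferenceAPIConfig from './index'; // assumed path to this provider config

// A successful Bytez response: `output` already carries an OpenAI-style message object.
const bytezResponse = {
  error: null,
  output: { role: 'assistant', content: 'Hello from Bytez!' },
};

// Headers mirroring the fields the transform reads (values are made up).
const responseHeaders = new Headers({
  'inference-time': '0.42',
  'inference-meter': '7b',
});

const openAiStyle = BytezInferenceAPIConfig.responseTransforms?.chatComplete(
  bytezResponse, // parsed Bytez response body
  200, // responseStatus
  responseHeaders, // responseHeaders
  false, // strictOpenAiCompliance
  'chatComplete', // endpoint
  { model: 'openai-community/gpt2' } // requestBody (model id is illustrative)
);

// On success, the transform wraps `output` in an OpenAI-compatible envelope:
// openAiStyle.choices[0].message === bytezResponse.output
// openAiStyle.usage.inferenceTime === '0.42'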