From 2bfd4af06abdc2c7ef8de46c47cceb7362036678 Mon Sep 17 00:00:00 2001
From: Narendranath Gogineni
Date: Fri, 22 Aug 2025 17:21:43 +0530
Subject: [PATCH 1/3] cleanup vertex api.ts

---
 src/errors/GatewayError.ts            |  2 ++
 src/handlers/handlerUtils.ts          |  2 +-
 src/providers/google-vertex-ai/api.ts | 17 ++++++++++++-----
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/errors/GatewayError.ts b/src/errors/GatewayError.ts
index 3ed135847..343a894c8 100644
--- a/src/errors/GatewayError.ts
+++ b/src/errors/GatewayError.ts
@@ -1,9 +1,11 @@
 export class GatewayError extends Error {
   constructor(
     message: string,
+    public status: number = 500,
     public cause?: Error
   ) {
     super(message);
     this.name = 'GatewayError';
+    this.status = status;
   }
 }
diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts
index 1d6abde64..ce2691847 100644
--- a/src/handlers/handlerUtils.ts
+++ b/src/handlers/handlerUtils.ts
@@ -809,7 +809,7 @@ export async function tryTargetsRecursively(
         message: errorMessage,
       }),
       {
-        status: 500,
+        status: error instanceof GatewayError ? error.status : 500,
         headers: {
           'content-type': 'application/json',
           // Add this header so that the fallback loop can be interrupted if its an exception.
diff --git a/src/providers/google-vertex-ai/api.ts b/src/providers/google-vertex-ai/api.ts
index cfdf2ce7f..15175a377 100644
--- a/src/providers/google-vertex-ai/api.ts
+++ b/src/providers/google-vertex-ai/api.ts
@@ -1,8 +1,9 @@
+import { GatewayError } from '../../errors/GatewayError';
 import { Options } from '../../types/requestBody';
 import { endpointStrings, ProviderAPIConfig } from '../types';
 import { getModelAndProvider, getAccessToken, getBucketAndFile } from './utils';
 
-const getApiVersion = (provider: string, inputModel: string) => {
+const getApiVersion = (provider: string) => {
   if (provider === 'meta') return 'v1beta1';
   return 'v1';
 };
@@ -17,12 +18,12 @@ const getProjectRoute = (
     vertexServiceAccountJson,
   } = providerOptions;
   let projectId = inputProjectId;
-  if (vertexServiceAccountJson && vertexServiceAccountJson.project_id) {
+  if (vertexServiceAccountJson?.project_id) {
     projectId = vertexServiceAccountJson.project_id;
   }
 
   const { provider } = getModelAndProvider(inputModel as string);
-  let routeVersion = getApiVersion(provider, inputModel as string);
+  const routeVersion = getApiVersion(provider);
 
   return `/${routeVersion}/projects/${projectId}/locations/${vertexRegion}`;
 };
@@ -68,7 +69,6 @@ export const GoogleApiConfig: ProviderAPIConfig = {
     if (vertexServiceAccountJson) {
       authToken = await getAccessToken(c, vertexServiceAccountJson);
     }
-
     return {
       'Content-Type': 'application/json',
       Authorization: `Bearer ${authToken}`,
@@ -99,7 +99,7 @@ export const GoogleApiConfig: ProviderAPIConfig = {
       const jobId = gatewayRequestURL.split('/').at(jobIdIndex);
 
       const url = new URL(gatewayRequestURL);
-      const searchParams = url.searchParams;
+      const searchParams = new URLSearchParams(url.search);
       const pageSize = searchParams.get('limit') ?? 20;
       const after = searchParams.get('after') ?? '';
 
@@ -140,9 +140,15 @@ export const GoogleApiConfig: ProviderAPIConfig = {
         case 'cancelFinetune': {
           return `/v1/projects/${projectId}/locations/${vertexRegion}/tuningJobs/${jobId}:cancel`;
         }
+        default:
+          return '';
       }
     }
 
+    if (!inputModel) {
+      throw new GatewayError('Model is required', 400);
+    }
+
     const { provider, model } = getModelAndProvider(inputModel as string);
     const projectRoute = getProjectRoute(providerOptions, inputModel as string);
     const googleUrlMap = new Map([
@@ -181,6 +187,7 @@ export const GoogleApiConfig: ProviderAPIConfig = {
       } else if (mappedFn === 'messagesCountTokens') {
         return `${projectRoute}/publishers/${provider}/models/count-tokens:rawPredict`;
       }
+
       return `${projectRoute}/publishers/${provider}/models/${model}:rawPredict`;
     }
     case 'meta': {

From 36393df75a96a78f3da91fd013c1366f9a58a61b Mon Sep 17 00:00:00 2001
From: Narendranath Gogineni
Date: Wed, 17 Sep 2025 19:18:08 +0530
Subject: [PATCH 2/3] cleanup vertex

---
 src/providers/google-vertex-ai/api.ts         | 14 +++--
 .../google-vertex-ai/chatComplete.ts          | 17 +++--
 src/providers/google-vertex-ai/createBatch.ts | 24 +++++++
 src/providers/google-vertex-ai/embed.ts       | 27 ++++----
 src/providers/google-vertex-ai/index.ts       | 46 +++++++++-----
 src/providers/google-vertex-ai/listBatches.ts |  2 +-
 .../google-vertex-ai/messagesCountTokens.ts   |  2 +-
 src/providers/google-vertex-ai/types.ts       |  4 +-
 src/providers/google-vertex-ai/utils.ts       | 63 ++++---------------
 9 files changed, 106 insertions(+), 93 deletions(-)

diff --git a/src/providers/google-vertex-ai/api.ts b/src/providers/google-vertex-ai/api.ts
index 15175a377..314d248d7 100644
--- a/src/providers/google-vertex-ai/api.ts
+++ b/src/providers/google-vertex-ai/api.ts
@@ -18,7 +18,7 @@ const getProjectRoute = (
     vertexServiceAccountJson,
   } = providerOptions;
   let projectId = inputProjectId;
-  if (vertexServiceAccountJson?.project_id) {
+  if (vertexServiceAccountJson && vertexServiceAccountJson.project_id) {
     projectId = vertexServiceAccountJson.project_id;
   }
 
@@ -59,8 +59,9 @@ export const GoogleApiConfig: ProviderAPIConfig = {
     }
 
     if (vertexRegion === 'global') {
-      return `https://aiplatform.googleapis.com`;
+      return 'https://aiplatform.googleapis.com';
     }
+
     return `https://${vertexRegion}-aiplatform.googleapis.com`;
   },
   headers: async ({ c, providerOptions }) => {
@@ -88,6 +89,9 @@ export const GoogleApiConfig: ProviderAPIConfig = {
       mappedFn = `stream-${fn}` as endpointStrings;
     }
 
+    const url = new URL(gatewayRequestURL);
+    const searchParams = url.searchParams;
+
     if (NON_INFERENCE_ENDPOINTS.includes(fn)) {
       const jobIdIndex = [
         'cancelBatch',
@@ -99,9 +103,9 @@ export const GoogleApiConfig: ProviderAPIConfig = {
       const jobId = gatewayRequestURL.split('/').at(jobIdIndex);
 
       const url = new URL(gatewayRequestURL);
-      const searchParams = new URLSearchParams(url.search);
-      const pageSize = searchParams.get('limit') ?? 20;
-      const after = searchParams.get('after') ?? '';
+      const params = new URLSearchParams(url.search);
+      const pageSize = params.get('limit') ?? 20;
+      const after = params.get('after') ?? '';
 
       let projectId = vertexProjectId;
       if (!projectId || vertexServiceAccountJson) {
diff --git a/src/providers/google-vertex-ai/chatComplete.ts b/src/providers/google-vertex-ai/chatComplete.ts
index 0c7ec7dbd..b7a08a6d1 100644
--- a/src/providers/google-vertex-ai/chatComplete.ts
+++ b/src/providers/google-vertex-ai/chatComplete.ts
@@ -17,8 +17,8 @@ import {
   AnthropicChatCompleteStreamResponse,
 } from '../anthropic/chatComplete';
 import {
-  AnthropicErrorResponse,
   AnthropicStreamState,
+  AnthropicErrorResponse,
 } from '../anthropic/types';
 import {
   GoogleMessage,
@@ -28,6 +28,7 @@ import {
   transformOpenAIRoleToGoogleRole,
   transformToolChoiceForGemini,
 } from '../google/chatComplete';
+import { GOOGLE_GENERATE_CONTENT_FINISH_REASON } from '../google/types';
 import {
   ChatCompletionResponse,
   ErrorResponse,
@@ -295,7 +296,13 @@ export const VertexGoogleChatCompleteConfig: ProviderConfig = {
         delete tool.function?.strict;
 
         if (['googleSearch', 'google_search'].includes(tool.function.name)) {
-          tools.push({ googleSearch: {} });
+          const timeRangeFilter = tool.function.parameters?.timeRangeFilter;
+          tools.push({
+            googleSearch: {
+              // allow null
+              ...(timeRangeFilter !== undefined && { timeRangeFilter }),
+            },
+          });
         } else if (
           ['googleSearchRetrieval', 'google_search_retrieval'].includes(
             tool.function.name
@@ -516,7 +523,7 @@ export const GoogleChatCompleteResponseTransform: (
           message: message,
           index: index,
           finish_reason: transformFinishReason(
-            generation.finishReason,
+            generation.finishReason as GOOGLE_GENERATE_CONTENT_FINISH_REASON,
             strictOpenAiCompliance
           ),
           logprobs,
@@ -641,11 +648,11 @@ export const GoogleChatCompleteStreamChunkTransform: (
     parsedChunk.candidates?.map((generation, index) => {
       const finishReason = generation.finishReason
         ? transformFinishReason(
-            parsedChunk.candidates[0].finishReason,
+            parsedChunk.candidates[0]
+              .finishReason as GOOGLE_GENERATE_CONTENT_FINISH_REASON,
             strictOpenAiCompliance
           )
         : null;
-
       let message: any = { role: 'assistant', content: '' };
       if (generation.content?.parts[0]?.text) {
         const contentBlocks = [];
diff --git a/src/providers/google-vertex-ai/createBatch.ts b/src/providers/google-vertex-ai/createBatch.ts
index 1cb627af8..67e826695 100644
--- a/src/providers/google-vertex-ai/createBatch.ts
+++ b/src/providers/google-vertex-ai/createBatch.ts
@@ -1,3 +1,6 @@
+import { constructConfigFromRequestHeaders } from '../../handlers/handlerUtils';
+import { transformUsingProviderConfig } from '../../services/transformToProviderRequest';
+import { Options } from '../../types/requestBody';
 import { ProviderConfig } from '../types';
 import { GoogleBatchRecord } from './types';
 import { getModelAndProvider, GoogleToOpenAIBatch } from './utils';
@@ -69,6 +72,27 @@ export const GoogleBatchCreateConfig: ProviderConfig = {
   },
 };
 
+export const GoogleBatchCreateRequestTransform = (
+  requestBody: any,
+  requestHeaders: Record
+) => {
+  const providerOptions = constructConfigFromRequestHeaders(requestHeaders);
+
+  const baseConfig = transformUsingProviderConfig(
+    GoogleBatchCreateConfig,
+    requestBody,
+    providerOptions as Options
+  );
+
+  const finalBody = {
+    // Contains extra fields like tags etc, also might contains model etc, so order is important to override the fields with params created using config.
+    ...requestBody?.provider_options,
+    ...baseConfig,
+  };
+
+  return finalBody;
+};
+
 export const GoogleBatchCreateResponseTransform = (
   response: Response,
   responseStatus: number
diff --git a/src/providers/google-vertex-ai/embed.ts b/src/providers/google-vertex-ai/embed.ts
index 93f3dd699..c0843dd65 100644
--- a/src/providers/google-vertex-ai/embed.ts
+++ b/src/providers/google-vertex-ai/embed.ts
@@ -12,6 +12,7 @@ import {
   transformEmbeddingInputs,
   transformEmbeddingsParameters,
 } from './transformGenerationConfig';
+import { Params } from '../../types/requestBody';
 
 enum TASK_TYPE {
   RETRIEVAL_QUERY = 'RETRIEVAL_QUERY',
@@ -49,6 +50,19 @@ export const GoogleEmbedConfig: ProviderConfig = {
   },
 };
 
+export const VertexBatchEmbedConfig: ProviderConfig = {
+  input: {
+    param: 'content',
+    required: true,
+    transform: (value: EmbedParams) => {
+      if (typeof value.input === 'string') {
+        return value.input;
+      }
+      return value.input.map((item) => item).join('\n');
+    },
+  },
+};
+
 export const GoogleEmbedResponseTransform: (
   response: GoogleEmbedResponse | GoogleErrorResponse,
   responseStatus: number,
@@ -120,16 +134,3 @@ export const GoogleEmbedResponseTransform: (
 
   return generateInvalidProviderResponseError(response, GOOGLE_VERTEX_AI);
 };
-
-export const VertexBatchEmbedConfig: ProviderConfig = {
-  input: {
-    param: 'content',
-    required: true,
-    transform: (value: EmbedParams) => {
-      if (typeof value.input === 'string') {
-        return value.input;
-      }
-      return value.input.map((item) => item).join('\n');
-    },
-  },
-};
diff --git a/src/providers/google-vertex-ai/index.ts b/src/providers/google-vertex-ai/index.ts
index 5b5f9e5ba..6d615054f 100644
--- a/src/providers/google-vertex-ai/index.ts
+++ b/src/providers/google-vertex-ai/index.ts
@@ -20,33 +20,34 @@ import {
 import { chatCompleteParams, responseTransformers } from '../open-ai-base';
 import { GOOGLE_VERTEX_AI } from '../../globals';
 import { Params } from '../../types/requestBody';
+import {
+  GoogleFileUploadRequestHandler,
+  GoogleFileUploadResponseTransform,
+} from './uploadFile';
 import {
   GoogleBatchCreateConfig,
+  GoogleBatchCreateRequestTransform,
   GoogleBatchCreateResponseTransform,
 } from './createBatch';
+import { GoogleRetrieveBatchResponseTransform } from './retrieveBatch';
 import {
   BatchOutputRequestHandler,
   BatchOutputResponseTransform,
 } from './getBatchOutput';
 import { GoogleListBatchesResponseTransform } from './listBatches';
 import { GoogleCancelBatchResponseTransform } from './cancelBatch';
-import {
-  GoogleFileUploadRequestHandler,
-  GoogleFileUploadResponseTransform,
-} from './uploadFile';
-import { GoogleRetrieveBatchResponseTransform } from './retrieveBatch';
 import {
   GoogleFinetuneCreateResponseTransform,
   GoogleVertexFinetuneConfig,
 } from './createFinetune';
-import { GoogleRetrieveFileContentResponseTransform } from './retrieveFileContent';
+import { GoogleListFilesRequestHandler } from './listFiles';
 import {
   GoogleRetrieveFileRequestHandler,
   GoogleRetrieveFileResponseTransform,
 } from './retrieveFile';
-import { GoogleFinetuneRetrieveResponseTransform } from './retrieveFinetune';
 import { GoogleFinetuneListResponseTransform } from './listFinetunes';
-import { GoogleListFilesRequestHandler } from './listFiles';
+import { GoogleFinetuneRetrieveResponseTransform } from './retrieveFinetune';
+import { GoogleRetrieveFileContentResponseTransform } from './retrieveFileContent';
 import {
   VertexAnthropicMessagesConfig,
   VertexAnthropicMessagesResponseTransform,
@@ -60,7 +61,7 @@ import {
 
 const VertexConfig: ProviderConfigs = {
   api: VertexApiConfig,
-  getConfig: ({ params }) => {
+  getConfig: (params: Params) => {
     const requestConfig = {
       uploadFile: {},
       createBatch: GoogleBatchCreateConfig,
@@ -76,20 +77,25 @@ const VertexConfig: ProviderConfigs = {
     const responseTransforms = {
       uploadFile: GoogleFileUploadResponseTransform,
       retrieveBatch: GoogleRetrieveBatchResponseTransform,
+      retrieveFile: GoogleRetrieveFileResponseTransform,
       getBatchOutput: BatchOutputResponseTransform,
       listBatches: GoogleListBatchesResponseTransform,
       cancelBatch: GoogleCancelBatchResponseTransform,
-      createBatch: GoogleBatchCreateResponseTransform,
-      retrieveFileContent: GoogleRetrieveFileContentResponseTransform,
-      retrieveFile: GoogleRetrieveFileResponseTransform,
       createFinetune: GoogleFinetuneCreateResponseTransform,
       retrieveFinetune: GoogleFinetuneRetrieveResponseTransform,
       listFinetunes: GoogleFinetuneListResponseTransform,
+      createBatch: GoogleBatchCreateResponseTransform,
+      retrieveFileContent: GoogleRetrieveFileContentResponseTransform,
+    };
+
+    const requestTransforms = {
+      createBatch: GoogleBatchCreateRequestTransform,
     };
 
     const baseConfig = {
       ...requestConfig,
       responseTransforms,
+      requestTransforms,
     };
 
     const providerModel = params?.model;
@@ -115,6 +121,9 @@ const VertexConfig: ProviderConfigs = {
           imageGenerate: GoogleImageGenResponseTransform,
           ...responseTransforms,
         },
+        requestTransforms: {
+          ...baseConfig.requestTransforms,
+        },
       };
     case 'anthropic':
       return {
@@ -131,18 +140,24 @@ const VertexConfig: ProviderConfigs = {
           messages: VertexAnthropicMessagesResponseTransform,
           ...responseTransforms,
         },
+        requestTransforms: {
+          ...baseConfig.requestTransforms,
+        },
       };
     case 'meta':
       return {
         chatComplete: VertexLlamaChatCompleteConfig,
-        createBatch: GoogleBatchCreateConfig,
         api: GoogleApiConfig,
+        createBatch: GoogleBatchCreateConfig,
         createFinetune: baseConfig.createFinetune,
         responseTransforms: {
           chatComplete: VertexLlamaChatCompleteResponseTransform,
           'stream-chatComplete': VertexLlamaChatCompleteStreamChunkTransform,
           ...responseTransforms,
         },
+        requestTransforms: {
+          ...baseConfig.requestTransforms,
+        },
       };
     case 'endpoints':
       return {
@@ -160,14 +175,17 @@ const VertexConfig: ProviderConfigs = {
           }
         ),
         createBatch: GoogleBatchCreateConfig,
-        api: GoogleApiConfig,
        createFinetune: baseConfig.createFinetune,
+        api: GoogleApiConfig,
         responseTransforms: {
           ...responseTransformers(GOOGLE_VERTEX_AI, {
             chatComplete: true,
           }),
           ...responseTransforms,
         },
+        requestTransforms: {
+          ...baseConfig.requestTransforms,
+        },
       };
     case 'mistralai':
       return {
diff --git a/src/providers/google-vertex-ai/listBatches.ts b/src/providers/google-vertex-ai/listBatches.ts
index 983205765..67760afad 100644
--- a/src/providers/google-vertex-ai/listBatches.ts
+++ b/src/providers/google-vertex-ai/listBatches.ts
@@ -1,6 +1,6 @@
 import { GOOGLE_VERTEX_AI } from '../../globals';
-import { generateInvalidProviderResponseError } from '../utils';
 import { GoogleBatchRecord, GoogleErrorResponse } from './types';
+import { generateInvalidProviderResponseError } from '../utils';
 import { GoogleToOpenAIBatch } from './utils';
 
 type GoogleListBatchesResponse = {
diff --git a/src/providers/google-vertex-ai/messagesCountTokens.ts b/src/providers/google-vertex-ai/messagesCountTokens.ts
index 43f007af4..2ead2a879 100644
--- a/src/providers/google-vertex-ai/messagesCountTokens.ts
+++ b/src/providers/google-vertex-ai/messagesCountTokens.ts
@@ -7,7 +7,7 @@ export const VertexAnthropicMessagesCountTokensConfig = {
     param: 'model',
     required: true,
     transform: (params: MessageCreateParamsBase) => {
-      let model = params.model ?? '';
+      const model = params.model ?? '';
       return model.replace('anthropic.', '');
     },
   },
diff --git a/src/providers/google-vertex-ai/types.ts b/src/providers/google-vertex-ai/types.ts
index d262569f9..496e74809 100644
--- a/src/providers/google-vertex-ai/types.ts
+++ b/src/providers/google-vertex-ai/types.ts
@@ -44,7 +44,7 @@ export interface GoogleResponseCandidate {
       },
     ];
   };
-  finishReason: VERTEX_GEMINI_GENERATE_CONTENT_FINISH_REASON;
+  finishReason: string;
   index: 0;
   safetyRatings: {
     category: string;
@@ -202,7 +202,7 @@ export interface GoogleBatchRecord {
   };
   startTime: string;
   endTime: string;
-  completionsStats?: {
+  completionStats?: {
     successfulCount: string;
     failedCount: string;
     incompleteCount: string;
diff --git a/src/providers/google-vertex-ai/utils.ts b/src/providers/google-vertex-ai/utils.ts
index 91602d052..5ec6dbe72 100644
--- a/src/providers/google-vertex-ai/utils.ts
+++ b/src/providers/google-vertex-ai/utils.ts
@@ -1,10 +1,9 @@
 import {
-  GoogleBatchRecord,
   GoogleErrorResponse,
+  GoogleResponseCandidate,
+  GoogleBatchRecord,
   GoogleFinetuneRecord,
-  GoogleResponseCandidate as VertexResponseCandidate,
 } from './types';
-import { GoogleResponseCandidate } from '../google/chatComplete';
 import { generateErrorResponse } from '../utils';
 import {
   BatchEndpoints,
@@ -300,7 +299,7 @@ export const transformGeminiToolParameters = (
 };
 
 // Vertex AI does not support additionalProperties in JSON Schema
-// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/function-calling#schema
+// https://cloud.google.com/vertex-ai/docs/reference/rest/v1/Schema
 export const recursivelyDeleteUnsupportedParameters = (obj: any) => {
   if (typeof obj !== 'object' || obj === null || Array.isArray(obj)) return;
   delete obj.additional_properties;
@@ -422,7 +421,7 @@ const getTimeKey = (status: GoogleBatchRecord['state'], value: string) => {
 export const GoogleToOpenAIBatch = (response: GoogleBatchRecord) => {
   const jobId = response.name.split('/').at(-1);
 
-  const total = Object.values(response.completionsStats ?? {}).reduce(
+  const total = Object.values(response.completionStats ?? {}).reduce(
     (acc, current) => acc + Number.parseInt(current),
     0
   );
@@ -431,7 +430,6 @@ export const GoogleToOpenAIBatch = (response: GoogleBatchRecord) => {
     ? BatchEndpoints.EMBEDDINGS
     : BatchEndpoints.CHAT_COMPLETIONS;
 
-  // Embeddings file is `000000000000.jsonl`, for inference the output is at `predictions.jsonl`
   const fileSuffix =
     endpoint === BatchEndpoints.EMBEDDINGS
       ? '000000000000.jsonl'
@@ -461,8 +459,8 @@ export const GoogleToOpenAIBatch = (response: GoogleBatchRecord) => {
     ...getTimeKey(response.state, response.updateTime),
     request_counts: {
       total: total,
-      completed: response.completionsStats?.successfulCount,
-      failed: response.completionsStats?.failedCount,
+      completed: response.completionStats?.successfulCount,
+      failed: response.completionStats?.failedCount,
     },
     ...(response.error && {
       errors: {
@@ -473,48 +471,8 @@ export const GoogleToOpenAIBatch = (response: GoogleBatchRecord) => {
   };
 };
 
-export const fetchGoogleCustomEndpoint = async ({
-  authorization,
-  method,
-  url,
-  body,
-}: {
-  url: string;
-  body?: ReadableStream | Record;
-  authorization: string;
-  method: string;
-}) => {
-  const result = { response: null, error: null, status: null };
-  try {
-    const options = {
-      ...(method !== 'GET' &&
-        body && {
-          body: typeof body === 'object' ? JSON.stringify(body) : body,
-        }),
-      method: method,
-      headers: {
-        Authorization: authorization,
-        'Content-Type': 'application/json',
-      },
-    };
-
-    const request = await fetch(url, options);
-    if (!request.ok) {
-      const error = await request.text();
-      result.error = error as any;
-      result.status = request.status as any;
-    }
-
-    const response = await request.json();
-    result.response = response as any;
-  } catch (error) {
-    result.error = error as any;
-  }
-  return result;
-};
-
 export const transformVertexLogprobs = (
-  generation: GoogleResponseCandidate | VertexResponseCandidate
+  generation: GoogleResponseCandidate
 ) => {
   const logprobsContent: Logprobs[] = [];
   if (!generation.logprobsResult) return null;
@@ -636,9 +594,6 @@ export const vertexRequestLineHandler = (
     return transformedBody;
   }
 };
-export const isEmbeddingModel = (modelName: string) => {
-  return modelName.includes('embedding');
-};
 
 export const generateSignedURL = async (
   serviceAccountInfo: Record,
@@ -751,3 +706,7 @@ export const generateSignedURL = async (
   const schemeAndHost = `https://${host}`;
   return `${schemeAndHost}${canonicalUri}?${canonicalQueryString}&x-goog-signature=${signatureHex}`;
 };
+
+export const isEmbeddingModel = (modelName: string) => {
+  return modelName.includes('embedding');
+};

From c6881f49ced20dee4118dccbedfe1fe8bc739759 Mon Sep 17 00:00:00 2001
From: Narendranath Gogineni
Date: Mon, 29 Sep 2025 18:13:59 +0530
Subject: [PATCH 3/3] formatting

---
 src/providers/google-vertex-ai/api.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/providers/google-vertex-ai/api.ts b/src/providers/google-vertex-ai/api.ts
index 496be5e7d..a714319ef 100644
--- a/src/providers/google-vertex-ai/api.ts
+++ b/src/providers/google-vertex-ai/api.ts
@@ -73,7 +73,7 @@ export const GoogleApiConfig: ProviderAPIConfig = {
     const anthropicBeta =
       providerOptions?.['anthropicBeta'] ??
       gatewayRequestBody?.['anthropic_beta'];
-    
+
     return {
       'Content-Type': 'application/json',
       Authorization: `Bearer ${authToken}`,