feat: get ai models api (#800)

CasLubbers · merll · svcAPLBot · web-flow · commit d86dfb4880ee · 2025-10-02T08:34:06.000+02:00
* feat: added API spec for agent inference platform

* feat: add agent crud api and fix comments

* feat: add ai model get api

* feat: add authz tests

---------

Co-authored-by: Matthias Erll <merll@akamai.com>
Co-authored-by: svcAPLBot <174728082+svcAPLBot@users.noreply.github.com>
diff --git a/src/ai/aiModelHandler.ts b/src/ai/aiModelHandler.ts
@@ -0,0 +1,73 @@
+import { V1Deployment } from '@kubernetes/client-node'
+import { AplAIModelResponse } from 'src/otomi-models'
+import { getDeploymentsWithAIModelLabels } from './k8s'
+
+/**
+ * Maps the status conditions of a Deployment to the condition objects used in the API response.
+ * The Kubernetes status string becomes a boolean that is true only for 'True'.
+ */
+function getConditions(deployment: V1Deployment) {
+  return (deployment.status?.conditions || []).map((condition) => ({
+    lastTransitionTime: condition.lastTransitionTime?.toISOString(),
+    message: condition.message,
+    reason: condition.reason,
+    status: condition.status === 'True',
+    type: condition.type,
+  }))
+}
+
+/**
+ * Parses the value of the `modelDimension` label as a base-10 integer.
+ *
+ * @param raw - label value, if present
+ * @returns the parsed number, or undefined when the label is missing, empty or not numeric
+ */
+function parseModelDimension(raw: string | undefined): number | undefined {
+  if (!raw) return undefined
+  const parsed = parseInt(raw, 10)
+  // parseInt yields NaN for non-numeric text, which JSON.stringify would emit as null
+  return Number.isNaN(parsed) ? undefined : parsed
+}
+
+/**
+ * Builds the AplAIModel API representation of a Kubernetes Deployment.
+ *
+ * The name is the Deployment name, falling back to the `modelName` label. The phase is 'Ready'
+ * as soon as at least one replica is ready, otherwise 'NotReady'.
+ *
+ * @param deployment - Deployment to convert
+ * @returns the model in API response shape
+ */
+export function transformK8sDeploymentToAplAIModel(deployment: V1Deployment): AplAIModelResponse {
+  const { metadata, status } = deployment
+  const labels = metadata?.labels || {}
+  const modelName = metadata?.name || labels.modelName
+  const modelDimension = parseModelDimension(labels.modelDimension)
+
+  return {
+    kind: 'AplAIModel',
+    metadata: {
+      name: modelName,
+    },
+    spec: {
+      displayName: modelName,
+      // NOTE(review): presumes a Service named like the Deployment exposes the model - confirm
+      modelEndpoint: `http://${metadata?.name}.${metadata?.namespace}.svc.cluster.local`,
+      modelType: labels.modelType as 'foundation' | 'embedding',
+      // A label that is not a number is left out instead of being returned as NaN
+      ...(modelDimension !== undefined && { modelDimension }),
+    },
+    status: {
+      conditions: getConditions(deployment),
+      phase: (status?.readyReplicas ?? 0) > 0 ? 'Ready' : 'NotReady',
+    },
+  }
+}
+
+/**
+ * Fetches the Deployments carrying AI model labels and converts each one to the API response shape.
+ */
+export async function getAIModels(): Promise<AplAIModelResponse[]> {
+  const deployments = await getDeploymentsWithAIModelLabels()
+  return deployments.map(transformK8sDeploymentToAplAIModel)
+}
diff --git a/src/ai/k8s.ts b/src/ai/k8s.ts
@@ -0,0 +1,39 @@
+import { AppsV1Api, KubeConfig, V1Deployment } from '@kubernetes/client-node'
+import Debug from 'debug'
+
+const debug = Debug('otomi:ai:k8s')
+
+// Client instance reused across calls; created on first use
+let appsApiClient: AppsV1Api | undefined
+
+/**
+ * Returns the shared AppsV1Api client, building it from the default kubeconfig on first call.
+ */
+function getAppsApiClient(): AppsV1Api {
+  if (!appsApiClient) {
+    const kubeConfig = new KubeConfig()
+    kubeConfig.loadFromDefault()
+    appsApiClient = kubeConfig.makeApiClient(AppsV1Api)
+  }
+  return appsApiClient
+}
+
+/**
+ * Lists the Deployments, across all namespaces, that carry both a `modelType` and a `modelName` label.
+ *
+ * @returns the matching Deployments, or an empty array when the list call fails (the error is
+ *   only written to the debug log)
+ */
+export async function getDeploymentsWithAIModelLabels(): Promise<V1Deployment[]> {
+  const client = getAppsApiClient()
+
+  try {
+    // Keys without values select on label existence
+    const { items } = await client.listDeploymentForAllNamespaces({ labelSelector: 'modelType,modelName' })
+    debug(`Found ${items.length} AI model deployments`)
+    return items
+  } catch (error) {
+    debug('Error fetching deployments from Kubernetes:', error)
+    return []
+  }
+}
diff --git a/src/api.authz.test.ts b/src/api.authz.test.ts
@@ -769,4 +769,42 @@ describe('API authz tests', () => {
         .expect('Content-Type', /json/)
     })
   })
+
+  describe('AI Models endpoint tests', () => {
+    const modelsPath = '/alpha/ai/models'
+
+    // Replaces getAllAIModels with a stub that resolves to an empty list
+    const stubModels = () => jest.spyOn(otomiStack, 'getAllAIModels').mockResolvedValue([])
+
+    test('platform admin can get AI models', async () => {
+      stubModels()
+      await agent
+        .get(modelsPath)
+        .set('Authorization', `Bearer ${platformAdminToken}`)
+        .expect(200)
+        .expect('Content-Type', /json/)
+    })
+
+    test('team admin can get AI models', async () => {
+      stubModels()
+      await agent
+        .get(modelsPath)
+        .set('Authorization', `Bearer ${teamAdminToken}`)
+        .expect(200)
+        .expect('Content-Type', /json/)
+    })
+
+    test('team member can get AI models', async () => {
+      stubModels()
+      await agent
+        .get(modelsPath)
+        .set('Authorization', `Bearer ${teamMemberToken}`)
+        .expect(200)
+        .expect('Content-Type', /json/)
+    })
+
+    test('anonymous user cannot get AI models', async () => {
+      await agent.get(modelsPath).expect(401)
+    })
+  })
 })
diff --git a/src/api/alpha/ai/models.ts b/src/api/alpha/ai/models.ts
@@ -0,0 +1,21 @@
+import Debug from 'debug'
+import { Operation, OperationHandlerArray } from 'express-openapi'
+import { OpenApiRequestExt } from 'src/otomi-models'
+
+const debug = Debug('otomi:api:alpha:ai:models')
+
+/**
+ * Creates the operation handlers for this path; only GET is provided, which responds with the
+ * JSON-encoded result of `otomi.getAllAIModels()`.
+ */
+export default function (): OperationHandlerArray {
+  const get: Operation = [
+    /* business middleware not expressible by OpenAPI documentation goes here */
+    async (req: OpenApiRequestExt, res): Promise<void> => {
+      debug('getAllAIModels')
+      res.json(await req.otomi.getAllAIModels())
+    },
+  ]
+  const handlers = { get }
+  return handlers
+}
diff --git a/src/openapi/aiModel.yaml b/src/openapi/aiModel.yaml
@@ -1,11 +1,10 @@
-AplAIModel:
+AIModel:
+  type: object
   x-acl:
-    platformAdmin:
-      - read-any
-    teamAdmin:
-      - read
-    teamMember:
-      - read
+    platformAdmin: [read-any]
+    teamAdmin: [read-any]
+    teamMember: [read-any]
+  properties: {}
 
 AplAIModelSpec:
   x-acl:
@@ -38,4 +37,5 @@ AplAIModelSpec:
       example: 4096
   required:
     - modelEndpoint
+    - modelType
   type: object
diff --git a/src/openapi/api.yaml b/src/openapi/api.yaml
@@ -2475,7 +2475,7 @@ paths:
     get:
       operationId: getAIModels
       description: Get available shared AI models (foundation or embedding)
-      x-aclSchema: AplAIModel
+      x-aclSchema: AIModel
       responses:
         '200':
           description: Successfully obtained shared AI models
@@ -2827,7 +2827,7 @@ components:
       properties:
         kind:
           type: string
-          enum: [AplKnowledgeBase]
+          enum: [AplAIModel]
         spec:
           $ref: 'aiModel.yaml#/AplAIModelSpec'
       required:
@@ -3127,6 +3127,8 @@ components:
       $ref: 'testrepoconnect.yaml#/TestRepoConnect'
     InternalRepoUrls:
       $ref: 'internalRepoUrls.yaml#/InternalRepoUrls'
+    AIModel:
+      $ref: 'aiModel.yaml#/AIModel'
     Team:
       $ref: 'team.yaml#/Team'
     TeamAuthz:
diff --git a/src/otomi-models.ts b/src/otomi-models.ts
@@ -8,6 +8,7 @@ export type AppList = components['schemas']['AppList']
 export type Backup = components['schemas']['Backup']
 export type AplBackupRequest = components['schemas']['AplBackupRequest']
 export type AplBackupResponse = components['schemas']['AplBackupResponse']
+export type AplAIModelResponse = components['schemas']['AplAIModelResponse']
 export type Kubecfg = components['schemas']['Kubecfg']
 export type K8sService = components['schemas']['K8sService']
 export type Netpol = components['schemas']['Netpol']
diff --git a/src/otomi-stack.ts b/src/otomi-stack.ts
@@ -1,4 +1,4 @@
-import { CoreV1Api, User as k8sUser, KubeConfig, V1ObjectReference } from '@kubernetes/client-node'
+import { CoreV1Api, KubeConfig, User as k8sUser, V1ObjectReference } from '@kubernetes/client-node'
 import Debug from 'debug'
 
 import { getRegions, ObjectStorageKeyRegions } from '@linode/api-v4'
@@ -12,6 +12,7 @@ import { AlreadyExists, ForbiddenError, HttpError, OtomiError, PublicUrlExists,
 import getRepo, { getWorktreeRepo, Git } from 'src/git'
 import { cleanSession, getSessionStack } from 'src/middleware'
 import {
+  AplAIModelResponse,
   AplBackupRequest,
   AplBackupResponse,
   AplBuildRequest,
@@ -114,6 +115,7 @@ import { getSealedSecretsPEM, sealedSecretManifest, SealedSecretManifestType } f
 import { getKeycloakUsers, isValidUsername } from './utils/userUtils'
 import { ObjectStorageClient } from './utils/wizardUtils'
 import { fetchChartYaml, fetchWorkloadCatalog, NewHelmChartValues, sparseCloneChart } from './utils/workloadUtils'
+import { getAIModels } from './ai/aiModelHandler'
 
 interface ExcludedApp extends App {
   managed: boolean
@@ -2112,6 +2114,12 @@ export default class OtomiStack {
     return names
   }
 
+  /** Resolves with the AI models returned by getAIModels(). */
+  async getAllAIModels(): Promise<AplAIModelResponse[]> {
+    const models = await getAIModels()
+    return models
+  }
+
   async getK8sServices(teamId: string): Promise<Array<K8sService>> {
     if (env.isDev) return []
     // const teams = user.teams.map((name) => {