LLM Evaluation

You can run evaluations across multiple models and select the most suitable model for your use case based on quantitative metrics.
Below is an example that compares the Llama 13b-chat (via Replicate), OpenAI gpt-3.5-turbo, and Cohere command models.

import requests
import json

url = "http://localhost/evaluate/llm"

payload = json.dumps({
  "userInput": "User input or question.",
  "targetAnswers": [
    "optimal answer example1.",
    "optimal answer example2.",
    "optimal answer example3."  ],
  "semantic": {
    "api_key": "",
    "provider": "openai"
  },
  "evaluate": [
    {
      "apiKey": "",
      "provider": "replicate",
      "type": "chat",
      "model": "13b-chat",
      "maxTokens": 50
    },
    {
      "apiKey": "",
      "provider": "cohere",
      "type": "completion",
      "model": "command",
      "maxTokens": 50
    },
    {
      "apiKey": "",
      "provider": "openai",
      "type": "chat",
      "model": "gpt-3.5-turbo",
      "maxTokens": 50,
      "temperature": 0.7
    }
  ]
})
headers = {
  'X-API-KEY': '<microservice-key>',
  'Content-Type': 'application/json'
}

response = requests.post(url, headers=headers, data=payload)

print(response.text)

Snapshot of the expected output format:

{
 "openai/gpt-3.5-turbo": [{
  "prediction": "Photosynthesis is how plants make food for themselves....",
  "score_cosine_similarity": 0.9566836802012463,
  "score_euclidean_distance": 0.29175853870023755
 }],
 "cohere/command": [{
  "prediction": "Photosynthesis is the process by which plants use the energy .....",
  "score_cosine_similarity": 0.9378139154300577,
  "score_euclidean_distance": 0.3512465738424273
 }],
 "replicate/13b-chat": [{
  "prediction": "Here's an explanation of photosynthesis in simple terms .....",
  "score_cosine_similarity": 0.9096764395396765,
  "score_euclidean_distance": 0.4248874961328429
 }],
 "lookup": {
  "cosine_similarity": "a value closer to 1 indicates a higher degree of similarity between two vectors",
  "euclidean_distance": "the lower the value, the closer the two points"
 }
}
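
To choose a model programmatically, you can average each model's cosine similarity and take the highest; a minimal sketch, assuming the response follows the format shown above:

import json

results = json.loads(response.text)

# Average cosine similarity per model; "lookup" is a legend, not a model.
scores = {
  model: sum(run["score_cosine_similarity"] for run in runs) / len(runs)
  for model, runs in results.items()
  if model != "lookup"
}

# Cosine similarity closer to 1 means the prediction is semantically
# closer to the target answers, so the highest average wins.
best_model = max(scores, key=scores.get)
print(best_model, scores[best_model])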

Semantic Search

import requests
import json

url = "/semanticsearch/search"

payload = json.dumps({
  "api_key": "",
  "provider": "openai",
  "input": {
    "pivot_item": "Hello from OpenAI!",
    "search_array": [
      "Greetings from OpenAI!",
      "Bonjour de OpenAI!",
      "Hola desde OpenAI!"
    ],
    "number_of_matches": 2
  }
})
headers = {
  'X-API-KEY': '<microservice-key>',
  'Content-Type': 'application/json'
}

response = requests.post(url, headers=headers, data=payload)

print(response.text)
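
The endpoint ranks the search_array items by semantic similarity to the pivot_item and returns the closest number_of_matches. The sketch below illustrates that ranking logic locally; embed() is a hypothetical stand-in for the provider's embedding call, not part of this API:

import math

def embed(text):
  # Hypothetical stand-in: the real service embeds text with the
  # configured provider (e.g., openai) instead of this toy encoding.
  return [float(ord(c)) for c in text[:8].ljust(8)]

def cosine_similarity(a, b):
  dot = sum(x * y for x, y in zip(a, b))
  norms = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
  return dot / norms

pivot = embed("Hello from OpenAI!")
candidates = ["Greetings from OpenAI!", "Bonjour de OpenAI!", "Hola desde OpenAI!"]

# Rank candidates by similarity to the pivot and keep the top matches.
ranked = sorted(candidates, key=lambda t: cosine_similarity(pivot, embed(t)), reverse=True)
print(ranked[:2])  # number_of_matches = 2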

Chatbot

import requests
import json

url = "/chatbot/chat"

payload = json.dumps({
  "api_key": "",
  "model": "gpt4",
  "provider": "openai",
  "input": {
    "system": "You are a helpful assistant.",
    "messages": [
      {
        "role": "user",
        "content": "recommend a vegetarian dish for dinner"
      }
    ]
  }
})
headers = {
  'X-API-KEY': '<microservice-key>',
  'Content-Type': 'application/json'
}

response = requests.post(url, headers=headers, data=payload)

print(response.text)
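
A follow-up turn reuses the same endpoint: append the assistant's reply to the messages list and send the request again. A minimal sketch; extracting the reply from the response depends on the actual schema, so the first line below is an assumption to adjust:

# Assumption: the reply arrives as the response body text;
# adjust the extraction to the actual response schema.
assistant_reply = response.text

messages = [
  {"role": "user", "content": "recommend a vegetarian dish for dinner"},
  {"role": "assistant", "content": assistant_reply},
  {"role": "user", "content": "make it gluten-free"}
]

payload = json.dumps({
  "api_key": "",
  "model": "gpt4",
  "provider": "openai",
  "input": {
    "system": "You are a helpful assistant.",
    "messages": messages
  }
})

response = requests.post(url, headers=headers, data=payload)
print(response.text)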

Replace <microservice-key> with the microservice key provided when running the Docker image.
