Azure-Samples · kdestin · Jan 6, 2025 · Dec 25, 2024
@@ -158,125 +158,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import requests\n",
-    "from typing_extensions import Self\n",
-    "from typing import TypedDict\n",
-    "from promptflow.tracing import trace\n",
-    "\n",
-    "\n",
-    "class ModelEndpoints:\n",
-    "    def __init__(self: Self, env: dict, model_type: str) -> str:\n",
-    "        self.env = env\n",
-    "        self.model_type = model_type\n",
-    "\n",
-    "    class Response(TypedDict):\n",
-    "        query: str\n",
-    "        response: str\n",
-    "\n",
-    "    @trace\n",
-    "    def __call__(self: Self, query: str) -> Response:\n",
-    "        if self.model_type == \"gpt4-0613\":\n",
-    "            output = self.call_gpt4_endpoint(query)\n",
-    "        elif self.model_type == \"gpt35-turbo\":\n",
-    "            output = self.call_gpt35_turbo_endpoint(query)\n",
-    "        elif self.model_type == \"mistral7b\":\n",
-    "            output = self.call_mistral_endpoint(query)\n",
-    "        elif self.model_type == \"tiny_llama\":\n",
-    "            output = self.call_tiny_llama_endpoint(query)\n",
-    "        elif self.model_type == \"phi3_mini_serverless\":\n",
-    "            output = self.call_phi3_mini_serverless_endpoint(query)\n",
-    "        elif self.model_type == \"gpt2\":\n",
-    "            output = self.call_gpt2_endpoint(query)\n",
-    "        else:\n",
-    "            output = self.call_default_endpoint(query)\n",
-    "\n",
-    "        return output\n",
-    "\n",
-    "    def query(self: Self, endpoint: str, headers: str, payload: str) -> str:\n",
-    "        response = requests.post(url=endpoint, headers=headers, json=payload)\n",
-    "        return response.json()\n",
-    "\n",
-    "    def call_gpt4_endpoint(self: Self, query: str) -> Response:\n",
-    "        endpoint = self.env[\"gpt4-0613\"][\"endpoint\"]\n",
-    "        key = self.env[\"gpt4-0613\"][\"key\"]\n",
-    "\n",
-    "        headers = {\"Content-Type\": \"application/json\", \"api-key\": key}\n",
-    "\n",
-    "        payload = {\"messages\": [{\"role\": \"user\", \"content\": query}], \"max_tokens\": 500}\n",
-    "\n",
-    "        output = self.query(endpoint=endpoint, headers=headers, payload=payload)\n",
-    "        response = output[\"choices\"][0][\"message\"][\"content\"]\n",
-    "        return {\"query\": query, \"response\": response}\n",
-    "\n",
-    "    def call_gpt35_turbo_endpoint(self: Self, query: str) -> Response:\n",
-    "        endpoint = self.env[\"gpt35-turbo\"][\"endpoint\"]\n",
-    "        key = self.env[\"gpt35-turbo\"][\"key\"]\n",
-    "\n",
-    "        headers = {\"Content-Type\": \"application/json\", \"api-key\": key}\n",
-    "\n",
-    "        payload = {\"messages\": [{\"role\": \"user\", \"content\": query}], \"max_tokens\": 500}\n",
-    "\n",
-    "        output = self.query(endpoint=endpoint, headers=headers, payload=payload)\n",
-    "        response = output[\"choices\"][0][\"message\"][\"content\"]\n",
-    "        return {\"query\": query, \"response\": response}\n",
-    "\n",
-    "    def call_tiny_llama_endpoint(self: Self, query: str) -> Response:\n",
-    "        endpoint = self.env[\"tiny_llama\"][\"endpoint\"]\n",
-    "        key = self.env[\"tiny_llama\"][\"key\"]\n",
-    "\n",
-    "        headers = {\"Content-Type\": \"application/json\", \"Authorization\": (\"Bearer \" + key)}\n",
-    "\n",
-    "        payload = {\n",
-    "            \"model\": \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\",\n",
-    "            \"messages\": [{\"role\": \"user\", \"content\": query}],\n",
-    "            \"max_tokens\": 500,\n",
-    "            \"stream\": False,\n",
-    "        }\n",
-    "\n",
-    "        output = self.query(endpoint=endpoint, headers=headers, payload=payload)\n",
-    "        response = output[\"choices\"][0][\"message\"][\"content\"]\n",
-    "        return {\"query\": query, \"response\": response}\n",
-    "\n",
-    "    def call_phi3_mini_serverless_endpoint(self: Self, query: str) -> Response:\n",
-    "        endpoint = self.env[\"phi3_mini_serverless\"][\"endpoint\"]\n",
-    "        key = self.env[\"phi3_mini_serverless\"][\"key\"]\n",
-    "\n",
-    "        headers = {\"Content-Type\": \"application/json\", \"Authorization\": (\"Bearer \" + key)}\n",
-    "\n",
-    "        payload = {\"messages\": [{\"role\": \"user\", \"content\": query}], \"max_tokens\": 500}\n",
-    "\n",
-    "        output = self.query(endpoint=endpoint, headers=headers, payload=payload)\n",
-    "        response = output[\"choices\"][0][\"message\"][\"content\"]\n",
-    "        return {\"query\": query, \"response\": response}\n",
-    "\n",
-    "    def call_gpt2_endpoint(self: Self, query: str) -> Response:\n",
-    "        endpoint = self.env[\"gpt2\"][\"endpoint\"]\n",
-    "        key = self.env[\"gpt2\"][\"key\"]\n",
-    "\n",
-    "        headers = {\"Content-Type\": \"application/json\", \"Authorization\": (\"Bearer \" + key)}\n",
-    "\n",
-    "        payload = {\n",
-    "            \"inputs\": query,\n",
-    "        }\n",
-    "\n",
-    "        output = self.query(endpoint=endpoint, headers=headers, payload=payload)\n",
-    "        response = output[0][\"generated_text\"]\n",
-    "        return {\"query\": query, \"response\": response}\n",
-    "\n",
-    "    def call_mistral_endpoint(self: Self, query: str) -> Response:\n",
-    "        endpoint = self.env[\"mistral7b\"][\"endpoint\"]\n",
-    "        key = self.env[\"mistral7b\"][\"key\"]\n",
-    "\n",
-    "        headers = {\"Content-Type\": \"application/json\", \"Authorization\": (\"Bearer \" + key)}\n",
-    "\n",
-    "        payload = {\"messages\": [{\"content\": query, \"role\": \"user\"}], \"max_tokens\": 50}\n",
-    "\n",
-    "        output = self.query(endpoint=endpoint, headers=headers, payload=payload)\n",
-    "        response = output[\"choices\"][0][\"message\"][\"content\"]\n",
-    "        return {\"query\": query, \"response\": response}\n",
-    "\n",
-    "    def call_default_endpoint(query: str) -> Response:\n",
-    "        return {\"query\": \"What is the capital of France?\", \"response\": \"Paris\"}"
+    "!pygmentize model_endpoints.py"
    ]
   },
   {
@@ -349,6 +231,7 @@
     "from azure.ai.evaluation import (\n",
     "    RelevanceEvaluator,\n",
     ")\n",
+    "from model_endpoints import ModelEndpoints\n",
     "\n",
     "relevance_evaluator = RelevanceEvaluator(model_config)\n",
     "\n",
@@ -412,7 +295,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv-azureai-samples",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },

@@ -0,0 +1,119 @@
+import requests
+from typing_extensions import Self
+from typing import TypedDict
+from promptflow.tracing import trace
+
+
+class ModelEndpoints:
+    def __init__(self: Self, env: dict, model_type: str) -> str:
+        self.env = env
+        self.model_type = model_type
+
+    class Response(TypedDict):
+        query: str
+        response: str
+
+    @trace
+    def __call__(self: Self, query: str) -> Response:
+        if self.model_type == "gpt4-0821":
+            output = self.call_gpt4_endpoint(query)
+        elif self.model_type == "gpt35-turbo":
+            output = self.call_gpt35_turbo_endpoint(query)
+        elif self.model_type == "mistral7b":
+            output = self.call_mistral_endpoint(query)
+        elif self.model_type == "tiny_llama":
+            output = self.call_tiny_llama_endpoint(query)
+        elif self.model_type == "phi3_mini_serverless":
+            output = self.call_phi3_mini_serverless_endpoint(query)
+        elif self.model_type == "gpt2":
+            output = self.call_gpt2_endpoint(query)
+        else:
+            output = self.call_default_endpoint(query)
+
+        return output
+
+    def query(self: Self, endpoint: str, headers: str, payload: str) -> str:
+        response = requests.post(url=endpoint, headers=headers, json=payload)
+        return response.json()
+
+    def call_gpt4_endpoint(self: Self, query: str) -> Response:
+        endpoint = self.env["gpt4-0821"]["endpoint"]
+        key = self.env["gpt4-0821"]["key"]
+
+        headers = {"Content-Type": "application/json", "api-key": key}
+
+        payload = {"messages": [{"role": "user", "content": query}], "max_tokens": 500}
+
+        output = self.query(endpoint=endpoint, headers=headers, payload=payload)
+        response = output["choices"][0]["message"]["content"]
+        return {"query": query, "response": response}
+
+    def call_gpt35_turbo_endpoint(self: Self, query: str) -> Response:
+        endpoint = self.env["gpt35-turbo"]["endpoint"]
+        key = self.env["gpt35-turbo"]["key"]
+
+        headers = {"Content-Type": "application/json", "api-key": key}
+
+        payload = {"messages": [{"role": "user", "content": query}], "max_tokens": 500}
+
+        output = self.query(endpoint=endpoint, headers=headers, payload=payload)
+        response = output["choices"][0]["message"]["content"]
+        return {"query": query, "response": response}
+
+    def call_tiny_llama_endpoint(self: Self, query: str) -> Response:
+        endpoint = self.env["tiny_llama"]["endpoint"]
+        key = self.env["tiny_llama"]["key"]
+
+        headers = {"Content-Type": "application/json", "Authorization": ("Bearer " + key)}
+
+        payload = {
+            "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            "messages": [{"role": "user", "content": query}],
+            "max_tokens": 500,
+            "stream": False,
+        }
+
+        output = self.query(endpoint=endpoint, headers=headers, payload=payload)
+        response = output["choices"][0]["message"]["content"]
+        return {"query": query, "response": response}
+
+    def call_phi3_mini_serverless_endpoint(self: Self, query: str) -> Response:
+        endpoint = self.env["phi3_mini_serverless"]["endpoint"]
+        key = self.env["phi3_mini_serverless"]["key"]
+
+        headers = {"Content-Type": "application/json", "Authorization": ("Bearer " + key)}
+
+        payload = {"messages": [{"role": "user", "content": query}], "max_tokens": 500}
+
+        output = self.query(endpoint=endpoint, headers=headers, payload=payload)
+        response = output["choices"][0]["message"]["content"]
+        return {"query": query, "response": response}
+
+    def call_gpt2_endpoint(self: Self, query: str) -> Response:
+        endpoint = self.env["gpt2"]["endpoint"]
+        key = self.env["gpt2"]["key"]
+
+        headers = {"Content-Type": "application/json", "Authorization": ("Bearer " + key)}
+
+        payload = {
+            "inputs": query,
+        }
+
+        output = self.query(endpoint=endpoint, headers=headers, payload=payload)
+        response = output[0]["generated_text"]
+        return {"query": query, "response": response}
+
+    def call_mistral_endpoint(self: Self, query: str) -> Response:
+        endpoint = self.env["mistral7b"]["endpoint"]
+        key = self.env["mistral7b"]["key"]
+
+        headers = {"Content-Type": "application/json", "Authorization": ("Bearer " + key)}
+
+        payload = {"messages": [{"content": query, "role": "user"}], "max_tokens": 50}
+
+        output = self.query(endpoint=endpoint, headers=headers, payload=payload)
+        response = output["choices"][0]["message"]["content"]
+        return {"query": query, "response": response}
+
+    def call_default_endpoint(query: str) -> Response:
+        return {"query": "What is the capital of France?", "response": "Paris"}