Skip to content

Commit f633b84

Browse files
committed
Merge branch 'develop' into main
2 parents f69f6f9 + 2589998 commit f633b84

File tree

14 files changed

+399
-251
lines changed

14 files changed

+399
-251
lines changed

README.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ Includes:
1010
- Deployable on any Kubernetes cluster, with its Helm chart
1111
- Manage users effortlessly with OpenID Connect
1212
- More than 150 tones and personalities (accountant, advisor, debater, excel sheet, instructor, logistician, etc.) to better help employees in their specific daily tasks
13-
- Plug and play with any storage system, including [Azure Cosmos DB](https://learn.microsoft.com/en-us/azure/cosmos-db/), [Redis](https://github.com/redis/redis) and [Qdrant](https://github.com/qdrant/qdrant).
13+
- Plug and play storage system, including [Azure Cosmos DB](https://learn.microsoft.com/en-us/azure/cosmos-db/), [Redis](https://github.com/redis/redis) and [Qdrant](https://github.com/qdrant/qdrant).
1414
- Possibility to send temporary messages, for confidentiality
1515
- Scalable system based on stateless APIs, cache, progressive web app and events
1616
- Search engine for conversations, based on semantic similarity and AI embeddings
17-
- Unlimited conversation history
17+
- Unlimited conversation history and number of users
18+
- Usage tracking, for better understanding of your employees' usage
1819

1920
![Application screenshot](docs/main.png)
2021

@@ -35,6 +36,9 @@ store = "cosmos"
3536
# Enum: "redis"
3637
stream = "redis"
3738

39+
[api]
40+
root_path = ""
41+
3842
[openai]
3943
ada_deploy_id = "ada"
4044
ada_max_tokens = 2049
@@ -49,7 +53,7 @@ max_length = 1000
4953

5054
[logging]
5155
app_level = "DEBUG"
52-
sys_level = "INFO"
56+
sys_level = "WARN"
5357

5458
[oidc]
5559
algorithms = ["RS256"]
@@ -65,7 +69,7 @@ db = 0
6569
host = "localhost"
6670

6771
[cosmos]
68-
# Containers "conversation" (/user_id), "message" (/conversation_id) and "user" (/dummy) must exist
72+
# Containers "conversation" (/user_id), "message" (/conversation_id), "user" (/dummy), "usage" (/user_id) must exist
6973
url = "https://private-gpt.documents.azure.com:443"
7074
database = "private-gpt"
7175
```

cicd/helm/private-gpt/templates/conversation-api-config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ metadata:
77
app.kubernetes.io/component: conversation-api
88
data:
99
config.toml: |
10+
[persistence]
11+
search = "qdrant"
12+
store = "cosmos"
13+
stream = "redis"
14+
1015
[api]
1116
root_path = "/{{ include "private-gpt.fullname" . }}-conversation-api"
1217
@@ -37,3 +42,7 @@ data:
3742
[redis]
3843
db = {{ .Values.redis.db | int }}
3944
host = "{{ include "common.names.fullname" .Subcharts.redis }}-master"
45+
46+
[cosmos]
47+
url = {{ .Values.cosmos.url | quote }}
48+
database = {{ .Values.cosmos.database | quote }}

cicd/helm/private-gpt/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ api:
4242
base: null
4343
gpt_deploy_id: gpt-35-turbo
4444

45+
cosmos:
46+
# https://[db].documents.azure.com
47+
url: null
48+
database: null
49+
4550
redis:
4651
auth:
4752
enabled: false
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
# Import utils
from utils import (build_logger, get_config)

# Import misc
from azure.core.credentials import AzureKeyCredential
from fastapi import HTTPException, status
from tenacity import retry, stop_after_attempt, wait_random_exponential
import azure.ai.contentsafety as azure_cs
import azure.core.exceptions as azure_exceptions


###
# Init misc
###

logger = build_logger(__name__)

###
# Init Azure Content Safety
###

# Severity scale used by the service: 0 - Safe, 2 - Low, 4 - Medium, 6 - High
# See: https://review.learn.microsoft.com/en-us/azure/cognitive-services/content-safety/concepts/harm-categories?branch=release-build-content-safety#severity-levels
ACS_SEVERITY_THRESHOLD = 2
ACS_API_BASE = get_config("acs", "api_base", str, required=True)
ACS_API_TOKEN = get_config("acs", "api_token", str, required=True)
ACS_MAX_LENGTH = get_config("acs", "max_length", int, required=True)
logger.info(f"Connected Azure Content Safety to {ACS_API_BASE}")
acs_credential = AzureKeyCredential(ACS_API_TOKEN)
acs_client = azure_cs.ContentSafetyClient(ACS_API_BASE, acs_credential)
32+
33+
34+
class ContentSafety:
    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def is_moderated(self, prompt: str) -> bool:
        """Return True when Azure Content Safety flags the prompt as harmful.

        Checks the four harm categories (hate, self-harm, sexual, violence)
        against ACS_SEVERITY_THRESHOLD.

        Raises:
            HTTPException: 400 when the prompt exceeds ACS_MAX_LENGTH.

        On an authentication error the message is treated as safe (False) —
        best-effort moderation rather than a hard failure.
        """
        logger.debug(f"Checking moderation for text: {prompt}")

        # Over-long messages are rejected outright rather than truncated.
        if len(prompt) > ACS_MAX_LENGTH:
            logger.info(f"Message ({len(prompt)}) too long for moderation")
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Message too long",
            )

        analyze_options = azure_cs.models.AnalyzeTextOptions(
            text=prompt,
            categories=[
                azure_cs.models.TextCategory.HATE,
                azure_cs.models.TextCategory.SELF_HARM,
                azure_cs.models.TextCategory.SEXUAL,
                azure_cs.models.TextCategory.VIOLENCE,
            ],
        )

        try:
            analysis = acs_client.analyze_text(analyze_options)
        except azure_exceptions.ClientAuthenticationError as e:
            logger.exception(e)
            return False

        category_results = (
            analysis.hate_result,
            analysis.self_harm_result,
            analysis.sexual_result,
            analysis.violence_result,
        )
        flagged = any(
            category.severity >= ACS_SEVERITY_THRESHOLD
            for category in category_results
        )

        if flagged:
            logger.info(f"Message is moderated: {prompt}")
            logger.debug(f"Moderation result: {analysis}")

        return flagged

src/conversation-api/ai/openai.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
# Import utils
from uuid import UUID
from utils import (build_logger, get_config, hash_token)

# Import misc
from azure.identity import DefaultAzureCredential
from models.user import UserModel
from tenacity import retry, stop_after_attempt, wait_random_exponential
from typing import Any, Dict, List, AsyncGenerator, Union
import asyncio
import openai


###
# Init misc
###

logger = build_logger(__name__)
# NOTE(review): get_running_loop() raises RuntimeError when no event loop is
# running; this assumes the module is imported from inside a running loop
# (e.g. by the ASGI server) — confirm against the application entry point.
loop = asyncio.get_running_loop()


###
# Init OpenAI
###

async def refresh_oai_token_background():
    """
    Refresh the OpenAI token every 15 minutes.

    The OpenAI SDK does not support token refresh, so we need to do it manually: we pass the token to the SDK ourselves. Azure AD tokens are valid for 30 mins, but we refresh every 15 minutes to be safe.

    See: https://github.com/openai/openai-python/pull/350#issuecomment-1489813285
    """
    while True:
        logger.info("Refreshing OpenAI token")
        oai_cred = DefaultAzureCredential()
        oai_token = oai_cred.get_token("https://cognitiveservices.azure.com/.default")
        openai.api_key = oai_token.token
        # Sleep 15 minutes before the next refresh (half the token lifetime)
        await asyncio.sleep(15 * 60)


openai.api_base = get_config("openai", "api_base", str, required=True)
openai.api_type = "azure_ad"
openai.api_version = "2023-05-15"
# Fixed typo: "Aure" -> "Azure"
logger.info(f"Using Azure private service ({openai.api_base})")
loop.create_task(refresh_oai_token_background())

OAI_GPT_DEPLOY_ID = get_config("openai", "gpt_deploy_id", str, required=True)
OAI_GPT_MAX_TOKENS = get_config("openai", "gpt_max_tokens", int, required=True)
OAI_GPT_MODEL = get_config(
    "openai", "gpt_model", str, default="gpt-3.5-turbo", required=True
)
# Fixed copy-paste bug: this log line previously said "ADA model" for the GPT config
logger.info(
    f'Using OpenAI GPT model "{OAI_GPT_MODEL}" ({OAI_GPT_DEPLOY_ID}) with {OAI_GPT_MAX_TOKENS} tokens max'
)

OAI_ADA_DEPLOY_ID = get_config("openai", "ada_deploy_id", str, required=True)
OAI_ADA_MAX_TOKENS = get_config("openai", "ada_max_tokens", int, required=True)
OAI_ADA_MODEL = get_config(
    "openai", "ada_model", str, default="text-embedding-ada-002", required=True
)
logger.info(
    f'Using OpenAI ADA model "{OAI_ADA_MODEL}" ({OAI_ADA_DEPLOY_ID}) with {OAI_ADA_MAX_TOKENS} tokens max'
)
66+
67+
68+
class OpenAI:
    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def vector_from_text(self, prompt: str, user_id: UUID) -> List[float]:
        """Embed the prompt with the ADA deployment.

        Returns the embedding vector, or an empty list when authentication
        against the OpenAI endpoint fails.
        """
        logger.debug(f"Getting vector for text: {prompt}")
        try:
            embedding_res = openai.Embedding.create(
                deployment_id=OAI_ADA_DEPLOY_ID,
                input=prompt,
                model=OAI_ADA_MODEL,
                user=user_id.hex,
            )
        except openai.error.AuthenticationError as e:
            logger.exception(e)
            return []

        return embedding_res.data[0].embedding

    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def completion(self, messages: List[Dict[str, str]], current_user: UserModel) -> Union[str, None]:
        """Run a single (non-streaming) chat completion.

        Returns the assistant message content, or None when authentication
        against the OpenAI endpoint fails.
        """
        # Chat completion gives a more natural response and lowers the usage cost
        try:
            chat_res = openai.ChatCompletion.create(
                deployment_id=OAI_GPT_DEPLOY_ID,
                messages=messages,
                model=OAI_GPT_MODEL,
                presence_penalty=1,  # Increase the model's likelihood to talk about new topics
                user=hash_token(current_user.id.bytes).hex,
            )
        except openai.error.AuthenticationError as e:
            logger.exception(e)
            return

        return chat_res["choices"][0].message.content

    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def completion_stream(self, messages: List[Dict[str, str]], current_user: UserModel) -> AsyncGenerator[Any, None]:
        """Run a streaming chat completion, yielding content deltas as they arrive.

        Stops silently (yields nothing) when authentication against the
        OpenAI endpoint fails.
        """
        # Chat completion gives a more natural response and lowers the usage cost
        try:
            stream = openai.ChatCompletion.create(
                deployment_id=OAI_GPT_DEPLOY_ID,
                messages=messages,
                model=OAI_GPT_MODEL,
                presence_penalty=1,  # Increase the model's likelihood to talk about new topics
                stream=True,
                user=hash_token(current_user.id.bytes).hex,
            )
        except openai.error.AuthenticationError as e:
            logger.exception(e)
            return

        for chunk in stream:
            delta = chunk["choices"][0].get("delta", {})
            token = delta.get("content")
            if token is not None:
                yield token

0 commit comments

Comments
 (0)