-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
150 lines (126 loc) · 5.47 KB
/
server.py
File metadata and controls
150 lines (126 loc) · 5.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import socket
import threading
import io
import numpy as np
import soundfile as sf
import whisper
import lmstudio as lms
# Settings
SAMPLE_RATE = 16000      # Hz — matches Whisper's expected input rate
CHANNELS = 1             # mono PCM from the client
BYTES_PER_SAMPLE = 2     # 16-bit signed little-endian samples
SECONDS_PER_CHUNK = 10   # audio accumulated per transcription pass
# System prompt: fixed instruction the LLM sees as message[0] on every request.
PROMPT = "You are an AI designed to determine character flaws and insecurities of the person speaking. Based on the user's speech, output a comma-separated list of any flaws or insecurities. Keep responses short."
MODEL_TEXT_NAME = "qwen/qwen3-4b-2507"   # LM Studio model identifier
MODEL_AUDIO_NAME = "small"               # Whisper checkpoint size
HOST = "0.0.0.0"   # listen on all interfaces
PORT = 5000
NO_SPEECH_THRESHOLD = 0.4   # Whisper no_speech_prob above this => treat chunk as silence
MAX_HISTORY_MESSAGES = 4    # cap on retained chat messages before trimming
# DEBUG
# SECONDS_PER_CHUNK = 1
# NO_SPEECH_THRESHOLD = 0.9
# Globals
model_text = None    # LM Studio LLM handle; assigned in main()
model_audio = None   # Whisper model; assigned in main()
# Shared conversation state. NOTE(review): mutated by every client thread
# without a lock — confirm the server is only ever used by one client at a time.
history = {"messages": [{"role": "system", "content": PROMPT}]}
# For each client
def _parse_flaws(raw):
    """Normalize a raw LLM reply into an upper-case, comma-separated list.

    Strips non-ASCII characters, converts sentence punctuation to commas,
    collapses whitespace, then keeps comma-separated items in order until
    adding the next item would push the result past 100 characters.

    Args:
        raw: the model's reply as a string.

    Returns:
        The cleaned, upper-cased list (possibly empty).
    """
    cleaned = ''.join(c for c in raw.strip() if ord(c) < 128)
    cleaned = cleaned.replace("\n", " ")
    cleaned = cleaned.replace(".", ",").replace("!", ",")
    cleaned = cleaned.replace("_", " ").replace("*", "")
    # Collapse ALL whitespace runs. (The original single `replace("  ", " ")`
    # pass left runs of three or more spaces only partially collapsed.)
    cleaned = " ".join(cleaned.split())
    result = ""
    for item in cleaned.split(","):
        item = item.strip()
        if not item:
            continue
        # Stop at the first item that would exceed the ~100-char budget
        # (separator length is not counted, matching the original's loose cap).
        if len(result) + len(item) > 100:
            break
        if result:
            result += ", "
        result += item
    return result.strip().upper()


def handle_client(conn, addr):
    """Serve one client connection until it closes.

    Receives a stream of raw PCM16 mono 16 kHz audio from `conn`, transcribes
    each SECONDS_PER_CHUNK-second chunk with Whisper, asks the LLM for a list
    of flaws/insecurities when confident English speech is detected, and sends
    each parsed reply back as one newline-terminated UTF-8 line.

    Args:
        conn: connected socket-like object (recv/sendall/close).
        addr: peer address tuple, used only for logging.
    """
    print(f"[+] Connected by {addr}")
    buffer = bytearray()
    try:
        # Keep receiving data until the peer closes the connection.
        while True:
            data = conn.recv(10 * 4096)
            if not data:
                break
            buffer.extend(data)
            # Bytes that make up one chunk of audio to process at once.
            bytes_per_chunk = SAMPLE_RATE * CHANNELS * BYTES_PER_SAMPLE * SECONDS_PER_CHUNK
            # If transcription falls behind, drop the oldest audio so latency
            # stays bounded instead of growing without limit.
            if len(buffer) > 4 * bytes_per_chunk:
                print(f"[!] Warning: Truncating buffer from {len(buffer)} to {bytes_per_chunk}")
                buffer = buffer[-bytes_per_chunk:]
            # Once we have accumulated a full chunk of audio, process it.
            if len(buffer) >= bytes_per_chunk:
                audio_chunk = bytes(buffer[:bytes_per_chunk])
                buffer = buffer[bytes_per_chunk:]
                # Convert PCM16 -> float32 in [-1, 1), the format Whisper accepts.
                audio_np = np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) / 32768.0
                print("Processing buffer of length:", len(audio_chunk), "(remaining:", len(buffer), ")")
                result = model_audio.transcribe(audio_np, fp16=False)
                text = result.get("text", "").strip()
                # no_speech_prob of the final segment; 1.0 (certain silence) when
                # Whisper produced no segments at all.
                no_speech_prob = result["segments"][-1]["no_speech_prob"] if result.get("segments") else 1.0
                language = result.get("language", "unknown")
                print(text, no_speech_prob, language)
                # Only react to non-empty, confidently-voiced English speech.
                if text and no_speech_prob < NO_SPEECH_THRESHOLD and language == "en":
                    # Trim history when it grows past the cap.
                    # NOTE(review): `history` is shared across client threads
                    # without a lock — verify single-client usage.
                    if len(history["messages"]) > MAX_HISTORY_MESSAGES:
                        history["messages"] = history["messages"][-MAX_HISTORY_MESSAGES:]
                    # Slot 0 is always the system prompt, even after trimming.
                    history["messages"][0] = {"role": "system", "content": PROMPT}
                    history["messages"].append({"role": "user", "content": text})
                    print(f"Person: {text}")
                    response = model_text.respond(history, config={
                        "maxTokens": 50,
                    })
                    print(f"Raw response: {response}")
                    response = _parse_flaws(str(response))
                    print(f"Parsed response: {response}")
                    history["messages"].append({"role": "assistant", "content": response})
                    # Send the reply back; skip the write when parsing produced
                    # nothing. (The original checked `response is not None`, which
                    # was always true for a str and sent bare newlines.)
                    if response:
                        conn.sendall((response + "\n").encode("utf-8"))
    except Exception as e:
        # Per-client boundary handler: log and fall through to cleanup so one
        # misbehaving client cannot take the server down.
        print(f"[!] Error with {addr}: {e}")
    finally:
        conn.close()
        print(f"[-] Disconnected {addr}")
# Called on run
def main():
    """Load both models, then accept clients forever, one thread per client."""
    global model_text, model_audio
    # Bring up the LLM first, then the speech-to-text model.
    print(f"[*] Loading LLM model ({MODEL_TEXT_NAME})")
    model_text = lms.llm(MODEL_TEXT_NAME)
    print("[*] Model loaded")
    print(f"[*] Loading Whisper model ({MODEL_AUDIO_NAME})")
    model_audio = whisper.load_model(MODEL_AUDIO_NAME)
    print("[*] Model loaded")
    # Serve until killed; each accepted connection gets a daemon worker thread.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
        listener.bind((HOST, PORT))
        listener.listen()
        print(f"[*] Listening on {HOST}:{PORT}")
        while True:
            client, client_addr = listener.accept()
            worker = threading.Thread(
                target=handle_client,
                args=(client, client_addr),
                daemon=True,
            )
            worker.start()


# Script entry point.
if __name__ == "__main__":
    main()