-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
150 lines (126 loc) · 5.47 KB
/
server.py
File metadata and controls
150 lines (126 loc) · 5.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import socket
import threading
import io
import numpy as np
import soundfile as sf
import whisper
import lmstudio as lms
# Settings
SAMPLE_RATE = 16000      # Hz — matches Whisper's expected input rate
CHANNELS = 1             # mono PCM from the client
BYTES_PER_SAMPLE = 2     # 16-bit signed little-endian samples
SECONDS_PER_CHUNK = 10   # audio accumulated per transcription pass
# System prompt: fixed instruction the LLM sees as message[0] on every request.
PROMPT = "You are an AI designed to determine character flaws and insecurities of the person speaking. Based on the user's speech, output a comma-separated list of any flaws or insecurities. Keep responses short."
MODEL_TEXT_NAME = "qwen/qwen3-4b-2507"   # LM Studio model identifier
MODEL_AUDIO_NAME = "small"               # Whisper checkpoint size
HOST = "0.0.0.0"   # listen on all interfaces
PORT = 5000
NO_SPEECH_THRESHOLD = 0.4   # Whisper no_speech_prob above this => treat chunk as silence
MAX_HISTORY_MESSAGES = 4    # cap on retained chat messages before trimming
# DEBUG
# SECONDS_PER_CHUNK = 1
# NO_SPEECH_THRESHOLD = 0.9
# Globals
model_text = None    # LM Studio LLM handle; assigned in main()
model_audio = None   # Whisper model; assigned in main()
# Shared conversation state. NOTE(review): mutated by every client thread
# without a lock — confirm the server is only ever used by one client at a time.
history = {"messages": [{"role": "system", "content": PROMPT}]}
# For each client
def _parse_flaws(raw):
    """Normalize a raw LLM reply into an upper-case, comma-separated list.

    Strips non-ASCII characters, converts sentence punctuation to commas,
    collapses whitespace, then keeps comma-separated items in order until
    adding the next item would push the result past 100 characters.

    Args:
        raw: the model's reply as a string.

    Returns:
        The cleaned, upper-cased list (possibly empty).
    """
    cleaned = ''.join(c for c in raw.strip() if ord(c) < 128)
    cleaned = cleaned.replace("\n", " ")
    cleaned = cleaned.replace(".", ",").replace("!", ",")
    cleaned = cleaned.replace("_", " ").replace("*", "")
    # Collapse ALL whitespace runs. (The original single `replace("  ", " ")`
    # pass left runs of three or more spaces only partially collapsed.)
    cleaned = " ".join(cleaned.split())
    result = ""
    for item in cleaned.split(","):
        item = item.strip()
        if not item:
            continue
        # Stop at the first item that would exceed the ~100-char budget
        # (separator length is not counted, matching the original's loose cap).
        if len(result) + len(item) > 100:
            break
        if result:
            result += ", "
        result += item
    return result.strip().upper()


def handle_client(conn, addr):
    """Serve one client connection until it closes.

    Receives a stream of raw PCM16 mono 16 kHz audio from `conn`, transcribes
    each SECONDS_PER_CHUNK-second chunk with Whisper, asks the LLM for a list
    of flaws/insecurities when confident English speech is detected, and sends
    each parsed reply back as one newline-terminated UTF-8 line.

    Args:
        conn: connected socket-like object (recv/sendall/close).
        addr: peer address tuple, used only for logging.
    """
    print(f"[+] Connected by {addr}")
    buffer = bytearray()
    try:
        # Keep receiving data until the peer closes the connection.
        while True:
            data = conn.recv(10 * 4096)
            if not data:
                break
            buffer.extend(data)
            # Bytes that make up one chunk of audio to process at once.
            bytes_per_chunk = SAMPLE_RATE * CHANNELS * BYTES_PER_SAMPLE * SECONDS_PER_CHUNK
            # If transcription falls behind, drop the oldest audio so latency
            # stays bounded instead of growing without limit.
            if len(buffer) > 4 * bytes_per_chunk:
                print(f"[!] Warning: Truncating buffer from {len(buffer)} to {bytes_per_chunk}")
                buffer = buffer[-bytes_per_chunk:]
            # Once we have accumulated a full chunk of audio, process it.
            if len(buffer) >= bytes_per_chunk:
                audio_chunk = bytes(buffer[:bytes_per_chunk])
                buffer = buffer[bytes_per_chunk:]
                # Convert PCM16 -> float32 in [-1, 1), the format Whisper accepts.
                audio_np = np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) / 32768.0
                print("Processing buffer of length:", len(audio_chunk), "(remaining:", len(buffer), ")")
                result = model_audio.transcribe(audio_np, fp16=False)
                text = result.get("text", "").strip()
                # no_speech_prob of the final segment; 1.0 (certain silence) when
                # Whisper produced no segments at all.
                no_speech_prob = result["segments"][-1]["no_speech_prob"] if result.get("segments") else 1.0
                language = result.get("language", "unknown")
                print(text, no_speech_prob, language)
                # Only react to non-empty, confidently-voiced English speech.
                if text and no_speech_prob < NO_SPEECH_THRESHOLD and language == "en":
                    # Trim history when it grows past the cap.
                    # NOTE(review): `history` is shared across client threads
                    # without a lock — verify single-client usage.
                    if len(history["messages"]) > MAX_HISTORY_MESSAGES:
                        history["messages"] = history["messages"][-MAX_HISTORY_MESSAGES:]
                    # Slot 0 is always the system prompt, even after trimming.
                    history["messages"][0] = {"role": "system", "content": PROMPT}
                    history["messages"].append({"role": "user", "content": text})
                    print(f"Person: {text}")
                    response = model_text.respond(history, config={
                        "maxTokens": 50,
                    })
                    print(f"Raw response: {response}")
                    response = _parse_flaws(str(response))
                    print(f"Parsed response: {response}")
                    history["messages"].append({"role": "assistant", "content": response})
                    # Send the reply back; skip the write when parsing produced
                    # nothing. (The original checked `response is not None`, which
                    # was always true for a str and sent bare newlines.)
                    if response:
                        conn.sendall((response + "\n").encode("utf-8"))
    except Exception as e:
        # Per-client boundary handler: log and fall through to cleanup so one
        # misbehaving client cannot take the server down.
        print(f"[!] Error with {addr}: {e}")
    finally:
        conn.close()
        print(f"[-] Disconnected {addr}")
# Called on run
def main():
    """Load both models, then accept clients forever, one thread per client."""
    global model_text, model_audio
    # Bring up the LLM first, then the speech-to-text model.
    print(f"[*] Loading LLM model ({MODEL_TEXT_NAME})")
    model_text = lms.llm(MODEL_TEXT_NAME)
    print("[*] Model loaded")
    print(f"[*] Loading Whisper model ({MODEL_AUDIO_NAME})")
    model_audio = whisper.load_model(MODEL_AUDIO_NAME)
    print("[*] Model loaded")
    # Serve until killed; each accepted connection gets a daemon worker thread.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
        listener.bind((HOST, PORT))
        listener.listen()
        print(f"[*] Listening on {HOST}:{PORT}")
        while True:
            client, client_addr = listener.accept()
            worker = threading.Thread(
                target=handle_client,
                args=(client, client_addr),
                daemon=True,
            )
            worker.start()


# Script entry point.
if __name__ == "__main__":
    main()