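"""Setup wizard for the Peridot local inference stack.

Scans for an NVIDIA GPU, installs core dependencies plus a matching
llama-cpp-python backend, then downloads the selected GGUF model from
Hugging Face and stages it as models/brain.gguf for server.py.
"""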
import os
import sys
import subprocess
import platform
import shutil
# --- CONFIGURATION ---
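# Menu key -> model metadata. min_vram (GB) mirrors the VRAM thresholds
# main() applies when recommending a default.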
MODELS = {
"1": {
"name": "Peridot Lite (Phi-3 Mini)",
"repo": "microsoft/Phi-3-mini-4k-instruct-gguf",
"filename": "Phi-3-mini-4k-instruct-q4.gguf",
"min_vram": 2,
"desc": "Fastest. Best for Intel UHD, Iris Xe, or non-NVIDIA GPUs.",
},
"2": {
"name": "Peridot Standard (Llama-3 8B Quantized)",
"repo": "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF",
"filename": "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",
"min_vram": 6,
"desc": "Balanced. The Gold Standard for RTX 3060/4060/5050.",
},
"3": {
"name": "Peridot Pro (Mistral 7B v0.3)",
"repo": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3.Q6_K.gguf",
"min_vram": 10,
"desc": "High Fidelity. Requires 12GB+ VRAM (RTX 3080/4090).",
},
}
def check_engine_installed():
"""Checks if the Inference Engine is already working."""
try:
import llama_cpp
return True
except ImportError:
return False
def install_deps(has_nvidia):
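    """Install base packages, then a llama-cpp-python build matching the hardware."""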
print(">> [1/4] Checking Core Dependencies...")
    # Base packages (pip skips any that are already satisfied)
pkgs = [
"huggingface_hub",
"requests",
"colorama",
"pynvml",
"psutil",
"sounddevice",
"numpy",
"pillow",
"flask",
"flask-cors",
]
subprocess.check_call([sys.executable, "-m", "pip", "install"] + pkgs)
# Llama-cpp-python (Hardware Accelerated)
print(">> [2/4] Verifying Inference Engine...")
if check_engine_installed():
print(" [SKIP] Engine already installed. Skipping to prevent conflicts.")
return
if has_nvidia:
print(" [GPU] NVIDIA Detected. Attempting binary install...")
# Use --prefer-binary to avoid compilation errors
cmd = [
sys.executable,
"-m",
"pip",
"install",
"llama-cpp-python",
"--prefer-binary",
"--extra-index-url",
"https://abetlen.github.io/llama-cpp-python/whl/cu124",
"--no-cache-dir",
]
try:
subprocess.check_call(cmd)
        except subprocess.CalledProcessError:
            print(" [WARN] CUDA 12.4 install failed. Trying CUDA 12.1 fallback...")
            # cmd[7] is the index URL itself; cmd[6] is the "--extra-index-url" flag
            cmd[7] = "https://abetlen.github.io/llama-cpp-python/whl/cu121"
subprocess.check_call(cmd)
else:
print(" [CPU] No NVIDIA GPU. Installing CPU backend...")
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "llama-cpp-python"]
)
def detect_gpu():
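    """Return the first NVIDIA GPU's name and total VRAM (GB), or None."""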
try:
import pynvml
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
        name = pynvml.nvmlDeviceGetName(handle)
        # Older pynvml releases return the device name as bytes
        if isinstance(name, bytes):
            name = name.decode("utf-8", errors="replace")
        return {"name": name, "vram": round(mem.total / (1024**3), 1)}
    except Exception:
        return None
def main():
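    """Run the interactive setup: scan hardware, install deps, fetch a model."""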
print(f"\n{'='*50}")
print(" PERIDOT SOVEREIGN KERNEL | SETUP WIZARD")
print(f"{'='*50}\n")
# 1. Hardware Scan
print(">> Scanning Hardware...")
gpu = detect_gpu()
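    # Default to the lightest model; bump the recommendation by detected VRAM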
rec_model = "1"
if gpu:
print(f" [DETECTED] GPU: {gpu['name']} ({gpu['vram']} GB VRAM)")
if gpu["vram"] >= 10:
rec_model = "3"
elif gpu["vram"] >= 6:
rec_model = "2"
else:
print(" [DETECTED] Integrated Graphics / CPU Mode")
print(" [NOTE] Running in Lite Mode.")
# 2. Install Deps
install_deps(has_nvidia=bool(gpu))
# 3. Model Selection
print("\n>> Select Inference Engine:")
for k, v in MODELS.items():
tag = " <--- RECOMMENDED" if k == rec_model else ""
print(f" [{k}] {v['name']}{tag}")
print(f" {v['desc']}")
choice = input("\n Enter Choice (1-3): ").strip()
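    # Fall back to the hardware-based recommendation on invalid input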
if choice not in MODELS:
choice = rec_model
selected = MODELS[choice]
print(f"\n>> [3/4] Downloading {selected['name']}...")
from huggingface_hub import hf_hub_download
os.makedirs("models", exist_ok=True)
try:
path = hf_hub_download(
selected["repo"], selected["filename"], local_dir="models"
)
        # Stage under the generic name 'brain.gguf' so server.py finds it
        print("\n>> [4/4] Staging model as brain.gguf...")
        dest = os.path.join("models", "brain.gguf")
        if os.path.exists(dest):
            os.remove(dest)
        # shutil.move also works if the download landed on a different
        # filesystem, where a bare os.rename would raise OSError
        shutil.move(path, dest)
print(f"\n[SUCCESS] Engine Installed: {selected['filename']}")
print(f"{'='*50}")
print("SETUP COMPLETE. Run 'python launcher.py' to begin.")
print(f"{'='*50}")
    except Exception as e:
        print(f"[ERROR] Download failed: {e}")
        sys.exit(1)
if __name__ == "__main__":
main()