
Commit ecf209d

aster-void and claude committed
feat: add llama-cpp service, lsp-ai/llm-ls, and hypridle
- llama-cpp: add NixOS service with Vulkan support
  - auto-download Qwen2.5-Coder-7B-Instruct via fetchurl
  - configurable via my.profiles.development.llama-cpp.enable
- helix: configure lsp-ai and llm-ls for local llama-cpp backend
- hypridle: lock screen at 5min, screen off at 15min

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 19a082b commit ecf209d

7 files changed: +115 additions, -6 deletions

home-manager/global/dev.nix

Lines changed: 1 addition & 0 deletions
@@ -99,6 +99,7 @@
     # General LSPs
     lsp-ai
+    llm-ls
     helix-gpt
   ])
   # mcp servers

home-manager/global/services/default.nix

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 {
   imports = [
+    ./hypridle.nix
     ./syncthing.nix
   ];

home-manager/global/services/hypridle.nix

Lines changed: 26 additions & 0 deletions

@@ -0,0 +1,26 @@
+{
+  services.hypridle = {
+    enable = true;
+    settings = {
+      general = {
+        lock_cmd = "pidof hyprlock || hyprlock";
+        before_sleep_cmd = "loginctl lock-session";
+        after_sleep_cmd = "hyprctl dispatch dpms on";
+      };
+
+      listener = [
+        # Lock after 5 minutes (300 s)
+        {
+          timeout = 300;
+          on-timeout = "loginctl lock-session";
+        }
+        # Turn the screen off after 15 minutes (900 s)
+        {
+          timeout = 900;
+          on-timeout = "hyprctl dispatch dpms off";
+          on-resume = "hyprctl dispatch dpms on";
+        }
+      ];
+    };
+  };
+}
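The lock_cmd above relies on hyprlock being installed and configured elsewhere in this configuration; a minimal companion sketch, assuming the Home Manager programs.hyprlock module is used for that (hypothetical, not part of this commit):

{
  # Hypothetical: provide the locker that hypridle's lock_cmd shells out to.
  programs.hyprlock.enable = true;
}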

nixos/profiles/default.nix

Lines changed: 10 additions & 3 deletions
@@ -15,9 +15,16 @@
       type = lib.types.bool;
       default = false;
     };
-    development.enable = lib.mkOption {
-      type = lib.types.bool;
-      default = true;
+    development = {
+      enable = lib.mkOption {
+        type = lib.types.bool;
+        default = true;
+      };
+      llama-cpp.enable = lib.mkOption {
+        type = lib.types.bool;
+        default = true;
+        description = "Enable llama-cpp server with Vulkan support";
+      };
     };
   };
 };
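Both new options default to true, so any host pulling in the development profile also gets the local model server; a minimal sketch of opting out on a single machine (hypothetical host config, not part of this commit):

{
  # Keep the development profile, but skip the llama-cpp service on this host.
  my.profiles.development = {
    enable = true;
    llama-cpp.enable = false;
  };
}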

nixos/profiles/development/default.nix

Lines changed: 1 addition & 0 deletions
@@ -2,5 +2,6 @@
   imports = [
     ./virtualization.nix
     ./packages.nix
+    ./llama-cpp.nix
   ];
 }
nixos/profiles/development/llama-cpp.nix

Lines changed: 51 additions & 0 deletions

@@ -0,0 +1,51 @@
+{
+  lib,
+  pkgs,
+  config,
+  ...
+}: let
+  cfg = config.my.profiles.development;
+  llamaVulkan = pkgs.llama-cpp.override {vulkanSupport = true;};
+
+  # Qwen2.5-Coder-7B-Instruct Q4_K_M (4.68 GB)
+  # https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF
+  model = pkgs.fetchurl {
+    url = "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf";
+    hash = "sha256-UJKH94y01M9rOENzRzO5FLLBWOQ+Iqf0v16WOACJTTw=";
+  };
+in {
+  config = lib.mkIf (cfg.enable && cfg.llama-cpp.enable) {
+    services.llama-cpp = {
+      enable = true;
+      package = llamaVulkan;
+      host = "127.0.0.1";
+      port = 11434;
+      model = "${model}";
+      openFirewall = false;
+      extraFlags = [
+        "--ctx-size"
+        "4096"
+        "--threads"
+        "12"
+        "--batch-size"
+        "256"
+        "--ubatch-size"
+        "32"
+        "--n-gpu-layers"
+        "22"
+        "--flash-attn"
+        "--cont-batching"
+        "--temp"
+        "0.15"
+        "--top-p"
+        "0.9"
+        "--top-k"
+        "40"
+        "--repeat-penalty"
+        "1.05"
+      ];
+    };
+
+    hardware.graphics.enable = true;
+  };
+}
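Only the Vulkan path is exercised here. If the nixpkgs llama-cpp derivation on the target channel exposes other acceleration flags (rocmSupport is assumed below and not confirmed by this commit), the service package could be swapped per host without editing this module:

{
  lib,
  pkgs,
  ...
}: {
  # Hypothetical per-host override; mkForce takes priority over the
  # llamaVulkan package set by the development profile above.
  services.llama-cpp.package =
    lib.mkForce (pkgs.llama-cpp.override {rocmSupport = true;});
}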

stow/.config/helix/languages.toml

Lines changed: 25 additions & 3 deletions
@@ -10,12 +10,34 @@ deno = { command = "deno", args = ["lsp"] }
 lsp-ai.command = "lsp-ai"
 helix-gpt.command = "helix-gpt"
 
+# lsp-ai: local llama-cpp backend
 [language-server.lsp-ai.config.models.model1]
 type = "open_ai"
-chat_endpoint = "https://api.groq.com/openai/v1/chat/completions"
-model = "llama3-70b-8192"
+chat_endpoint = "http://127.0.0.1:11434/v1/chat/completions"
+model = "qwen2.5-coder-7b-instruct-q4_k_m"
 max_requests_per_second = 1
-auth_token_env_var_name = "GROQ_API_KEY"
+
+# llm-ls: local llama-cpp backend (for code completion)
+[language-server.llm-ls]
+command = "llm-ls"
+
+[language-server.llm-ls.config]
+backend = "llama_cpp"
+url = "http://127.0.0.1:11434"
+model = "qwen2.5-coder-7b-instruct-q4_k_m"
+context_window = 4096
+tls_skip_verify_insecure = false
+
+[language-server.llm-ls.config.fim]
+enabled = true
+prefix = "<|fim_prefix|>"
+middle = "<|fim_middle|>"
+suffix = "<|fim_suffix|>"
+
+[language-server.llm-ls.config.request_body]
+max_tokens = 128
+temperature = 0.15
+top_p = 0.9
 
 [[language]]
 name = "nix"
