Fix #57: Make triton and flash-attn dependencies Linux-specific and update README.

SoulSniper1212 · SoulSniper1212 · commit 0e7525856864 · 2025-11-03T20:04:50.000-05:00
diff --git a/README.md b/README.md
@@ -16,32 +16,40 @@ A lightweight vLLM implementation built from scratch.
 * 📖 **Readable codebase** - Clean implementation in ~ 1,200 lines of Python code
 * ⚡ **Optimization Suite** - Prefix caching, Tensor Parallelism, Torch compilation, CUDA graph, etc.
 
+## Requirements
+
+- **OS:** Linux
+- **GPU:** NVIDIA GPU with CUDA support
+- **Python:** 3.10 - 3.12
+
+`nano-vllm` relies on `triton` and `flash-attn` for high-performance custom CUDA kernels. These packages are currently only available on Linux platforms with NVIDIA GPUs.
+
 ## Installation
 
 ```bash
 pip install git+https://github.com/GeeeekExplorer/nano-vllm.git
 ```
 
 ## Model Download
 
 To download the model weights manually, use the following command:
 ```bash
 huggingface-cli download --resume-download Qwen/Qwen3-0.6B \
   --local-dir ~/huggingface/Qwen3-0.6B/ \
   --local-dir-use-symlinks False
 ```
 
 ## Quick Start
 
 See `example.py` for usage. The API mirrors vLLM's interface with minor differences in the `LLM.generate` method:
 ```python
 from nanovllm import LLM, SamplingParams
 llm = LLM("/YOUR/MODEL/PATH", enforce_eager=True, tensor_parallel_size=1)
 sampling_params = SamplingParams(temperature=0.6, max_tokens=256)
 prompts = ["Hello, Nano-vLLM."]
 outputs = llm.generate(prompts, sampling_params)
 outputs[0]["text"]
 ```
 
 ## Benchmark
 
@@ -63,4 +71,4 @@ See `bench.py` for benchmark.
 
 ## Star History
 
-[![Star History Chart](https://api.star-history.com/svg?repos=GeeeekExplorer/nano-vllm&type=Date)](https://www.star-history.com/#GeeeekExplorer/nano-vllm&Date)
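+[![Star History Chart](https://api.star-history.com/svg?repos=GeeeekExplorer/nano-vllm&type=Date)](https://www.star-history.com/#GeeeekExplorer/nano-vllm&Date)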
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nano-vllm"
-version = "0.2.0"
+version = "0.2.1"
 authors = [{ name = "Xingkai Yu" }]
 license = "MIT"
 license-files = ["LICENSE"]
@@ -13,15 +13,15 @@ description = "a lightweight vLLM implementation built from scratch"
 requires-python = ">=3.10,<3.13"
 dependencies = [
     "torch>=2.4.0",
-    "triton>=3.0.0",
     "transformers>=4.51.0",
-    "flash-attn",
     "xxhash",
+    "triton>=3.0.0; sys_platform == 'linux'",
+    "flash-attn; sys_platform == 'linux'",
 ]
 
 [project.urls]
 Homepage="https://github.com/GeeeekExplorer/nano-vllm"
 
 [tool.setuptools.packages.find]
 where = ["."]
-include = ["nanovllm*"]
+include = ["nanovllm*"]