# Contributing Guide

Thank you for contributing! Every real-world benchmark helps the community make better purchasing decisions.

## How to Submit

### 1. Fork & Clone

```bash
git clone https://github.com/sipeed/llmdev.guide.git
cd llmdev.guide
```

### 2. Create a Device File

```bash
cp devices/_template.md devices/your-device-name.md
```

Naming convention: `vendor-model.md`, lowercase with hyphens. Examples:
- `nvidia-jetson-orin-nano-8gb.md`
- `apple-mac-mini-m4-pro-48gb.md`
- `rockchip-rk3588-16gb.md`

### 3. Fill in the Data

Follow the YAML frontmatter format in the template.

**Required fields:**
- `id`: Unique identifier (same as filename without `.md`)
- `name`: Full product name
- `vendor`: Manufacturer
- `device_type`: Dev Board / PCIe Card / USB Accelerator / Mini PC / Server / Module
- `memory_capacity_gb`: Memory capacity in GB
- `memory_bandwidth_gbs`: Memory bandwidth in GB/s
- `price_usd`: Reference price in USD
- `power_watts`: Power consumption under load (W)
- `benchmarks`: At least one Qwen3.5 model benchmark
- `submitted_by`: Your GitHub username
- `date`: Submission date

**Per-benchmark required fields:**
- `model`: Model name (Qwen3.5-9B / Qwen3.5-27B, etc.)
- `quant`: Quantization (int4 / fp4 / int8 / fp8 / bf16 / f32)
- `framework`: Inference framework (Ollama / llama.cpp / LM Studio / vendor SDK, etc.)
- `decode_tps`: Output generation speed in tokens/s

**Per-benchmark optional fields:**
- `prefill_tps`: Prefill speed in tokens/s (if your tool reports it)
- `context_length`: Context length used during testing
- `image_encode_ms`: Image encoding time in ms (for vision models)
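Putting the fields together, a minimal frontmatter might look like this (device name and all numbers are illustrative placeholders, not real measurements; `devices/_template.md` remains the authoritative reference):

```yaml
---
id: vendor-example-board-16gb        # hypothetical device, values illustrative
name: Vendor Example Board 16GB
vendor: Vendor
device_type: Dev Board
memory_capacity_gb: 16
memory_bandwidth_gbs: 102.4
price_usd: 249
power_watts: 25
benchmarks:
  - model: Qwen3.5-9B
    quant: int4
    framework: llama.cpp
    decode_tps: 14.2
    prefill_tps: 180.5               # optional
    context_length: 4096             # optional
submitted_by: your-github-username
date: 2025-01-01
---
```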

### 4. How to Benchmark

Choose the method that works best for you:

#### Easy: Chat & Screenshot

Just run the model in Ollama or LM Studio and note the tokens/s displayed:

```bash
ollama run qwen3.5:9b-q4_K_M
```

Ask a question that generates a long response. Most tools display the generation speed (tokens/s) at the bottom of the output or in the UI. Screenshot this for your evidence.

#### Standard: Ollama Verbose

```bash
ollama run qwen3.5:9b-q4_K_M --verbose
```

This shows both **prompt eval rate** (prefill) and **eval rate** (decode) after each response. Copy these numbers directly.
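If you prefer to pull the numbers out of a saved log rather than copy them by eye, standard text tools are enough. A minimal sketch, assuming the stats lines follow Ollama's `prompt eval rate: ... tokens/s` format (this format may change between versions, so check your own output first):

```shell
# Sample stats as printed by `ollama run --verbose` (format assumed;
# verify against your version's actual output before relying on this).
cat > run.log <<'EOF'
prompt eval rate:     812.40 tokens/s
eval rate:            42.10 tokens/s
EOF

# prefill_tps comes from "prompt eval rate", decode_tps from "eval rate".
prefill_tps=$(awk -F': *' '/^prompt eval rate/ {print $2}' run.log | awk '{print $1}')
decode_tps=$(awk -F': *' '/^eval rate/ {print $2}' run.log | awk '{print $1}')
echo "prefill_tps=$prefill_tps decode_tps=$decode_tps"
```

These two values map directly onto the `prefill_tps` and `decode_tps` fields in the frontmatter.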

#### Advanced: llama-bench

```bash
# Qwen3.5-9B INT4
llama-bench -m qwen3.5-9b-q4_k_m.gguf -p 512 -n 128

# Qwen3.5-27B INT4 (if your device has enough memory)
llama-bench -m qwen3.5-27b-q4_k_m.gguf -p 512 -n 128
```

This gives precise prefill (pp) and decode (tg) speeds with multiple runs averaged.

#### Tips

- **Run the test a few times** and use a representative result (not the first cold run)
- **Ensure stable thermals**: let the device warm up, avoid thermal throttling
- **Test early in the conversation** (short context) for the most comparable results
- If you have a power meter, measure the actual system power draw under load

#### Power Measurement

A USB power meter or wall plug meter is ideal. If not available, use software readings (e.g., `tegrastats` on Jetson, `powermetrics` on Mac) and note the source.
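Once you have both a power reading and a decode speed, one handy sanity metric (not a required field) is energy per generated token: watts divided by tokens per second. A sketch with illustrative numbers, not real measurements:

```shell
# J/token = power (W) / decode speed (tokens/s).
# Example readings below are hypothetical.
power_watts=18.5
decode_tps=42.1
awk -v w="$power_watts" -v t="$decode_tps" 'BEGIN {printf "%.2f J/token\n", w / t}'
```

Mentioning this figure in the body text makes it easier to compare efficiency across devices with very different power budgets.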

### 5. Provide Evidence

In the markdown body, please include:

- **Test environment**: OS, framework version, model source
- **Screenshot or log output**: Proving the benchmark numbers are real
- **Device photo**: At least one photo of the actual device

Images can be uploaded via GitHub Issues and referenced by URL.

### 6. Submit PR

```bash
git checkout -b add-your-device-name
git add devices/your-device-name.md
git commit -m "Add benchmark: Device Name"
git push origin add-your-device-name
```

Then create a Pull Request on GitHub from that branch.

## Estimation from Other Models

If Qwen3.5 benchmarks are not yet available for your device, you may estimate from other models of **similar architecture and similar size**:

- **Dense → Dense only** (never cross Dense/MoE)
- **MoE → MoE only** (never cross Dense/MoE)
- **Use the closest size** — do not estimate across large size gaps
- **Formula**: `estimated_tps = measured_tps × (source_active_params / target_active_params)`
- Mark with `estimated: true` and `estimated_from: "description"` in the benchmark entry
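A worked example (all numbers hypothetical): if your device measured Qwen3 8B at 40.0 tok/s, the Qwen3.5-9B estimate is 40.0 × (8 / 9) ≈ 35.6, recorded in the frontmatter as:

```yaml
benchmarks:
  - model: Qwen3.5-9B
    quant: int4
    framework: llama.cpp
    decode_tps: 35.6          # estimated: 40.0 × (8 / 9)
    estimated: true
    estimated_from: "Qwen3 8B int4, llama.cpp, 40.0 tok/s measured on this device"
```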

Estimated values are displayed with an asterisk (*) on the website.

### Approved Estimation Sources

#### Dense → Dense

| Qwen3.5 Target | Active | Approved Source Models | Source Active | Factor |
|----------------|--------|-----------------------|---------------|--------|
| **9B** | 9B | Llama 3.1 8B, Qwen3 8B, Gemma 2 9B, DeepSeek-R1-Distill 8B | 8-9B | ×0.89 ~ ×1.00 |
| **27B** | 27B | Qwen3 32B, Qwen 2.5 32B, Gemma 2 27B | 27-32B | ×1.00 ~ ×1.19 |

#### MoE → MoE

| Qwen3.5 Target | Active | Approved Source Models | Source Active | Factor |
|----------------|--------|-----------------------|---------------|--------|
| **35B-A3B** | 3B | Qwen3 30B-A3B, GPT-OSS-20B (3.6B active) | 3-3.6B | ×1.00 ~ ×1.20 |
| **122B-A10B** | 10B | GPT-OSS-120B (5.1B active), Mixtral 8x7B (12.9B active) | 5.1-12.9B | ×0.51 ~ ×1.29 |
| **397B-A17B** | 17B | Qwen3 235B-A22B (22B active), DeepSeek R1 671B (37B active) | 17-37B | ×1.29 ~ ×2.18 |

## Validation

CI will automatically check:
- YAML frontmatter format
- Required fields are present
- Values are within reasonable ranges

Maintainers will manually review evidence for authenticity.

## FAQ

**Q: My device can't run Qwen3.5-27B, what do I do?**
A: No problem — submit whatever models your device can run. Not being able to run a model is itself valuable information.

**Q: Can I submit data from different frameworks on the same device?**
A: Yes, add multiple entries in `benchmarks` with different `framework` values.

**Q: I can only see one "tokens/s" number, not separate prefill/decode.**
A: That's fine — just fill in `decode_tps`. The `prefill_tps` field is optional. If you want both numbers, try `ollama run --verbose` or `llama-bench`.

**Q: Prices fluctuate a lot, what should I put?**
A: Use the price you paid, or the current mainstream channel price. Note it in the body text.

**Q: I'm not sure about the claimed TOPS figure.**
A: `tops_int8` is optional. If you fill it in, use `tops_note` to explain the methodology (e.g., "GPU only", "sparse", "GPU+DLA").