* Add pre-commit workflow
* Add actionlint
* Add generic hooks
* Add black, isort, shellcheck
* Add requirements and markdown linting
* Add toml
* Add Dockerfile
* Add codespell
* Use Node.js version of `markdownlint`
* Add `requirements-lint.txt`
* Use CLI version of Node.js `markdownlint`
* Add `pre-commit` instructions to `Contributing`
* `pre-commit run -a` automatic fixes
* Exclude helm templates from `check-yaml`
* Comment hooks that require installed tools
* Make `codespell` happy
* Make `actionlint` happy
* Disable `shellcheck` until it can be installed properly
* Make `markdownlint` happy
* Add note about running pre-commit
---------
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
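The bullets above map naturally onto a `.pre-commit-config.yaml`. Below is a minimal sketch of such a config: the repo URLs are the hooks' canonical sources, but the `rev` pins and the helm-template exclude path are placeholders, not the values actually used in this PR.

```yaml
# Sketch of a .pre-commit-config.yaml matching the hooks listed above.
# Repo URLs are the hooks' canonical sources; the `rev` pins are
# placeholders, NOT the versions pinned by this PR.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0  # placeholder pin
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-toml
      - id: requirements-txt-fixer
      - id: check-yaml
        exclude: ^helm/templates/  # assumed path; helm templates are not plain YAML
  - repo: https://github.com/psf/black
    rev: 24.1.1  # placeholder pin
    hooks:
      - id: black
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2  # placeholder pin
    hooks:
      - id: isort
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.6  # placeholder pin
    hooks:
      - id: codespell
  - repo: https://github.com/igorshubovych/markdownlint-cli
    rev: v0.39.0  # placeholder pin; the Node.js CLI version of markdownlint
    hooks:
      - id: markdownlint
  - repo: https://github.com/rhysd/actionlint
    rev: v1.6.26  # placeholder pin
    hooks:
      - id: actionlint
  # shellcheck is disabled for now, per the commit notes:
  # - repo: https://github.com/koalaman/shellcheck-precommit
  #   rev: v0.9.0  # placeholder pin
  #   hooks:
  #     - id: shellcheck
```

With a config like this in place, contributors enable the hooks once via `pip install pre-commit` and `pre-commit install`, and `pre-commit run -a` applies the automatic fixes across the whole tree, as the commit list above notes.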
Changed file: README.md (+18 −9)
````diff
@@ -1,13 +1,12 @@
-# vLLM Production Stack: reference stack for production vLLM deployment
-
+# vLLM Production Stack: reference stack for production vLLM deployment

 **vLLM Production Stack** project provides a reference implementation on how to build an inference stack on top of vLLM, which allows you to:

 - 🚀 Scale from single vLLM instance to distributed vLLM deployment without changing any application code
 - 💻 Monitor the stack through a web dashboard
 - 😄 Enjoy the performance benefits brought by request routing and KV cache offloading

-## Latest News:
+## Latest News

 - 🔥 vLLM Production Stack is released! Check out our [release blogs](https://blog.lmcache.ai/2025-01-21-stack-release) [01-22-2025]
 - ✨ Join us at the #production-stack channel of vLLM [slack](https://slack.vllm.ai/), LMCache [slack](https://join.slack.com/t/lmcacheworkspace/shared_invite/zt-2viziwhue-5Amprc9k5hcIdXT7XevTaQ), or fill out this [interest form](https://forms.gle/wSoeNpncmPVdXppg8) for a chat!
@@ -20,7 +19,6 @@ The stack is set up using [Helm](https://helm.sh/docs/), and contains the follow
 - **Request router**: Directs requests to appropriate backends based on routing keys or session IDs to maximize KV cache reuse.
 - **Observability stack**: monitors the metrics of the backends through [Prometheus](https://github.com/prometheus/prometheus) + [Grafana](https://grafana.com/)

-
 <img src="https://github.com/user-attachments/assets/8f05e7b9-0513-40a9-9ba9-2d3acca77c0c" alt="Architecture of the stack" width="800"/>

 ## Roadmap
@@ -42,6 +40,7 @@ We are actively working on this project and will release the following features
 ### Deployment

 vLLM Production Stack can be deployed via helm charts. Clone the repo locally and execute the following commands for a minimal deployment:
@@ -55,21 +54,18 @@ To validate the installation and send a query to the stack, refer to [this tut
 For more information about customizing the helm chart, please refer to [values.yaml](https://github.com/vllm-project/production-stack/blob/main/helm/values.yaml) and our other [tutorials](https://github.com/vllm-project/production-stack/tree/main/tutorials).

-
 ### Uninstall

 ```bash
 sudo helm uninstall vllm
 ```

-
 ## Grafana Dashboard

 ### Features

 The Grafana dashboard provides the following insights:

-
 1. **Available vLLM Instances**: Displays the number of healthy instances.
````
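The Deployment hunk above references install commands that the truncated diff does not show. As a rough sketch only: the chart path `./helm` is inferred from the `helm/values.yaml` link in the diff, and the release name `vllm` from the uninstall command; the repo's tutorials are authoritative.

```bash
# Rough sketch of a minimal deployment, inferred from the README excerpts above.
# The chart path ./helm and the release name vllm are assumptions, not verified values.
git clone https://github.com/vllm-project/production-stack.git
cd production-stack
sudo helm install vllm ./helm

# To customize, override chart values (see helm/values.yaml in the repo), e.g.:
# sudo helm install vllm ./helm -f my-values.yaml   # my-values.yaml is hypothetical
```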