Skip to content

Commit ec3a270

Browse files
committed
add fa3_mtp
1 parent 0f7a50e commit ec3a270

File tree

7,508 files changed

+1989915
-1
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

7,508 files changed

+1989915
-1
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
lightllm_kernel/_C.so
2+
**/dist/
3+
**/build/
4+
**/*.egg-info/
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# This workflow will:
2+
# - Create a new GitHub release
3+
# - Build wheels for supported architectures
4+
# - Deploy the wheels to the GitHub release
5+
# - Release the static code to PyPI
6+
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
7+
8+
name: Build wheels and deploy
9+
10+
on:
11+
create:
12+
tags:
13+
- v*
14+
15+
jobs:
16+
17+
setup_release:
18+
name: Create Release
19+
runs-on: ubuntu-latest
20+
steps:
21+
- name: Get the tag version
22+
id: extract_branch
23+
# Strip the refs/tags/ prefix from GITHUB_REF and expose the tag as the step output `branch`. Written to $GITHUB_OUTPUT because the `::set-output` workflow command is deprecated.
run: echo "branch=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
24+
shell: bash
25+
26+
- name: Create Release
27+
id: create_release
28+
uses: actions/create-release@v1
29+
env:
30+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
31+
with:
32+
tag_name: ${{ steps.extract_branch.outputs.branch }}
33+
release_name: ${{ steps.extract_branch.outputs.branch }}
34+
35+
build_wheels:
36+
name: Build Wheel
37+
needs: setup_release
38+
runs-on: ${{ matrix.os }}
39+
40+
strategy:
41+
fail-fast: false
42+
matrix:
43+
# Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
44+
# manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
45+
os: [ubuntu-22.04]
46+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
47+
torch-version: ['2.4.0', '2.5.1', '2.6.0', '2.7.1']
48+
cuda-version: ['12.9.1']
49+
# We need separate wheels that either use the C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
50+
# Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
51+
# Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
52+
# when building without C++11 ABI and using it on nvcr images.
53+
cxx11_abi: ['FALSE', 'TRUE']
54+
exclude:
55+
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
56+
# Pytorch < 2.5 does not support Python 3.13
57+
- torch-version: '2.4.0'
58+
python-version: '3.13'
59+
60+
steps:
61+
- name: Checkout
62+
uses: actions/checkout@v4
63+
64+
- name: Set up Python
65+
uses: actions/setup-python@v5
66+
with:
67+
python-version: ${{ matrix.python-version }}
68+
69+
- name: Set CUDA and PyTorch versions
70+
run: |
71+
echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
72+
echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
73+
echo "WHEEL_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1'})" >> $GITHUB_ENV
74+
echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
75+
76+
- name: Free up disk space
77+
if: ${{ runner.os == 'Linux' }}
78+
# https://github.com/easimon/maximize-build-space/blob/master/action.yml
79+
# https://github.com/easimon/maximize-build-space/tree/test-report
80+
run: |
81+
sudo rm -rf /usr/share/dotnet
82+
sudo rm -rf /opt/ghc
83+
sudo rm -rf /opt/hostedtoolcache/CodeQL
84+
85+
- name: Set up swap space
86+
if: runner.os == 'Linux'
87+
# NOTE(review): the action reference was garbled by email-obfuscation in the scraped page; restored as set-swap-space@v1.0 — confirm the tag against the upstream workflow.
uses: pierotofy/set-swap-space@v1.0
88+
with:
89+
swap-size-gb: 10
90+
91+
- name: Install CUDA ${{ matrix.cuda-version }}
92+
if: ${{ matrix.cuda-version != 'cpu' }}
93+
# NOTE(review): the action reference was garbled by email-obfuscation in the scraped page; the version tag below is a reconstruction — confirm it supports the matrix CUDA version before relying on it.
uses: Jimver/cuda-toolkit@v0.2.26
94+
id: cuda-toolkit
95+
with:
96+
cuda: ${{ matrix.cuda-version }}
97+
linux-local-args: '["--toolkit"]'
98+
# default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
99+
# method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
100+
method: 'network'
101+
sub-packages: '["nvcc"]'
102+
103+
- name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
104+
run: |
105+
pip install --upgrade pip
106+
# With python 3.13 and torch 2.5.1, unless we update typing-extensions, we get error
107+
# AttributeError: attribute '__default__' of 'typing.ParamSpec' objects is not writable
108+
pip install typing-extensions==4.12.2
109+
# We want to figure out the CUDA version to download pytorch
110+
# e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
111+
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
112+
# This code is ugly, maybe there's a better way to do this.
113+
export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
114+
minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118}[env['MATRIX_TORCH_VERSION']]; \
115+
maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128}[env['MATRIX_TORCH_VERSION']]; \
116+
print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \
117+
)
118+
if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
119+
# pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
120+
# Can't use --no-deps because we need cudnn etc.
121+
# Hard-coding this version of pytorch-triton for torch 2.6.0.dev20241001
122+
pip install jinja2
123+
pip install https://download.pytorch.org/whl/nightly/pytorch_triton-3.1.0%2Bcf34004b8a-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
124+
pip install --no-cache-dir --pre https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}/torch-${{ matrix.torch-version }}%2Bcu${TORCH_CUDA_VERSION}-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
125+
else
126+
pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
127+
fi
128+
nvcc --version
129+
python --version
130+
python -c "import torch; print('PyTorch:', torch.__version__)"
131+
python -c "import torch; print('CUDA:', torch.version.cuda)"
132+
python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
133+
shell:
134+
bash
135+
136+
- name: Build wheel
137+
run: |
138+
# We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
139+
# https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
140+
# However this still fails so I'm using a newer version of setuptools
141+
pip install setuptools==75.8.0
142+
pip install ninja packaging wheel
143+
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
144+
export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
145+
# Limit MAX_JOBS otherwise the github runner goes OOM
146+
# nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM
147+
MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2) NVCC_THREADS=2 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
148+
tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
149+
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
150+
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
151+
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
152+
153+
- name: Log Built Wheels
154+
run: |
155+
ls dist
156+
157+
- name: Get the tag version
158+
id: extract_branch
159+
# Strip the refs/tags/ prefix from GITHUB_REF and expose the tag as the step output `branch`. Written to $GITHUB_OUTPUT because the `::set-output` workflow command is deprecated.
run: echo "branch=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
160+
161+
- name: Get Release with tag
162+
id: get_current_release
163+
uses: joutvhu/get-release@v1
164+
with:
165+
tag_name: ${{ steps.extract_branch.outputs.branch }}
166+
env:
167+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
168+
169+
- name: Upload Release Asset
170+
id: upload_release_asset
171+
uses: actions/upload-release-asset@v1
172+
env:
173+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
174+
with:
175+
upload_url: ${{ steps.get_current_release.outputs.upload_url }}
176+
asset_path: ./dist/${{env.wheel_name}}
177+
asset_name: ${{env.wheel_name}}
178+
asset_content_type: application/*
179+
180+
publish_package:
181+
name: Publish package
182+
needs: [build_wheels]
183+
184+
runs-on: ubuntu-latest
185+
186+
steps:
187+
- uses: actions/checkout@v4
188+
189+
- uses: actions/setup-python@v5
190+
with:
191+
python-version: '3.10'
192+
193+
- name: Install dependencies
194+
run: |
195+
pip install ninja packaging wheel twine
196+
# Install latest setuptools with support for pypi metadata 2.2 (improved compat w/ uv)
197+
pip install setuptools==75.8.0
198+
# We don't want to download anything CUDA-related here
199+
pip install torch --index-url https://download.pytorch.org/whl/cpu
200+
201+
- name: Build core package
202+
env:
203+
FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
204+
run: |
205+
python setup.py sdist --dist-dir=dist
206+
207+
- name: Deploy
208+
env:
209+
TWINE_USERNAME: "__token__"
210+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
211+
run: |
212+
python -m twine upload dist/*

flash-attention/.gitignore

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
*.ncu-rep
2+
.DS_store
3+
4+
# Byte-compiled / optimized / DLL files
5+
__pycache__/
6+
*.py[cod]
7+
8+
# C extensions
9+
*.so
10+
11+
# Distribution / packaging
12+
bin/
13+
build/
14+
develop-eggs/
15+
dist/
16+
eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
*.egg-info/
23+
.installed.cfg
24+
*.egg
25+
.eggs/
26+
27+
# IDE-related
28+
.idea/
29+
30+
# Dev
31+
venv

flash-attention/.gitmodules

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[submodule "csrc/cutlass"]
2+
path = csrc/cutlass
3+
url = https://github.com/NVIDIA/cutlass.git
4+
[submodule "csrc/composable_kernel"]
5+
path = csrc/composable_kernel
6+
url = https://github.com/ROCm/composable_kernel.git
7+
branch = amd-master

flash-attention/AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

flash-attention/LICENSE

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
BSD 3-Clause License
2+
3+
Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file.
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions are met:
8+
9+
* Redistributions of source code must retain the above copyright notice, this
10+
list of conditions and the following disclaimer.
11+
12+
* Redistributions in binary form must reproduce the above copyright notice,
13+
this list of conditions and the following disclaimer in the documentation
14+
and/or other materials provided with the distribution.
15+
16+
* Neither the name of the copyright holder nor the names of its
17+
contributors may be used to endorse or promote products derived from
18+
this software without specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

flash-attention/MANIFEST.in

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
recursive-include csrc *.cu
2+
recursive-include csrc *.h
3+
recursive-include csrc *.cuh
4+
recursive-include csrc *.cpp
5+
recursive-include csrc *.hpp
6+
recursive-include csrc *.py
7+
8+
recursive-include flash_attn *.cu
9+
recursive-include flash_attn *.h
10+
recursive-include flash_attn *.cuh
11+
recursive-include flash_attn *.cpp
12+
recursive-include flash_attn *.hpp

flash-attention/Makefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
clean_dist:
3+
rm -rf dist/*
4+
5+
create_dist: clean_dist
6+
python setup.py sdist
7+
8+
upload_package: create_dist
9+
twine upload dist/*

0 commit comments

Comments
 (0)