
Commit f80ab35

Clean up remaining Punica C information (#7027)
1 parent 16a1cc9 commit f80ab35

5 files changed: +3 lines, -15 lines

.github/workflows/clang-format.yml

Lines changed: 0 additions & 6 deletions
@@ -30,12 +30,6 @@ jobs:
         run: |
           EXCLUDES=(
               'csrc/moe/topk_softmax_kernels.cu'
-              'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
-              'csrc/punica/bgmv/bgmv_config.h'
-              'csrc/punica/bgmv/bgmv_impl.cuh'
-              'csrc/punica/bgmv/vec_dtypes.cuh'
-              'csrc/punica/punica_ops.cu'
-              'csrc/punica/type_convert.h'
           )
           find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
               | grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
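As context for the workflow change: this step lists C/C++/CUDA sources under csrc/ and drops any path matching an EXCLUDES entry via grep -vFf (fixed-string patterns, inverted match). A minimal Python sketch of that filtering, for illustration only and not part of this commit:

# Sketch only: mimics the find | grep -vFf exclusion used in clang-format.yml.
from pathlib import Path

EXCLUDES = [
    "csrc/moe/topk_softmax_kernels.cu",  # the only remaining exclusion after this commit
]

def files_to_format(root: str = "csrc") -> list[str]:
    exts = {".h", ".cpp", ".cu", ".cuh"}
    paths = (str(p) for p in Path(root).rglob("*") if p.suffix in exts)
    # grep -vF keeps lines that contain none of the fixed strings
    return [p for p in paths if not any(excl in p for excl in EXCLUDES)]

if __name__ == "__main__":
    print("\n".join(files_to_format()))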

cmake/utils.cmake

Lines changed: 1 addition & 1 deletion
@@ -181,7 +181,7 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
   #
   # The torch cmake setup hardcodes the detected architecture flags in
   # `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
-  # can't modified on a per-target basis, e.g. for the `punica` extension.
+  # can't modified on a per-target basis.
   # So, all the `-gencode` flags need to be extracted and removed from
   # `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
   # Since it's not possible to use `target_compiler_options` for adding target
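The surviving comment describes the approach in override_gpu_arches: CMAKE_CUDA_FLAGS is global, so the -gencode flags are extracted and stripped from it and then re-applied per target by another mechanism. A rough Python illustration of that extract-and-strip idea, assuming flags of the form "-gencode arch=...,code=..." (this is not vLLM's actual CMake logic):

# Sketch only: separate -gencode pairs from a global flag string so they can
# be re-applied per target.
import re

def split_gencode_flags(cuda_flags: str) -> tuple[str, list[str]]:
    gencode = re.findall(r"-gencode\s+\S+", cuda_flags)
    remaining = re.sub(r"-gencode\s+\S+\s*", "", cuda_flags).strip()
    return remaining, gencode

flags = "-O3 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_90,code=sm_90"
rest, arch_flags = split_gencode_flags(flags)
# rest == "-O3"; arch_flags holds the per-architecture flags to pass separately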

format.sh

Lines changed: 0 additions & 6 deletions
@@ -242,12 +242,6 @@ echo 'vLLM isort: Done'
 # NOTE: Keep up to date with .github/workflows/clang-format.yml
 CLANG_FORMAT_EXCLUDES=(
     'csrc/moe/topk_softmax_kernels.cu'
-    'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
-    'csrc/punica/bgmv/bgmv_config.h'
-    'csrc/punica/bgmv/bgmv_impl.cuh'
-    'csrc/punica/bgmv/vec_dtypes.cuh'
-    'csrc/punica/punica_ops.cu'
-    'csrc/punica/type_convert.h'
 )

 # Format specified files with clang-format

vllm/config.py

Lines changed: 1 addition & 1 deletion
@@ -1304,7 +1304,7 @@ class LoRAConfig:
     long_lora_scaling_factors: Optional[Tuple[float]] = None

     def __post_init__(self):
-        # Keep this in sync with csrc/punica/bgmv/bgmv_config.h
+        # TODO: Increase the range of rank
         possible_max_ranks = (8, 16, 32, 64)
         possible_lora_extra_vocab_size = (0, 256, 512)
         if self.max_lora_rank not in possible_max_ranks:
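With the pointer to bgmv_config.h gone, the set of supported LoRA ranks is maintained only in this Python check. A simplified, self-contained approximation of the validation shown in the hunk (field names are taken from the diff; the real LoRAConfig has more fields and checks):

# Sketch only: approximates the __post_init__ validation above.
from dataclasses import dataclass

@dataclass
class LoRAConfigSketch:
    max_lora_rank: int = 16
    lora_extra_vocab_size: int = 256

    def __post_init__(self):
        possible_max_ranks = (8, 16, 32, 64)
        possible_lora_extra_vocab_size = (0, 256, 512)
        if self.max_lora_rank not in possible_max_ranks:
            raise ValueError(
                f"max_lora_rank ({self.max_lora_rank}) must be one of "
                f"{possible_max_ranks}.")
        if self.lora_extra_vocab_size not in possible_lora_extra_vocab_size:
            raise ValueError(
                f"lora_extra_vocab_size ({self.lora_extra_vocab_size}) must be "
                f"one of {possible_lora_extra_vocab_size}.")

LoRAConfigSketch(max_lora_rank=32)    # passes validation
# LoRAConfigSketch(max_lora_rank=48)  # would raise ValueError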

vllm/lora/layers.py

Lines changed: 1 addition & 1 deletion
@@ -1073,7 +1073,7 @@ def create_lora_weights(
         lora_config: LoRAConfig,
         model_config: Optional[PretrainedConfig] = None,
     ) -> None:
-        # Keep this in sync with csrc/punica/bgmv/bgmv_config.h
+        # TODO: Verify if this condition can be relaxed
         if 32000 < self.base_layer.vocab_size > 128512:
             raise ValueError("When using LoRA, vocab size must be "
                              "32000 >= vocab_size <= 128512")
