@@ -551,39 +551,9 @@ if (WITH_CUDA)
551551 else ()
552552 list (APPEND LIBRARIES ${CUDA_CUBLAS_LIBRARIES} )
553553 endif ()
554- set (CUDA_LINK_LIBRARIES_KEYWORD PRIVATE )
555- cuda_add_library(${PROJECT_NAME}
556- ${SOURCES}
557- src/cuda/allocator.cc
558- src/cuda/primitives.cu
559- src/cuda/random.cu
560- src/cuda/utils.cc
561- src/ops/alibi_add_gpu.cu
562- src/ops/bias_add_gpu.cu
563- src/ops/concat_split_slide_gpu.cu
564- src/ops/conv1d_gpu.cu
565- src/ops/dequantize_gpu.cu
566- src/ops/flash_attention_gpu.cu
567- src/ops/gather_gpu.cu
568- src/ops/gumbel_max_gpu.cu
569- src/ops/layer_norm_gpu.cu
570- src/ops/mean_gpu.cu
571- src/ops/multinomial_gpu.cu
572- src/ops/rms_norm_gpu.cu
573- src/ops/rotary_gpu.cu
574- src/ops/softmax_gpu.cu
575- src/ops/tile_gpu.cu
576- src/ops/topk_gpu.cu
577- src/ops/topp_mask_gpu.cu
578- src/ops/quantize_gpu.cu
579- src/ops/nccl_ops_gpu.cu
580- src/ops/awq/gemm_gpu.cu
581- src/ops/awq/gemv_gpu.cu
582- src/ops/awq/dequantize_gpu.cu
583- )
584554 if (WITH_FLASH_ATTN)
585555 add_definitions (-DCT2_WITH_FLASH_ATTN)
586- cuda_add_library( ${PROJECT_NAME}
556+ list ( APPEND SOURCES
587557 src/ops/flash-attention/flash_fwd_hdim32_bf16_sm80.cu
588558 src/ops/flash-attention/flash_fwd_hdim32_fp16_sm80.cu
589559 src/ops/flash-attention/flash_fwd_hdim64_bf16_sm80.cu
@@ -653,6 +623,36 @@ if (WITH_CUDA)
653623 src/ops/flash-attention/flash_fwd_split_hdim256_fp16_sm80.cu
654624 PROPERTIES COMPILE_FLAGS "--use_fast_math" )
655625 endif ()
626+ set (CUDA_LINK_LIBRARIES_KEYWORD PRIVATE )
627+ cuda_add_library(${PROJECT_NAME}
628+ ${SOURCES}
629+ src/cuda/allocator.cc
630+ src/cuda/primitives.cu
631+ src/cuda/random.cu
632+ src/cuda/utils.cc
633+ src/ops/alibi_add_gpu.cu
634+ src/ops/bias_add_gpu.cu
635+ src/ops/concat_split_slide_gpu.cu
636+ src/ops/conv1d_gpu.cu
637+ src/ops/dequantize_gpu.cu
638+ src/ops/flash_attention_gpu.cu
639+ src/ops/gather_gpu.cu
640+ src/ops/gumbel_max_gpu.cu
641+ src/ops/layer_norm_gpu.cu
642+ src/ops/mean_gpu.cu
643+ src/ops/multinomial_gpu.cu
644+ src/ops/rms_norm_gpu.cu
645+ src/ops/rotary_gpu.cu
646+ src/ops/softmax_gpu.cu
647+ src/ops/tile_gpu.cu
648+ src/ops/topk_gpu.cu
649+ src/ops/topp_mask_gpu.cu
650+ src/ops/quantize_gpu.cu
651+ src/ops/nccl_ops_gpu.cu
652+ src/ops/awq/gemm_gpu.cu
653+ src/ops/awq/gemv_gpu.cu
654+ src/ops/awq/dequantize_gpu.cu
655+ )
656656
657657
658658elseif (WITH_CUDNN)
0 commit comments