From 945c71fb1e70165f6888ebf9d54563663d19edf2 Mon Sep 17 00:00:00 2001
From: BasicCoder
Date: Wed, 21 Jun 2023 11:43:49 +0800
Subject: [PATCH] remove duplicate code

Remove a duplicated block that repeated the same workspace-size
computation and memory reallocation.
---
 .../layers/attention_layers/GptContextAttentionLayer.cc | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc b/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc
index 2a5b7fecc..80b0e6599 100644
--- a/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc
+++ b/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc
@@ -565,14 +565,6 @@ void GptContextAttentionLayer<T>::allocateBuffer(size_t batch_size, size_t seq_l
         }
     }
 
-    if (int8_mode_ == 1) {
-        // We use max_size for n and k since we reuse buffers for both FCs and want to allocate the max
-        // possible memory that would be required by any of the individual gemms.
-        const int max_size = std::max(hidden_units_, 3 * local_hidden_units_);
-        mixed_gemm_ws_bytes_ = weight_only_int8_fc_runner_->getWorkspaceSize(batch_size * seq_len, max_size, max_size);
-        mixed_gemm_workspace_ = (char*)allocator_->reMalloc(mixed_gemm_workspace_, mixed_gemm_ws_bytes_, false);
-    }
-
     if (int8_mode_ == 1) {
         // We use max_size for n and k since we reuse buffers for both FCs and want to allocate the max
         // possible memory that would be required by any of the individual gemms.
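
For illustration, below is a minimal, self-contained C++ sketch of the pattern this
patch removes. The Allocator type, the workspace-size arithmetic, and the sizes are
simplified stand-ins, not the FasterTransformer API: in the real code the size comes
from weight_only_int8_fc_runner_->getWorkspaceSize() and the buffer from
allocator_->reMalloc(). Before the patch, the identical block ran twice in a row, so
the second pass freed and re-allocated the exact same workspace with no effect.

    // Simplified model of the duplicated allocation block (hypothetical names).
    #include <algorithm>
    #include <cstdio>
    #include <cstdlib>

    struct Allocator {
        // Stand-in for allocator_->reMalloc(ptr, size, /*set_zero=*/false):
        // free the old buffer and allocate `size` fresh bytes.
        void* reMalloc(void* ptr, size_t size, bool /*set_zero*/) {
            std::free(ptr);
            return std::malloc(size);
        }
    };

    int main() {
        Allocator allocator;
        const size_t hidden_units       = 1024;  // illustrative sizes only
        const size_t local_hidden_units = 1024;
        const size_t batch_size = 8, seq_len = 128;

        char*  mixed_gemm_workspace = nullptr;
        size_t mixed_gemm_ws_bytes  = 0;

        // The patch keeps exactly one copy of this block. Running it a second
        // time with identical inputs only repeats the reallocation.
        const size_t max_size = std::max(hidden_units, 3 * local_hidden_units);
        mixed_gemm_ws_bytes   = batch_size * seq_len * max_size;  // stand-in for getWorkspaceSize()
        mixed_gemm_workspace  = (char*)allocator.reMalloc(mixed_gemm_workspace, mixed_gemm_ws_bytes, false);

        std::printf("workspace bytes: %zu\n", mixed_gemm_ws_bytes);
        std::free(mixed_gemm_workspace);
        return 0;
    }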