From 945c71fb1e70165f6888ebf9d54563663d19edf2 Mon Sep 17 00:00:00 2001
From: BasicCoder
Date: Wed, 21 Jun 2023 11:43:49 +0800
Subject: [PATCH] remove duplicate code

Remove a duplicated block that repeated the same workspace-size
computation and memory reallocation.
---
 .../layers/attention_layers/GptContextAttentionLayer.cc | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc b/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc
index 2a5b7fecc..80b0e6599 100644
--- a/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc
+++ b/src/fastertransformer/layers/attention_layers/GptContextAttentionLayer.cc
@@ -565,14 +565,6 @@ void GptContextAttentionLayer<T>::allocateBuffer(size_t batch_size, size_t seq_l
         }
     }
 
-    if (int8_mode_ == 1) {
-        // We use max_size for n and k since we reuse buffers for both FCs and want to allocate the max
-        // possible memory that would be required by any of the individual gemms.
-        const int max_size = std::max(hidden_units_, 3 * local_hidden_units_);
-        mixed_gemm_ws_bytes_ = weight_only_int8_fc_runner_->getWorkspaceSize(batch_size * seq_len, max_size, max_size);
-        mixed_gemm_workspace_ = (char*)allocator_->reMalloc(mixed_gemm_workspace_, mixed_gemm_ws_bytes_, false);
-    }
-
     if (int8_mode_ == 1) {
         // We use max_size for n and k since we reuse buffers for both FCs and want to allocate the max
         // possible memory that would be required by any of the individual gemms.
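
For illustration, below is a minimal, self-contained C++ sketch of the pattern this
patch removes. The Allocator type, the workspace-size arithmetic, and the sizes are
simplified stand-ins, not the FasterTransformer API: in the real code the size comes
from weight_only_int8_fc_runner_->getWorkspaceSize() and the buffer from
allocator_->reMalloc(). Before the patch, the identical block ran twice in a row, so
the second pass freed and re-allocated the exact same workspace with no effect.

    // Simplified model of the duplicated allocation block (hypothetical names).
    #include <algorithm>
    #include <cstdio>
    #include <cstdlib>

    struct Allocator {
        // Stand-in for allocator_->reMalloc(ptr, size, /*set_zero=*/false):
        // free the old buffer and allocate `size` fresh bytes.
        void* reMalloc(void* ptr, size_t size, bool /*set_zero*/) {
            std::free(ptr);
            return std::malloc(size);
        }
    };

    int main() {
        Allocator allocator;
        const size_t hidden_units       = 1024;  // illustrative sizes only
        const size_t local_hidden_units = 1024;
        const size_t batch_size = 8, seq_len = 128;

        char*  mixed_gemm_workspace = nullptr;
        size_t mixed_gemm_ws_bytes  = 0;

        // The patch keeps exactly one copy of this block. Running it a second
        // time with identical inputs only repeats the reallocation.
        const size_t max_size = std::max(hidden_units, 3 * local_hidden_units);
        mixed_gemm_ws_bytes   = batch_size * seq_len * max_size;  // stand-in for getWorkspaceSize()
        mixed_gemm_workspace  = (char*)allocator.reMalloc(mixed_gemm_workspace, mixed_gemm_ws_bytes, false);

        std::printf("workspace bytes: %zu\n", mixed_gemm_ws_bytes);
        std::free(mixed_gemm_workspace);
        return 0;
    }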