vectorch-ai
diff --git a/‎src/layers/fused_linear.cpp‎
Lines changed: 27 additions & 44 deletions b/‎src/layers/fused_linear.cpp‎
Lines changed: 27 additions & 44 deletions
diff --git a/‎src/layers/fused_linear.h‎
Lines changed: 1 addition & 11 deletions b/‎src/layers/fused_linear.h‎
Lines changed: 1 addition & 11 deletions
diff --git a/‎src/layers/linear.cpp‎
Lines changed: 68 additions & 17 deletions b/‎src/layers/linear.cpp‎
Lines changed: 68 additions & 17 deletions
diff --git a/‎src/layers/linear.h‎
Lines changed: 17 additions & 2 deletions b/‎src/layers/linear.h‎
Lines changed: 17 additions & 2 deletions
@@ -19,20 +19,21 @@ FusedColumnParallelLinearImpl::FusedColumnParallelLinearImpl(
     const QuantArgs& quant_args,
     const ParallelArgs& parallel_args,
     const torch::TensorOptions& options) {
-  prefixes_ = prefixes;
   // check if the linear layers can be fused
   fused_ = quant_args.can_be_fused();
   if (fused_) {
     // fused linear layer
-    const int64_t out_features = std::accumulate(
-        out_features_vec.begin(), out_features_vec.end(), int64_t(0));
-    fused_linear_ = ColumnParallelLinear(in_features,
-                                         out_features,
-                                         bias,
-                                         gather_output,
-                                         quant_args,
-                                         parallel_args,
-                                         options);
+    fused_linear_ = register_module("fused_linear",
+                                    ColumnParallelLinear(in_features,
+                                                         out_features_vec,
+                                                         prefixes,
+                                                         bias,
+                                                         gather_output,
+                                                         quant_args,
+                                                         parallel_args,
+                                                         options),
+                                    /*selector=*/nullptr);
+    // TODO: clean up following code for calculating split sizes
     // calculate split sizes
     split_sizes_.reserve(out_features_vec.size());
     const auto world_size = parallel_args.world_size();
@@ -45,14 +46,22 @@ FusedColumnParallelLinearImpl::FusedColumnParallelLinearImpl(
   } else {
     // non-fused linear layers
     parallel_linears_.reserve(out_features_vec.size());
-    for (const auto& out_features : out_features_vec) {
-      parallel_linears_.emplace_back(in_features,
-                                     out_features,
-                                     bias,
-                                     gather_output,
-                                     quant_args,
-                                     parallel_args,
-                                     options);
+    for (size_t i = 0; i < out_features_vec.size(); ++i) {
+      const auto& prefix = prefixes[i];
+      const auto out_features = out_features_vec[i];
+
+      const auto linear = register_module("linear",
+                                          ColumnParallelLinear(in_features,
+                                                               out_features,
+                                                               bias,
+                                                               gather_output,
+                                                               quant_args,
+                                                               parallel_args,
+                                                               options,
+                                                               prefix),
+                                          /*selector=*/nullptr);
+
+      parallel_linears_.emplace_back(linear);
     }
   }
 }
@@ -73,30 +82,4 @@ std::vector<torch::Tensor> FusedColumnParallelLinearImpl::forward(
   }
   return outputs;
 }
-
-size_t FusedColumnParallelLinearImpl::load(const StateDict& state_dict,
-                                           const std::string&) {
-  if (fused_) {
-    fused_linear_->load_state_dict(state_dict, prefixes_);
-  } else {
-    CHECK_EQ(parallel_linears_.size(), prefixes_.size());
-    for (size_t i = 0; i < parallel_linears_.size(); ++i) {
-      parallel_linears_[i]->load_state_dict(state_dict.select(prefixes_[i]));
-    }
-  }
-  return 0;
-}
-
-bool FusedColumnParallelLinearImpl::verify(
-    const std::string& name_prefix) const {
-  if (fused_) {
-    fused_linear_->verify_loaded_weights(name_prefix);
-  } else {
-    for (const auto& parallel_linear : parallel_linears_) {
-      parallel_linear->verify_loaded_weights(name_prefix);
-    }
-  }
-  return true;
-}
-
 }  // namespace llm
@@ -25,14 +25,6 @@ class FusedColumnParallelLinearImpl : public Module {
 
   std::vector<torch::Tensor> forward(torch::Tensor input);
 
-  // load weights from the checkpoint, override this method if necessary
-  // returns the number of loaded parameters
-  size_t load(const StateDict& state_dict,
-              const std::string& name_prefix = std::string()) override;
-
-  // verify whether the weights are loaded, override this method if necessary
-  bool verify(const std::string& name_prefix = std::string()) const override;
-
   // whether the linear layer is fused
   bool fused() const { return fused_; }
 
@@ -43,11 +35,9 @@ class FusedColumnParallelLinearImpl : public Module {
   // fused linear layer
   ColumnParallelLinear fused_linear_{nullptr};
 
-  // sizes for each split
+  // size for each split
   std::vector<int64_t> split_sizes_;
 
-  std::vector<std::string> prefixes_;
-
   // whether the linear layer is fused
   bool fused_ = false;
 };
 
@@ -38,18 +38,6 @@ namespace {
                                       parallel_args,    \
                                       options);
 
-#define MAKE_ROW_PARALLEL_LINEAR(LinearlImplClass)          \
-  std::make_shared<LinearlImplClass>(in_features,           \
-                                     out_features,          \
-                                     bias,                  \
-                                     input_is_parallelized, \
-                                     parallel_args,         \
-                                     options);
-
-#define MAKE_COLUMN_PARALLEL_LINEAR(LinearlImplClass) \
-  std::make_shared<LinearlImplClass>(                 \
-      in_features, out_features, bias, gather_output, parallel_args, options);
-
 std::shared_ptr<ParallelLinearImpl> create_column_parallel_qlinear_by_impl(
     int64_t in_features,
     int64_t out_features,
@@ -139,6 +127,7 @@ std::shared_ptr<ParallelLinearImpl> create_column_parallel_qlinear(
   }
   // not supported quant method
   LOG(FATAL) << "Unsupported quant method: " << quant_args.quant_method();
+  return nullptr;
 }
 
 std::shared_ptr<ParallelLinearImpl> create_row_parallel_qlinear(
@@ -170,6 +159,7 @@ std::shared_ptr<ParallelLinearImpl> create_row_parallel_qlinear(
   }
   // not supported quant method
   LOG(FATAL) << "Unsupported quant method: " << quant_args.quant_method();
+  return nullptr;
 }
 
 std::shared_ptr<ParallelLinearImpl> create_column_parallel_linear(
@@ -179,7 +169,8 @@ std::shared_ptr<ParallelLinearImpl> create_column_parallel_linear(
     bool gather_output,
     const QuantArgs& quant_args,
     const ParallelArgs& parallel_args,
-    const torch::TensorOptions& options) {
+    const torch::TensorOptions& options,
+    const std::string& prefix) {
   if (!quant_args.quant_method().empty()) {
     return create_column_parallel_qlinear(in_features,
                                           out_features,
@@ -189,7 +180,40 @@ std::shared_ptr<ParallelLinearImpl> create_column_parallel_linear(
                                           parallel_args,
                                           options);
   }
-  return MAKE_COLUMN_PARALLEL_LINEAR(ColumnParallelLinearImpl);
+  return std ::make_shared<ColumnParallelLinearImpl>(in_features,
+                                                     out_features,
+                                                     bias,
+                                                     gather_output,
+                                                     parallel_args,
+                                                     options,
+                                                     prefix);
+}
+
+std::shared_ptr<ParallelLinearImpl> create_column_parallel_linear(  // overload taking vectors of output sizes and prefixes
+    int64_t in_features,
+    const std::vector<int64_t>& out_features,
+    const std::vector<std::string>& prefixes,
+    bool bias,
+    bool gather_output,
+    const QuantArgs& quant_args,  // NOTE(review): unused while the dispatch below stays commented out
+    const ParallelArgs& parallel_args,
+    const torch::TensorOptions& options) {
+  // if (!quant_args.quant_method().empty()) {
+  //   return create_column_parallel_qlinear(in_features,
+  //                                         out_features,
+  //                                         bias,
+  //                                         gather_output,
+  //                                         quant_args,
+  //                                         parallel_args,
+  //                                         options);
+  // }
+  return std ::make_shared<FColumnParallelLinearImpl>(in_features,  // always builds this impl, whatever quant_args holds
+                                                      out_features,
+                                                      prefixes,
+                                                      bias,
+                                                      gather_output,
+                                                      parallel_args,
+                                                      options);
+}
 
 std::shared_ptr<ParallelLinearImpl> create_row_parallel_linear(
@@ -209,7 +233,13 @@ std::shared_ptr<ParallelLinearImpl> create_row_parallel_linear(
                                        parallel_args,
                                        options);
   }
-  return MAKE_ROW_PARALLEL_LINEAR(RowParallelLinearImpl);
+  return std ::make_shared<RowParallelLinearImpl>(in_features,
+                                                  out_features,
+                                                  bias,
+                                                  input_is_parallelized,
+                                                  parallel_args,
+                                                  options);
+  ;
 }
 }  // namespace
 
@@ -221,9 +251,29 @@ ColumnParallelLinear::ColumnParallelLinear(int64_t in_features,
                                            bool gather_output,
                                            const QuantArgs& quant_args,
                                            const ParallelArgs& parallel_args,
-                                           const torch::TensorOptions& options)
+                                           const torch::TensorOptions& options,
+                                           const std::string& prefix)
+    : ModuleHolder(create_column_parallel_linear(in_features,
+                                                 out_features,
+                                                 bias,
+                                                 gather_output,
+                                                 quant_args,
+                                                 parallel_args,
+                                                 options,
+                                                 prefix)) {}
+
+ColumnParallelLinear::ColumnParallelLinear(
+    int64_t in_features,
+    const std::vector<int64_t>& out_features,
+    const std::vector<std::string>& prefixes,
+    bool bias,
+    bool gather_output,
+    const QuantArgs& quant_args,
+    const ParallelArgs& parallel_args,
+    const torch::TensorOptions& options)
     : ModuleHolder(create_column_parallel_linear(in_features,
                                                  out_features,
+                                                 prefixes,
                                                  bias,
                                                  gather_output,
                                                  quant_args,
@@ -242,7 +292,8 @@ ColumnParallelLinear::ColumnParallelLinear(int64_t in_features,
                                                  gather_output,
                                                  {}, /*quant_args*/
                                                  parallel_args,
-                                                 options)) {}
+                                                 options,
+                                                 "")) {}
 
 // construct a rotary positional embedding.
 // chose right implementation based on the args.
 
@@ -22,9 +22,14 @@ class ParallelLinearImpl : public Module {
 
   virtual torch::Tensor forward(torch::Tensor input) = 0;
 
-  virtual void load_state_dict(const StateDict& state_dict) = 0;
+  // TODO: clean up the interface of load_state_dict (this overload now has a default body instead of being pure virtual)
+  virtual void load_state_dict(const StateDict& state_dict) {
+    LOG(FATAL) << "not implemented";  // default only reports a FATAL error; override in subclasses that support this overload
+  }
 
-  virtual void verify_loaded_weights(const std::string& prefix = "") const = 0;
+  virtual void verify_loaded_weights(const std::string& prefix = "") const {
+    LOG(FATAL) << "not implemented";  // default only reports a FATAL error; override in subclasses that support verification
+  }
 
   // special load_state_dict for fused cases
   virtual void load_state_dict(const StateDict& /*state_dict*/,
@@ -46,6 +51,16 @@ class ColumnParallelLinear : public ModuleHolder<ParallelLinearImpl> {
                        bool gather_output,
                        const QuantArgs& quant_args,
                        const ParallelArgs& parallel_args,
+                       const torch::TensorOptions& options,
+                       const std::string& prefix = "");
+
+  ColumnParallelLinear(int64_t in_features,
+                       const std::vector<int64_t>& out_features,
+                       const std::vector<std::string>& prefixes,
+                       bool bias,
+                       bool gather_output,
+                       const QuantArgs& quant_args,
+                       const ParallelArgs& parallel_args,
                        const torch::TensorOptions& options);
 
   ColumnParallelLinear(int64_t in_features,