PaddlePaddle
diff --git a/‎paddle/fluid/API.spec
Lines changed: 1 addition & 1 deletion b/‎paddle/fluid/API.spec
Lines changed: 1 addition & 1 deletion
diff --git a/‎paddle/fluid/operators/math/sampler.cc
Lines changed: 9 additions & 54 deletions b/‎paddle/fluid/operators/math/sampler.cc
Lines changed: 9 additions & 54 deletions
diff --git a/‎paddle/fluid/operators/math/sampler.h
Lines changed: 9 additions & 4 deletions b/‎paddle/fluid/operators/math/sampler.h
Lines changed: 9 additions & 4 deletions
diff --git a/‎paddle/fluid/operators/nce_op.cc
Lines changed: 58 additions & 10 deletions b/‎paddle/fluid/operators/nce_op.cc
Lines changed: 58 additions & 10 deletions
@@ -97,7 +97,7 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0))
+paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
 paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
 
@@ -60,75 +60,30 @@ float LogUniformSampler::Probability(int64_t value) const {
   return (log((value + 2.0) / (value + 1.0))) / log_range_;
 }
 
-CustomSampler::CustomSampler(int64_t range, const float* probabilities,
+CustomSampler::CustomSampler(int64_t range, const float *probabilities,
+                             const int *alias, const float *alias_probabilities,
                              unsigned int seed)
     : Sampler(range, seed) {
-  random_engine_ = std::make_shared<std::mt19937_64>(seed_);
+  random_engine_ = std::make_shared<std::mt19937>(seed_);  // engine seeded with seed_
   real_dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);  // draw in [0, 1)
   int_dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);  // index in [0, range], inclusive
-  alias_probs_ = std::make_shared<std::vector<float>>(range + 1);
-  alias_ = std::make_shared<std::vector<int64_t>>(range + 1);
-  probs_ = std::make_shared<std::vector<float>>(range + 1);
-
-  std::queue<std::pair<int64_t, float>> bigs;
-  std::queue<std::pair<int64_t, float>> littles;
-  for (int64_t i = 0; i <= range; ++i) {
-    (*probs_)[i] = probabilities[i];
-    float normal_prob = probabilities[i] * (range + 1);
-    if (normal_prob - 1.0 > 1e-4) {
-      bigs.emplace(i, normal_prob);
-    } else if (1.0 - normal_prob > 1e-4) {
-      littles.emplace(i, normal_prob);
-    } else {
-      (*alias_probs_)[i] = normal_prob;
-      (*alias_)[i] = -1;
-    }
-  }
-
-  while ((!littles.empty()) && (!bigs.empty())) {
-    auto big = bigs.front();
-    auto little = littles.front();
-    bigs.pop();
-    littles.pop();
-    (*alias_probs_)[little.first] = little.second;
-    (*alias_)[little.first] = big.first;
-    auto big_left = big.second - (1 - little.second);
-    if (big_left - 1.0 > 1e-4) {
-      bigs.emplace(big.first, big_left);
-    } else if (1.0 - big_left > 1e-4) {
-      littles.emplace(big.first, big_left);
-    } else {
-      (*alias_probs_)[big.first] = big_left;
-      (*alias_)[big.first] = -1;
-    }
-  }
 
-  if (!littles.empty()) {  // littles.second is close to 1.0
-    auto little = littles.front();
-    (*alias_probs_)[little.first] = 1.0;
-    (*alias_)[little.first] = -1;
-  }
-
-  if (!bigs.empty()) {  // bigs.second is close to 1.0
-    auto big = bigs.front();
-    (*alias_probs_)[big.first] = 1.0;
-    (*alias_)[big.first] = -1;
-  }
+  alias_probs_ = alias_probabilities;  // the tables are no longer built here;
+  probs_ = probabilities;  // only the caller's raw pointers are stored (no copy),
+  alias_ = alias;  // so the arrays must stay alive while this sampler is used
 }
 
 int64_t CustomSampler::Sample() const {
   auto index = (*int_dist_)(*random_engine_);  // candidate index from int_dist_
   auto p = (*real_dist_)(*random_engine_);  // second draw, compared against the table entry
-  if (p > (*alias_probs_)[index]) {
-    return (*alias_)[index];
+  if (p > alias_probs_[index]) {  // draw exceeds alias_probs_[index]: candidate is not kept
+    return alias_[index];  // return the entry stored for it in alias_ instead
   } else {
     return index;  // otherwise the candidate itself is the sample
   }
 }
 
-float CustomSampler::Probability(int64_t value) const {
-  return (*probs_)[value];
-}
+float CustomSampler::Probability(int64_t value) const { return probs_[value]; }  // direct read of probs_; value is not range-checked
 
 }  // namespace math
 }  // namespace operators
 
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+
 #include <cstdint>
 #include <memory>
 #include <random>
@@ -38,9 +39,12 @@ class Sampler {
       seed_ = seed;
     }
   }
+
   virtual ~Sampler();
+
   // Sample a single value
   virtual int64_t Sample() const = 0;
+
   // The probability that a single call to Sample() returns the given value.
   virtual float Probability(int64_t value) const = 0;
 
@@ -99,6 +103,7 @@ class LogUniformSampler : public Sampler {
 class CustomSampler : public Sampler {
  public:
   explicit CustomSampler(int64_t range, const float* probabilities,
+                         const int* alias, const float* alias_probabilities,
                          unsigned int seed = 0UL);
 
   ~CustomSampler() override {}
@@ -108,10 +113,10 @@ class CustomSampler : public Sampler {
   float Probability(int64_t value) const override;
 
  private:
-  std::shared_ptr<std::vector<float>> alias_probs_;
-  std::shared_ptr<std::vector<int64_t>> alias_;
-  std::shared_ptr<std::vector<float>> probs_;
-  std::shared_ptr<std::mt19937_64> random_engine_;
+  const float* alias_probs_;
+  const int* alias_;
+  const float* probs_;
+  std::shared_ptr<std::mt19937> random_engine_;
   std::shared_ptr<std::uniform_real_distribution<>> real_dist_;
   std::shared_ptr<std::uniform_int_distribution<>> int_dist_;
 };
 
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/nce_op.h"
 
+#include <string>
 #include <vector>
 
 namespace paddle {
@@ -25,7 +26,7 @@ class NCEOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
-  void InferShape(framework::InferShapeContext* ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("Input"));
     PADDLE_ENFORCE(ctx->HasInput("Label"));
     PADDLE_ENFORCE(ctx->HasInput("Weight"));
@@ -67,7 +68,7 @@ class NCEOp : public framework::OperatorWithKernel {
 
  protected:
   framework::OpKernelType GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
+      const framework::ExecutionContext &ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
         platform::CPUPlace());
@@ -101,11 +102,24 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsDispensable();
 
     AddInput(
-        "CustomDistribution",
+        "CustomDistProbs",
         "(Tensor) It is used in 'CostumDist' sampler. "
         "It is a tensor with shape [num_total_classes]."
         "The i-th element is the probsbility of the i-th class being sampled.")
         .AsDispensable();
+    // Alias-table inputs of the custom sampler. Each description states what
+    // the tensor holds, matching how CustomSampler::Sample() reads alias_ and
+    // alias_probs_ (assumes these inputs feed those tables — confirm in kernel).
+    AddInput(
+        "CustomDistAlias",
+        "(Tensor) It is used in 'CostumDist' sampler. "
+        "It is a tensor with shape [num_total_classes]. "
+        "The i-th element is the alias class that is returned instead of "
+        "the i-th class when the i-th class is drawn but not kept.")
+        .AsDispensable();
+    AddInput(
+        "CustomDistAliasProbs",
+        "(Tensor) It is used in 'CostumDist' sampler. "
+        "It is a tensor with shape [num_total_classes]. "
+        "The i-th element is the probability that the i-th class is kept "
+        "(rather than replaced by its alias) once it has been drawn.")
+        .AsDispensable();
+
     AddOutput("Cost",
               "(Tensor) A tensor of shape [batch_size, 1]. Cost of samples.");
     AddOutput("SampleLogits",
@@ -124,21 +138,22 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
               "kernel to compute grads."
               "")
         .AsIntermediate();
+
     AddAttr<int>("num_total_classes",
                  "Total number of classes in all samples.");
     AddAttr<int>("num_neg_samples",
                  "The number of negative classes. The default value is 10.")
         .SetDefault(10);
-
     AddAttr<int>("sampler",
                  "(int) Which sampler to be used to sample negative class."
                  "0: Uniform; 1: LogUniform; 2: CostumDist.")
         .SetDefault(0);
-
     AddAttr<int>("seed",
                  "(int) The seed used in sampler. If it is 0, "
                  "the sampler will generate a seed randomly.")
         .SetDefault(0);
+    AddAttr<bool>("is_sparse", "(boolean, default false) Sparse update.")
+        .SetDefault(false);
 
     AddAttr<std::vector<int>>("custom_neg_classes",
                               "This attribute only be used in unitest. Classes "
@@ -156,11 +171,19 @@ By default this operator uses a uniform distribution for sampling.
   }
 };
 
+// Grad-op desc maker for nce: inherits the default maker's constructors and
+// reports "nce_grad" as the type of the generated gradient op.
+class NCEOpGradDescMaker : public framework::DefaultGradOpDescMaker<true> {
+  using framework::DefaultGradOpDescMaker<true>::DefaultGradOpDescMaker;
+
+ protected:
+  virtual std::string GradOpType() const {
+    return std::string("nce_grad");
+  }
+};
+
 class NCEOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
-  void InferShape(framework::InferShapeContext* ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("Input"));
     PADDLE_ENFORCE(ctx->HasInput("Weight"));
     PADDLE_ENFORCE(ctx->HasInput("Cost"));
@@ -190,20 +213,45 @@ class NCEOpGrad : public framework::OperatorWithKernel {
 
  protected:
   framework::OpKernelType GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
+      const framework::ExecutionContext &ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
         platform::CPUPlace());
   }
 };
 
+// Var-type inference for nce_grad: picks the container type (SelectedRows when
+// the op's `is_sparse` attribute is true, LoDTensor otherwise) and the data
+// type of the Weight and Bias gradient variables.
+class NCEOpGradVarTypeInference : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc &op_desc,
+                  framework::BlockDesc *block) const override {
+    auto attr = op_desc.GetAttr("is_sparse");
+    const bool is_sparse = boost::get<bool>(attr);
+    const auto grad_type = is_sparse
+                               ? framework::proto::VarType::SELECTED_ROWS
+                               : framework::proto::VarType::LOD_TENSOR;
+    // "Input" is the op's slot name, not a variable name: resolve the slot to
+    // the variable actually bound to it before asking the block for its type.
+    const auto input_dtype =
+        block->Var(op_desc.Input("Input").front())->GetDataType();
+
+    for (const char *param : {"Weight", "Bias"}) {
+      const auto grad_names = op_desc.Output(framework::GradVarName(param));
+      // A gradient slot can be bound to no variable; calling front() on the
+      // empty list would be undefined behaviour, so skip such slots.
+      if (grad_names.empty()) continue;
+      const auto &grad_name = grad_names.front();
+      VLOG(30) << "nce_op_grad op " << grad_name << " is set to "
+               << (is_sparse ? "SelectedRows" : "LoDTensor");
+      auto *grad_var = block->Var(grad_name);
+      grad_var->SetType(grad_type);
+      grad_var->SetDataType(input_dtype);
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad);
+REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpGradDescMaker, ops::NCEOpMaker);
+REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad, ops::NCEOpGradVarTypeInference);
 REGISTER_OP_CPU_KERNEL(nce, ops::NCEKernel<paddle::platform::CPUPlace, float>,
                        ops::NCEKernel<paddle::platform::CPUPlace, double>);
 REGISTER_OP_CPU_KERNEL(nce_grad,