rand() -> torch::randint() (#217)

ZenoTan · rusty1s · web-flow · commit 122811a6f4e1 · 2022-04-02T22:04:59.000+02:00
* torch randint

* update

* fix type

* update

* add test

Co-authored-by: rusty1s <matthias.fey@tu-dortmund.de>
diff --git a/csrc/cpu/ego_sample_cpu.cpp b/csrc/cpu/ego_sample_cpu.cpp
@@ -19,8 +19,6 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,
                          torch::Tensor idx, int64_t depth,
                          int64_t num_neighbors, bool replace) {
 
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
-
   std::vector<torch::Tensor> out_rowptrs(idx.numel() + 1);
   std::vector<torch::Tensor> out_cols(idx.numel());
   std::vector<torch::Tensor> out_n_ids(idx.numel());
@@ -56,14 +54,14 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,
             }
           } else if (replace) {
             for (int64_t j = 0; j < num_neighbors; j++) {
-              w = col_data[row_start + (rand() % row_count)];
+              w = col_data[row_start + uniform_randint(row_count)];
               n_id_set.insert(w);
               n_ids.push_back(w);
             }
           } else {
             std::unordered_set<int64_t> perm;
             for (int64_t j = row_count - num_neighbors; j < row_count; j++) {
-              if (!perm.insert(rand() % j).second) {
+              if (!perm.insert(uniform_randint(j)).second) {
                 perm.insert(j);
               }
             }
diff --git a/csrc/cpu/hgt_sample_cpu.cpp b/csrc/cpu/hgt_sample_cpu.cpp
@@ -105,8 +105,6 @@ hgt_sample_cpu(const c10::Dict<rel_t, torch::Tensor> &colptr_dict,
                const c10::Dict<node_t, vector<int64_t>> &num_samples_dict,
                const int64_t num_hops) {
 
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
-
   // Create a mapping to convert single string relations to edge type triplets:
   unordered_map<rel_t, edge_t> to_edge_type;
   for (const auto &kv : colptr_dict) {
diff --git a/csrc/cpu/neighbor_sample_cpu.cpp b/csrc/cpu/neighbor_sample_cpu.cpp
@@ -15,8 +15,6 @@ tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
 sample(const torch::Tensor &colptr, const torch::Tensor &row,
        const torch::Tensor &input_node, const vector<int64_t> num_neighbors) {
 
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
-
   // Initialize some data structures for the sampling process:
   vector<int64_t> samples;
   unordered_map<int64_t, int64_t> to_local_node;
@@ -59,7 +57,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row,
         }
       } else if (replace) {
         for (int64_t j = 0; j < num_samples; j++) {
-          const int64_t offset = col_start + rand() % col_count;
+          const int64_t offset = col_start + uniform_randint(col_count);
           const int64_t &v = row_data[offset];
           const auto res = to_local_node.insert({v, samples.size()});
           if (res.second)
@@ -73,7 +71,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row,
       } else {
         unordered_set<int64_t> rnd_indices;
         for (int64_t j = col_count - num_samples; j < col_count; j++) {
-          int64_t rnd = rand() % j;
+          int64_t rnd = uniform_randint(j);
           if (!rnd_indices.insert(rnd).second) {
             rnd = j;
             rnd_indices.insert(j);
@@ -127,8 +125,6 @@ hetero_sample(const vector<node_t> &node_types,
               const c10::Dict<rel_t, vector<int64_t>> &num_neighbors_dict,
               const int64_t num_hops) {
 
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
-
   // Create a mapping to convert single string relations to edge type triplets:
   unordered_map<rel_t, edge_t> to_edge_type;
   for (const auto &k : edge_types)
@@ -180,8 +176,10 @@ hetero_sample(const vector<node_t> &node_types,
       auto &src_samples = samples_dict.at(src_node_type);
       auto &to_local_src_node = to_local_node_dict.at(src_node_type);
 
-      const auto *colptr_data = ((torch::Tensor)colptr_dict.at(rel_type)).data_ptr<int64_t>();
-      const auto *row_data = ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
+      const auto *colptr_data =
+          ((torch::Tensor)colptr_dict.at(rel_type)).data_ptr<int64_t>();
+      const auto *row_data =
+          ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
 
       auto &rows = rows_dict.at(rel_type);
       auto &cols = cols_dict.at(rel_type);
@@ -212,7 +210,7 @@ hetero_sample(const vector<node_t> &node_types,
           }
         } else if (replace) {
           for (int64_t j = 0; j < num_samples; j++) {
-            const int64_t offset = col_start + rand() % col_count;
+            const int64_t offset = col_start + uniform_randint(col_count);
             const int64_t &v = row_data[offset];
             const auto res = to_local_src_node.insert({v, src_samples.size()});
             if (res.second)
@@ -226,7 +224,7 @@ hetero_sample(const vector<node_t> &node_types,
         } else {
           unordered_set<int64_t> rnd_indices;
           for (int64_t j = col_count - num_samples; j < col_count; j++) {
-            int64_t rnd = rand() % j;
+            int64_t rnd = uniform_randint(j);
             if (!rnd_indices.insert(rnd).second) {
               rnd = j;
               rnd_indices.insert(j);
@@ -262,7 +260,8 @@ hetero_sample(const vector<node_t> &node_types,
       auto &to_local_src_node = to_local_node_dict.at(src_node_type);
 
       const auto *colptr_data = ((torch::Tensor)kv.value()).data_ptr<int64_t>();
-      const auto *row_data = ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
+      const auto *row_data =
+          ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
 
       auto &rows = rows_dict.at(rel_type);
       auto &cols = cols_dict.at(rel_type);
diff --git a/csrc/cpu/sample_cpu.cpp b/csrc/cpu/sample_cpu.cpp
@@ -15,8 +15,6 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
   CHECK_CPU(idx);
   CHECK_INPUT(idx.dim() == 1);
 
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
-
   auto rowptr_data = rowptr.data_ptr<int64_t>();
   auto col_data = col.data_ptr<int64_t>();
   auto idx_data = idx.data_ptr<int64_t>();
@@ -69,7 +67,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
 
       if (row_count > 0) {
         for (int64_t j = 0; j < num_neighbors; j++) {
-          e = row_start + rand() % row_count;
+          e = row_start + uniform_randint(row_count);
           c = col_data[e];
 
           if (n_id_map.count(c) == 0) {
@@ -96,7 +94,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
       } else { // See: https://www.nowherenearithaca.com/2013/05/
                //      robert-floyds-tiny-and-beautiful.html
         for (int64_t j = row_count - num_neighbors; j < row_count; j++) {
-          if (!perm.insert(rand() % j).second)
+          if (!perm.insert(uniform_randint(j)).second)
             perm.insert(j);
         }
       }
diff --git a/csrc/cpu/utils.h b/csrc/cpu/utils.h
@@ -35,6 +35,18 @@ from_vector(const std::unordered_map<key_t, std::vector<scalar_t>> &vec_dict,
   return out_dict;
 }
 
+inline int64_t uniform_randint(int64_t low, int64_t high) {
+  CHECK_LT(low, high);
+  auto options = torch::TensorOptions().dtype(torch::kInt64);
+  auto ret = torch::randint(low, high, {1}, options);
+  auto ptr = ret.data_ptr<int64_t>();
+  return *ptr;
+}
+
+inline int64_t uniform_randint(int64_t high) {
+  return uniform_randint(0, high);
+}
+
 inline torch::Tensor
 choice(int64_t population, int64_t num_samples, bool replace = false,
        torch::optional<torch::Tensor> weight = torch::nullopt) {
@@ -52,7 +64,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false,
     const auto out = torch::empty(num_samples, at::kLong);
     auto *out_data = out.data_ptr<int64_t>();
     for (int64_t i = 0; i < num_samples; i++) {
-      out_data[i] = rand() % population;
+      out_data[i] = uniform_randint(population);
     }
     return out;
 
@@ -64,7 +76,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false,
     auto *out_data = out.data_ptr<int64_t>();
     std::unordered_set<int64_t> samples;
     for (int64_t i = population - num_samples; i < population; i++) {
-      int64_t sample = rand() % i;
+      int64_t sample = uniform_randint(i);
       if (!samples.insert(sample).second) {
         sample = i;
         samples.insert(sample);
@@ -86,7 +98,7 @@ uniform_choice(const int64_t population, const int64_t num_samples,
 
   if (replace) {
     for (int64_t i = 0; i < num_samples; i++) {
-      const int64_t &v = idx_data[rand() % population];
+      const int64_t &v = idx_data[uniform_randint(population)];
       if (to_local_node->insert({v, samples->size()}).second)
         samples->push_back(v);
     }
@@ -99,7 +111,7 @@ uniform_choice(const int64_t population, const int64_t num_samples,
   } else {
     std::unordered_set<int64_t> indices;
     for (int64_t i = population - num_samples; i < population; i++) {
-      int64_t j = rand() % i;
+      int64_t j = uniform_randint(i);
       if (!indices.insert(j).second) {
         j = i;
         indices.insert(j);
diff --git a/csrc/extensions.h b/csrc/extensions.h
@@ -1,9 +1,2 @@
 #include "macros.h"
 #include <torch/extension.h>
-
-// for getpid()
-#ifdef _WIN32
-#include <process.h>
-#else
-#include <unistd.h>
-#endif
diff --git a/test/test_neighbor_sample.py b/test/test_neighbor_sample.py
@@ -25,3 +25,18 @@ def test_neighbor_sample():
     assert out[0].tolist() == [1, 0]
     assert out[1].tolist() == [1]
     assert out[2].tolist() == [0]
+
+
+def test_neighbor_sample_seed():
+    colptr = torch.tensor([0, 3, 6, 9])
+    row = torch.tensor([0, 1, 2, 0, 1, 2, 0, 1, 2])
+    input_nodes = torch.tensor([0, 1])
+
+    torch.manual_seed(42)
+    out1 = neighbor_sample(colptr, row, input_nodes, [1, 1], True, False)
+
+    torch.manual_seed(42)
+    out2 = neighbor_sample(colptr, row, input_nodes, [1, 1], True, False)
+
+    for data1, data2 in zip(out1, out2):
+        assert data1.tolist() == data2.tolist()

Original file line number	Diff line number	Diff line change
`@@ -19,8 +19,6 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,`
`19`	`19`	`torch::Tensor idx, int64_t depth,`
`20`	`20`	`int64_t num_neighbors, bool replace) {`
`21`	`21`
`22`		`- srand(time(NULL) + 1000 * getpid()); // Initialize random seed.`
`23`		`-`
`24`	`22`	`std::vector<torch::Tensor> out_rowptrs(idx.numel() + 1);`
`25`	`23`	`std::vector<torch::Tensor> out_cols(idx.numel());`
`26`	`24`	`std::vector<torch::Tensor> out_n_ids(idx.numel());`
`@@ -56,14 +54,14 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,`
`56`	`54`	`}`
`57`	`55`	`} else if (replace) {`
`58`	`56`	`for (int64_t j = 0; j < num_neighbors; j++) {`
`59`		`- w = col_data[row_start + (rand() % row_count)];`
	`57`	`+ w = col_data[row_start + uniform_randint(row_count)];`
`60`	`58`	`n_id_set.insert(w);`
`61`	`59`	`n_ids.push_back(w);`
`62`	`60`	`}`
`63`	`61`	`} else {`
`64`	`62`	`std::unordered_set<int64_t> perm;`
`65`	`63`	`for (int64_t j = row_count - num_neighbors; j < row_count; j++) {`
`66`		`- if (!perm.insert(rand() % j).second) {`
	`64`	`+ if (!perm.insert(uniform_randint(j)).second) {`
`67`	`65`	`perm.insert(j);`
`68`	`66`	`}`
`69`	`67`	`}`