@@ -46,11 +46,13 @@ bool CuW2V::Init(std::string opt_path) {
46
46
// if zero, we will use hierarchical softmax
47
47
neg_ = opt_[" negative_sampling" ].int_value ();
48
48
49
- // set seed for constructing random table of negative sampling
49
+ // random seed
50
50
table_seed_ = opt_[" table_seed" ].int_value ();
51
- const unsigned int table_seed = table_seed_;
52
- table_rng_.seed (table_seed);
53
-
51
+ cuda_seed_ = opt_[" cuda_seed" ].int_value ();
52
+ dev_rngs_.resize (block_cnt_);
53
+ InitRngsKernel<<<block_cnt_, 1 >>> (
54
+ thrust::raw_pointer_cast (dev_rngs_.data ()), cuda_seed_);
55
+
54
56
INFO (" num_dims: {}, block_dim: {}, block_cnt: {}, objective type: {}, neg: {}" ,
55
57
num_dims_, block_dim_, block_cnt_, sg_? " skip gram" : " cbow" , neg_);
56
58
return true ;
@@ -63,22 +65,25 @@ void CuW2V::BuildRandomTable(const float* word_count, const int num_words,
63
65
std::vector<float > acc;
64
66
float cumsum = 0 ;
65
67
for (int i = 0 ; i < num_words; ++i) {
66
- cumsum += word_count[i];
67
68
acc.push_back (cumsum);
69
+ cumsum += word_count[i];
68
70
}
69
71
70
- std::uniform_real_distribution<float > dist (0 .0f , cumsum);
71
72
dev_random_table_.resize (random_size_);
72
73
std::vector<int > host_random_table (table_size);
73
74
#pragma omp parallel num_threads(num_threads)
74
75
{
76
+ const unsigned int table_seed = table_seed_ + omp_get_thread_num ();
77
+ std::mt19937 rng (table_seed);
78
+ std::uniform_real_distribution<float > dist (0 .0f , cumsum);
75
79
#pragma omp for schedule(static)
76
80
for (int i = 0 ; i < random_size_; ++i) {
77
- float r = dist (table_rng_ );
81
+ float r = dist (rng );
78
82
int pos = std::lower_bound (acc.begin (), acc.end (), r) - acc.begin ();
79
83
host_random_table[i] = pos;
80
84
}
81
85
}
86
+ table_seed_ += num_threads;
82
87
83
88
thrust::copy (host_random_table.begin (), host_random_table.end (), dev_random_table_.begin ());
84
89
CHECK_CUDA (cudaDeviceSynchronize ());
@@ -148,6 +153,8 @@ void CuW2V::BuildHuffmanTree(const float* word_count, const int num_words) {
148
153
thrust::copy (host_points.begin (), host_points.end (), dev_points_.begin ());
149
154
thrust::copy (host_hs_indptr.begin (), host_hs_indptr.end (), dev_hs_indptr_.begin ());
150
155
CHECK_CUDA (cudaDeviceSynchronize ());
156
+
157
+ huffman_nodes.clear ();
151
158
}
152
159
153
160
void CuW2V::LoadModel (float * emb_in, float * emb_out) {
0 commit comments