@@ -150,6 +150,14 @@ object_generator::object_generator(size_t n_key_iterators/*= OBJECT_GENERATOR_KE
150150 m_key_max(0 ),
151151 m_key_stddev(0 ),
152152 m_key_median(0 ),
153+ m_key_zipf_min(0 ),
154+ m_key_zipf_max(0 ),
155+ m_key_zipf_exp(1 ),
156+ m_key_zipf_1mexp(0 ),
157+ m_key_zipf_1mexpInv(0 ),
158+ m_key_zipf_Hmin(0 ),
159+ m_key_zipf_Hmax(0 ),
160+ m_key_zipf_s(0 ),
153161 m_value_buffer(NULL ),
154162 m_random_fd(-1 ),
155163 m_value_buffer_size(0 ),
@@ -172,6 +180,14 @@ object_generator::object_generator(const object_generator& copy) :
172180 m_key_max(copy.m_key_max),
173181 m_key_stddev(copy.m_key_stddev),
174182 m_key_median(copy.m_key_median),
183+ m_key_zipf_min(copy.m_key_zipf_min),
184+ m_key_zipf_max(copy.m_key_zipf_max),
185+ m_key_zipf_exp(copy.m_key_zipf_exp),
186+ m_key_zipf_1mexp(copy.m_key_zipf_1mexp),
187+ m_key_zipf_1mexpInv(copy.m_key_zipf_1mexpInv),
188+ m_key_zipf_Hmin(copy.m_key_zipf_Hmin),
189+ m_key_zipf_Hmax(copy.m_key_zipf_Hmax),
190+ m_key_zipf_s(copy.m_key_zipf_s),
175191 m_value_buffer(NULL ),
176192 m_random_fd(-1 ),
177193 m_value_buffer_size(0 ),
@@ -348,6 +364,47 @@ void object_generator::set_key_distribution(double key_stddev, double key_median
348364 m_key_median = key_median;
349365}
350366
367+ // should be called after set_key_range in memtier_benchmark.cpp
368+ void object_generator::set_key_zipf_distribution (double key_exp)
369+ {
370+ const double eps = 1e-4 ;
371+
372+ if (key_exp < eps)
373+ m_key_zipf_exp = 0 .;
374+ else if (fabs (key_exp - 1 ) < eps)
375+ m_key_zipf_exp = 1 .;
376+ else
377+ m_key_zipf_exp = key_exp;
378+
379+ if (m_key_min == 0 )
380+ m_key_zipf_min = 1 ;
381+ else
382+ m_key_zipf_min = m_key_min;
383+
384+ if (m_key_max <= m_key_zipf_min)
385+ m_key_zipf_max = m_key_zipf_min;
386+ else
387+ m_key_zipf_max = m_key_max;
388+
389+ if (m_key_zipf_exp < eps)
390+ return ; // degenerated to uniform distribution
391+ else if (fabs (key_exp - 1 ) < eps) {
392+ m_key_zipf_Hmin = log (m_key_zipf_min + 0.5 ) - 1 . / m_key_zipf_min;
393+ m_key_zipf_Hmax = log (m_key_zipf_max + 0.5 );
394+ double t = log (m_key_zipf_min + 1.5 ) - 1 . / (m_key_zipf_min + 1 );
395+ m_key_zipf_s = m_key_zipf_min + 1 - exp (t);
396+ } else {
397+ m_key_zipf_1mexp = 1 . - m_key_zipf_exp;
398+ m_key_zipf_1mexpInv = 1 . / m_key_zipf_1mexp;
399+ m_key_zipf_Hmin = pow (m_key_zipf_min + 0.5 , m_key_zipf_1mexp) -
400+ m_key_zipf_1mexp * pow (m_key_zipf_min, -m_key_zipf_exp);
401+ m_key_zipf_Hmax = pow (m_key_zipf_max + 0.5 , m_key_zipf_1mexp);
402+ double t = pow (m_key_zipf_min + 1.5 , m_key_zipf_1mexp) -
403+ m_key_zipf_1mexp * pow (m_key_zipf_min + 1 , -m_key_zipf_exp);
404+ m_key_zipf_s = m_key_zipf_min + 1 - pow (t, m_key_zipf_1mexpInv);
405+ }
406+ }
407+
351408// return a random number between r_min and r_max
352409unsigned long long object_generator::random_range (unsigned long long r_min, unsigned long long r_max)
353410{
@@ -361,15 +418,62 @@ unsigned long long object_generator::normal_distribution(unsigned long long r_mi
361418 return m_random.gaussian_distribution_range (r_stddev, r_median, r_min, r_max);
362419}
363420
421+ // following sampler is based on:
422+ // Rejection-inversion to generate variates from monotone discrete distributions
423+ // ACM Transactions on Modeling and Computer Simulation.
424+ // Volume 6 Issue 3 July 1996 pp 169–184
425+ // https://doi.org/10.1145/235025.235029
426+ unsigned long long object_generator::zipf_distribution ()
427+ {
428+ const double eps = 1e-4 ;
429+
430+ if (m_key_zipf_exp < eps)
431+ return random_range (m_key_zipf_min, m_key_zipf_max);
432+ else if (fabs (m_key_zipf_exp - 1.0 ) < eps) {
433+ while (true ) {
434+ double p = m_random.get_random () / (double )(m_random.get_random_max ());
435+ double u = p * (m_key_zipf_Hmax - m_key_zipf_Hmin) + m_key_zipf_Hmin;
436+ double x = exp (u);
437+ if (x < m_key_zipf_min - 0.5 )
438+ x = m_key_zipf_min + 0.5 ;
439+ if (x >= m_key_zipf_max + 0.5 )
440+ x = m_key_zipf_max;
441+ double k = floor (x + 0.5 );
442+ if (k - x <= m_key_zipf_s)
443+ return k;
444+ if (u > log (k + 0.5 ) - 1 . / k)
445+ return k;
446+ }
447+ } else {
448+ while (true ) {
449+ double p = m_random.get_random () / (double )(m_random.get_random_max ());
450+ double u = p * (m_key_zipf_Hmax - m_key_zipf_Hmin) + m_key_zipf_Hmin;
451+ double x = pow (u, m_key_zipf_1mexpInv);
452+ if (x < m_key_zipf_min - 0.5 )
453+ x = m_key_zipf_min + 0.5 ;
454+ if (x >= m_key_zipf_max + 0.5 )
455+ x = m_key_zipf_max;
456+ double k = floor (x + 0.5 );
457+ if (k - x <= m_key_zipf_s)
458+ return k;
459+ double t = (u - pow (k + 0.5 , m_key_zipf_1mexp));
460+ if (m_key_zipf_1mexpInv * t > -pow (k, -m_key_zipf_exp))
461+ return k;
462+ }
463+ }
464+ }
465+
364466unsigned long long object_generator::get_key_index (int iter)
365467{
366- assert (iter < static_cast <int >(m_next_key.size ()) && iter >= OBJECT_GENERATOR_KEY_GAUSSIAN );
468+ assert (iter < static_cast <int >(m_next_key.size ()) && iter >= OBJECT_GENERATOR_KEY_ZIPFIAN );
367469
368470 unsigned long long k;
369471 if (iter==OBJECT_GENERATOR_KEY_RANDOM) {
370472 k = random_range (m_key_min, m_key_max);
371473 } else if (iter==OBJECT_GENERATOR_KEY_GAUSSIAN) {
372474 k = normal_distribution (m_key_min, m_key_max, m_key_stddev, m_key_median);
475+ } else if (iter == OBJECT_GENERATOR_KEY_ZIPFIAN) {
476+ k = zipf_distribution ();
373477 } else {
374478 if (m_next_key[iter] < m_key_min)
375479 m_next_key[iter] = m_key_min;
0 commit comments