Skip to content

Commit ce46552

Browse files
author
Bartosz Kostrzewa
committed
hmc_tm: add real reproducible random numbers mode which will keep the RNG state consistent across executions of different numbers of trajectories (say you have a serial run doing 1000 trajectories at a time in 10 goes and a parallel run doing 10000 trajectories in one run, with this mode, the two runs will still have exactly the same random numbers because the RNG state is saved to file between executions of hmc_tm)
1 parent 6dd5c7f commit ce46552

File tree

8 files changed

+150
-5
lines changed

8 files changed

+150
-5
lines changed

default_input_values.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,9 @@
5454
#define _default_g_beta 6.0
5555
#define _default_g_N_s 20
5656
#define _default_g_dflgcr_flag 0
57+
#define _default_save_ranlux_state 0
5758
#define _default_random_seed 123456
58-
#define _default_rlxd_level 1
59+
#define _default_rlxd_level 2
5960
#define _default_solver_flag 1
6061
#define _default_startoption 0
6162
#define _default_Ntherm 0
@@ -64,7 +65,8 @@
6465
#define _default_write_cp_flag 1
6566
#define _default_cp_interval 5
6667
#define _default_nstore 0
67-
#define _default_rlxd_input_filename "last_state"
68+
#define _default_rlxd_input_filename "rlxd_state"
69+
#define _default_rlxs_input_filename "rlxs_state"
6870
#define _default_gauge_input_filename "conf"
6971
#define _default_read_source_flag 0
7072
#define _default_source_filename "source"

doc/input.tex

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,41 @@ \subsection{Input parameter for main program}
7979
See {\ttfamily NrXProcs}.
8080

8181
\item {\ttfamily seed}:\\
82-
The seed for the random number generator. Default value is $123456$.
82+
The seed for the random number generator. Possible values: Integer
83+
$i \in \left[ 1, 2^{31}-1 \right] $ or {\ttfamily statefile}. Default value is $123456$.
84+
The seeds for different MPI processes are computed from this number XOR'ed
85+
with the store counter and a regular pattern according to the process number.
86+
When {\ttfamily statefile} is specified here, ranlux will be initialised
87+
from the state stored in the files {\ttfamily rlxd\_state} and {\ttfamily rlxs\_state}.
88+
Also, {\ttfamily hmc\_tm} will save the ranlux state at the end of each
89+
trajectory. Note that when using MPI, {\ttfamily seed = statefile} should only ever be used
90+
when in {\ttfamily ReproduceRandomNumbers} mode because all processes
91+
will be initialised with the same random number generator state. Serial or OpenMP
92+
executions of {\ttfamily hmc\_tm} have only one random number generator and
93+
so this option allows running a simulation with a single chain of
94+
random numbers fully specified by the initial seed set for the first
95+
execution.
96+
97+
\item {\ttfamily SaveRanluxState}:\\
98+
Store the state of the random number generator of MPI process 0
99+
to the files {\ttfamily rlxd\_state} and {\ttfamily rlxs\_state}
100+
at the end of each trajectory. This is useful to make a fully reproducible
101+
chain of random numbers even across multiple executions of {\ttfamily hmc\_tm}.
102+
When using MPI, this only makes sense if {\ttfamily ReproduceRandomNumbers} is
103+
used. (see {\ttfamily seed})
104+
105+
\item {\ttfamily ReproduceRandomNumbers}:\\
106+
Possible values are {\ttfamily yes} or {\ttfamily no}, default is {\ttfamily yes}.
107+
When set to {\ttfamily yes}, when random numbers are requested the random
108+
number generators of all processes are set to the same state. Then each
109+
process generates random numbers for the whole volume but uses only those
110+
which belong to its local volume. This ensures that a parallelised run
111+
is equivalent to a serial one as far as the random numbers are concerned and
112+
is useful for testing purposes.
113+
This can also be extended by setting {\ttfamily seed = statefile }
114+
to produce a single chain of random numbers across multiple executions
115+
of the program (such as using {\ttfamily StartCondition = continue} and
116+
{\ttfamily InitialStoreCounter = readin}), even when using MPI.
83117

84118
\item {\ttfamily kappa}:\\
85119
The $\kappa$ value. Default is $0.12$. For the {\ttfamily hmc\_tm}

hmc_tm.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,15 @@ int main(int argc,char *argv[]) {
290290
#endif
291291

292292
/* Initialise random number generator */
293-
start_ranlux(rlxd_level, random_seed^trajectory_counter);
293+
/* if running in reproducible mode we can initialize ranlux using
294+
* a saved state */
295+
if(reproduce_randomnumber_flag && random_seed == -1) {
296+
start_ranlux_from_file(rlxd_input_filename,rlxs_input_filename);
297+
} else if (random_seed == -1) {
298+
fatal_error("Initializing RANLUX from file only works in reproducible random numbers mode. Aborting!","hmc_tm");
299+
} else {
300+
start_ranlux(rlxd_level, random_seed^trajectory_counter);
301+
}
294302

295303
/* Set up the gauge field */
296304
/* continue and restart */

read_input.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,15 @@ extern "C"
5353
extern int nstore;
5454
extern int crylov_space_dim;
5555
extern char rlxd_input_filename[500];
56+
extern char rlxs_input_filename[500];
5657
extern char gauge_input_filename[500];
5758
extern int subforwilson_flag;
5859
extern int eigenvalue_method_flag;
5960
extern int eigenvalue_max_iterations;
6061
extern double eigenvalue_precision;
6162
extern int index_start;
6263
extern int index_end;
64+
extern int save_ranlux_state;
6365
extern int random_seed;
6466
extern int rlxd_level;
6567
extern double X0, X1, X2, X3;

read_input.l

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ inline void rmQuotes(char *str){
116116
int nstore;
117117
int index_start, index_end;
118118
int random_seed;
119+
int save_ranlux_state;
119120
int rlxd_level;
120121
char rlxd_input_filename[500];
122+
char rlxs_input_filename[500];
121123
char gauge_input_filename[500];
122124
int read_source_flag;
123125
int return_check_flag, return_check_interval;
@@ -191,6 +193,7 @@ inline void rmQuotes(char *str){
191193
%x MU
192194
%x CSW
193195
%x SEED
196+
%x SAVERANLUXSTATE
194197
%x RLXDLEVEL
195198
%x NSAVE
196199
%x RLXDINPUTFILE
@@ -336,6 +339,7 @@ inline void rmQuotes(char *str){
336339
^NoEigenvalues{EQL} BEGIN(NOEV);
337340
^EigenvaluePrecision{EQL} BEGIN(PRECEV);
338341
^seed{EQL} BEGIN(SEED);
342+
^SaveRanluxState{EQL} BEGIN(SAVERANLUXSTATE);
339343
^StartCondition{EQL} BEGIN(STARTCOND);
340344
^ThermalisationSweeps{EQL} BEGIN(THERMSWEEPS);
341345
^Measurements{EQL} BEGIN(NMEAS);
@@ -1622,10 +1626,23 @@ inline void rmQuotes(char *str){
16221626
dfl_poly_iter=atoi(yytext);
16231627
if(myverbose!=0) printf("dfl_poly_iter = %s \n", yytext);
16241628
}
1629+
<SAVERANLUXSTATE>yes {
1630+
save_ranlux_state = 1;
1631+
if(myverbose!=0) printf("Save RANLUX state at end of trajectory.\n");
1632+
}
1633+
<SAVERANLUXSTATE>no {
1634+
save_ranlux_state = 0;
1635+
if(myverbose!=0) printf("Don't save RANLUX state at end of trajectory.\n");
1636+
}
16251637
<SEED>{DIGIT}+ {
16261638
random_seed=atoi(yytext);
16271639
if(myverbose!=0) printf("seed=%s \n", yytext);
16281640
}
1641+
<SEED>statefile {
1642+
random_seed=-1;
1643+
save_ranlux_state=1;
1644+
if(myverbose!=0) printf("seed=%s; Trying to read ranlux state from file! Saving RANLUX state at end of trajectory!\n", yytext);
1645+
}
16291646
<RLXDLEVEL>[12] {
16301647
rlxd_level = atoi(yytext);
16311648
if(myverbose!=0) printf("RanluxdLevel set to %d \n", rlxd_level);
@@ -2113,6 +2130,7 @@ int read_input(char * conf_file){
21132130
g_N_s = _default_g_N_s;
21142131
g_dflgcr_flag = _default_g_dflgcr_flag;
21152132
random_seed = _default_random_seed;
2133+
save_ranlux_state = _default_save_ranlux_state;
21162134
rlxd_level = _default_rlxd_level;
21172135
startoption = _default_startoption;
21182136
Ntherm = _default_Ntherm;
@@ -2122,6 +2140,7 @@ int read_input(char * conf_file){
21222140
cp_interval = _default_cp_interval;
21232141
nstore = _default_nstore;
21242142
strcpy(rlxd_input_filename, _default_rlxd_input_filename);
2143+
strcpy(rlxs_input_filename, _default_rlxs_input_filename);
21252144
strcpy(gauge_input_filename, _default_gauge_input_filename);
21262145
g_stdio_proc = _default_g_stdio_proc;
21272146
index_start = _default_index_start;

start.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
#include "ranlxd.h"
7878
#include "ranlxs.h"
7979
#include "start.h"
80+
#include "fatal_error.h"
8081

8182
static void gauss_vector(double v[],int n)
8283
{
@@ -838,6 +839,69 @@ void start_ranlux(int level, int seed)
838839
rlxd_init(level, loc_seed);
839840
}
840841

842+
/* read warning in start.h before using this function! */
843+
/* Read exactly n_ints ints of saved generator state from the binary file
 * `filename` into `state`.
 *
 * Returns 0 on success and -1 if the file cannot be opened or contains fewer
 * than n_ints ints. The file is always closed before returning. */
static int read_ranlux_state_file(const char * filename, int * state, size_t n_ints) {
  FILE * state_file;
  size_t n_read;

  /* the state is a raw dump of ints, so open in binary mode */
  state_file = fopen(filename, "rb");
  if(state_file == NULL) {
    return -1;
  }

  n_read = fread(state, sizeof(int), n_ints, state_file);
  fclose(state_file);

  return (n_read == n_ints) ? 0 : -1;
}

/* Initialise the ranlxd and ranlxs generators from state files such as those
 * written by store_ranlux_state(). Read the warning in start.h before using
 * this function.
 *
 * rlxd_state_filename: file holding the saved ranlxd state
 * rlxs_state_filename: file holding the saved ranlxs state
 *
 * Both files are read and validated completely before either generator is
 * touched, so a missing or truncated file never leaves the generators
 * half-restored or reset from uninitialised memory. On any failure
 * fatal_error() is called with a message naming the offending file.
 *
 * NOTE(review): fatal_error() is presumably terminal; the explicit returns
 * only guard against it returning. */
void start_ranlux_from_file(char * const rlxd_state_filename, char * const rlxs_state_filename) {
  /* number of ints in a saved state; must agree with store_ranlux_state().
   * NOTE(review): presumably equals rlxd_size()/rlxs_size() - confirm. */
  enum { RANLUX_STATE_LEN = 105 };

  int rlxd_state[RANLUX_STATE_LEN];
  int rlxs_state[RANLUX_STATE_LEN];
  char error_message[1000];

  if(read_ranlux_state_file(rlxd_state_filename, rlxd_state, RANLUX_STATE_LEN) != 0) {
    snprintf(error_message,sizeof(error_message),"Problem reading RLXD state file \"%s\", aborting!",rlxd_state_filename);
    fatal_error(error_message,"start_ranlux_from_file");
    return;
  }

  if(read_ranlux_state_file(rlxs_state_filename, rlxs_state, RANLUX_STATE_LEN) != 0) {
    snprintf(error_message,sizeof(error_message),"Problem reading RLXS state file \"%s\", aborting!",rlxs_state_filename);
    fatal_error(error_message,"start_ranlux_from_file");
    return;
  }

  rlxd_reset(rlxd_state);
  rlxs_reset(rlxs_state);
}
873+
874+
/* Write n_ints ints of generator state from `state` to the binary file
 * `filename`, replacing any previous contents.
 *
 * Returns 0 on success, -1 if the file cannot be opened for writing and -2 if
 * the data could not be written out completely. fclose() is checked as well
 * because buffered data may only hit the disk (and fail) when the stream is
 * closed. */
static int write_ranlux_state_file(const char * filename, const int * state, size_t n_ints) {
  FILE * state_file;
  size_t n_written;
  int close_status;

  /* the state is a raw dump of ints, so open in binary mode */
  state_file = fopen(filename, "wb");
  if(state_file == NULL) {
    return -1;
  }

  n_written = fwrite(state, sizeof(int), n_ints, state_file);
  close_status = fclose(state_file);

  if(n_written != n_ints || close_status != 0) {
    return -2;
  }
  return 0;
}

/* Save the current state of the ranlxd and ranlxs generators to files so
 * that a later run can continue the same random number sequence via
 * start_ranlux_from_file().
 *
 * rlxd_state_filename: destination file for the ranlxd state
 * rlxs_state_filename: destination file for the ranlxs state
 *
 * Each file is opened only when its state is about to be written, so a
 * failure on the first file does not truncate the second one. Open and write
 * failures are reported through fatal_error() with a message naming the
 * file; the two files are handled independently of each other. */
void store_ranlux_state(char * const rlxd_state_filename, char * const rlxs_state_filename) {
  /* number of ints in a saved state; must agree with start_ranlux_from_file().
   * NOTE(review): presumably equals rlxd_size()/rlxs_size() - confirm. */
  enum { RANLUX_STATE_LEN = 105 };

  int state[RANLUX_STATE_LEN];
  char error_message[1000];
  int status;

  rlxd_get(state);
  status = write_ranlux_state_file(rlxd_state_filename, state, RANLUX_STATE_LEN);
  if(status == -1) {
    snprintf(error_message,sizeof(error_message),"Problem opening RLXD state file \"%s\" for writing, aborting!",rlxd_state_filename);
    fatal_error(error_message,"store_ranlux_state");
  } else if(status != 0) {
    snprintf(error_message,sizeof(error_message),"Problem writing RLXD state file \"%s\", aborting!",rlxd_state_filename);
    fatal_error(error_message,"store_ranlux_state");
  }

  rlxs_get(state);
  status = write_ranlux_state_file(rlxs_state_filename, state, RANLUX_STATE_LEN);
  if(status == -1) {
    snprintf(error_message,sizeof(error_message),"Problem opening RLXS state file \"%s\" for writing, aborting!",rlxs_state_filename);
    fatal_error(error_message,"store_ranlux_state");
  } else if(status != 0) {
    snprintf(error_message,sizeof(error_message),"Problem writing RLXS state file \"%s\", aborting!",rlxs_state_filename);
    fatal_error(error_message,"store_ranlux_state");
  }
}
904+
841905
void gen_test_spinor_field(spinor * const k, const int eoflag) {
842906

843907
int ix,iy,effvol;

start.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,18 @@ void source_spinor_field_point_from_file(spinor * const P, spinor * const Q, int
6868

6969
void start_ranlux(int level,int seed);
7070

71+
/* This function allows initializing RANLUX from a saved state. IMPORTANT NOTE BELOW:
72+
* Because of the way that random numbers are used in tmLQCD, this function should only ever be
73+
* used in "reproducible random numbers" mode with the full understanding that all processes
74+
* will have exactly the same random number generators!!
75+
* The main routines in start.c all accommodate this by making every process generate random
76+
* numbers for the whole volume and only using those relevant for the local volume while throwing
77+
* all others away.
78+
* If some function requests random numbers via any of the utility functions or ranlx[d,s] directly
79+
* without taking this fact into account, all processes will generate the same ones! You have been warned. */
80+
void start_ranlux_from_file(char * const rlxd_state_filename, char * const rlxs_state_filename);
81+
void store_ranlux_state(char * const rlxd_state_filename, char * const rlxs_state_filename);
82+
7183
void gen_test_spinor_field(spinor * const k , const int eoflag);
7284
void write_test_spinor_field(spinor * const k , const int eoflag, char * postfix);
7385
#endif

update_tm.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
343343
#endif
344344
etime=gettime();
345345

346-
/* printing data in the .data file */
346+
/* printing data in the .data file and save ranlux state if the option is set */
347347
if(g_proc_id==0) {
348348
datafile = fopen(filename, "a");
349349
if (!bc_flag) { /* if Periodic Boundary Conditions */
@@ -370,6 +370,10 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
370370
fprintf(datafile, "\n");
371371
fflush(datafile);
372372
fclose(datafile);
373+
374+
if(save_ranlux_state) {
375+
store_ranlux_state(rlxd_input_filename, rlxs_input_filename);
376+
}
373377
}
374378
return(accept);
375379
}

0 commit comments

Comments
 (0)