
Commit c450ded

Do not use global Numpy RNG (#461)
This patch switches from the global NumPy RNG to an explicitly constructed Generator instance, as recommended by NumPy and flagged by Ruff's NPY002 rule. Keeping RNG state out of global scope makes it easier to reason about, at the cost of slightly more verbose code. Fixes #460.
1 parent 079d4c7 commit c450ded
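
For context, a minimal sketch of the before/after pattern this commit applies (the values and variable names below are illustrative, not taken from the repository). The Generator is constructed once and then stored on, or passed into, whatever needs randomness, which is exactly what the two Python files below do.

import numpy as np

# Before (global state): every call mutates the hidden module-level RNG.
np.random.seed(42)
legacy_sample = np.random.choice(10, size=3, replace=False)

# After (explicit state): a Generator is constructed once and passed around.
rng = np.random.default_rng(42)  # the seed is optional; shown here for reproducibility
sample = rng.choice(10, size=3, replace=False)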

File tree

3 files changed: +10 -8 lines changed

compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py
compiler_opt/tools/generate_vocab.py
pyproject.toml

compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py

Lines changed: 3 additions & 1 deletion

@@ -367,6 +367,8 @@ def __init__(
     self._explore_on_features = explore_on_features
     logging.info('Reward key in exploration worker: %s', self._reward_key)

+    self._rng = np.random.default_rng()
+
   def compile_module(
       self,
       policy: Callable[[time_step.TimeStep | None], np.ndarray],
@@ -545,7 +547,7 @@ def explore_at_state_generator(
       distr_logits[replay_prefix[explore_step]] = -np.inf
       if all(-np.inf == logit for logit in distr_logits):
         break
-      replay_prefix[explore_step] = np.random.choice(
+      replay_prefix[explore_step] = self._rng.choice(
           range(distr_logits.shape[0]), p=scipy.special.softmax(distr_logits))
     base_policy = ExplorationWithPolicy(
         replay_prefix,
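
As a rough standalone sketch of the pattern used above (the class below is a hypothetical stand-in, not the actual exploration worker): one Generator is created in __init__ and later used to sample an action index from softmax-normalized logits, replacing the previous call to the global np.random.choice.

import numpy as np
import scipy.special

class _ExplorerSketch:
  """Hypothetical, stripped-down stand-in for the exploration worker."""

  def __init__(self):
    # One Generator per instance; nothing touches np.random's global state.
    self._rng = np.random.default_rng()

  def pick_action(self, distr_logits: np.ndarray) -> int:
    # Sample an index proportionally to softmax(logits), mirroring the diff above.
    probs = scipy.special.softmax(distr_logits)
    return int(self._rng.choice(range(distr_logits.shape[0]), p=probs))

explorer = _ExplorerSketch()
print(explorer.pick_action(np.array([0.1, 2.0, -np.inf])))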

compiler_opt/tools/generate_vocab.py

Lines changed: 6 additions & 6 deletions

@@ -120,12 +120,13 @@ def _parser_fn(serialized_proto):
   return _parser_fn


-def _generate_vocab(feature_values_arrays, feature_name):
+def _generate_vocab(feature_values_arrays, feature_name,
+                    rng: np.random.Generator):
   """Downsample and generate vocab using brute force method."""
   feature_values = np.concatenate(feature_values_arrays)
   sample_length = math.floor(
       np.shape(feature_values)[0] * FLAGS.sampling_fraction)
-  values = np.random.choice(feature_values, sample_length, replace=False)
+  values = rng.choice(feature_values, sample_length, replace=False)
   bin_edges = np.quantile(values, np.linspace(0, 1, FLAGS.num_buckets))
   filename = os.path.join(FLAGS.output_dir, f'{feature_name}.buckets')
   with open(filename, 'w', encoding='utf-8') as f:
@@ -168,14 +169,13 @@ def main(_) -> None:
   dataset = dataset.map(parser_fn, num_parallel_calls=tf.data.AUTOTUNE)
   data_list = np.array(list(dataset.as_numpy_iterator()), dtype=object)
   data_list = data_list.swapaxes(0, 1)
+  rng = np.random.default_rng()

   with mp.Pool(FLAGS.parallelism) as pool:
     feature_names = sorted(sequence_features)
     for i, feature_values_arrays in enumerate(data_list):
-      pool.apply_async(_generate_vocab, (
-          feature_values_arrays,
-          feature_names[i],
-      ))
+      pool.apply_async(_generate_vocab,
+                       (feature_values_arrays, feature_names[i], rng))
     pool.close()
     pool.join()
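
A simplified, self-contained version of the sampling step above (hypothetical names; the FLAGS-driven sampling fraction and bucket count are replaced with plain parameters) shows how the Generator is threaded through as an ordinary argument. Generator objects support pickling, which is why passing one in the apply_async argument tuple works across the process boundary.

import math
import numpy as np

def downsample_buckets(feature_values: np.ndarray, rng: np.random.Generator,
                       sampling_fraction: float = 0.5, num_buckets: int = 10):
  """Hypothetical helper mirroring _generate_vocab's sampling and bucketing."""
  sample_length = math.floor(np.shape(feature_values)[0] * sampling_fraction)
  # Downsample without replacement using the caller-supplied Generator.
  values = rng.choice(feature_values, sample_length, replace=False)
  # Evenly spaced quantiles of the sample become the bucket boundaries.
  return np.quantile(values, np.linspace(0, 1, num_buckets))

rng = np.random.default_rng()
print(downsample_buckets(np.arange(100.0), rng))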

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [tool.ruff]
 line-length = 103
-lint.select = [ "C40", "C9", "E", "F", "PERF", "UP", "W", "YTT" ]
+lint.select = [ "C40", "C9", "E", "F", "PERF", "UP", "W", "YTT", "NPY", "PD" ]
 lint.ignore = [ "E722", "E731", "F401", "PERF203" ]
 lint.mccabe.max-complexity = 18
 target-version = "py310"
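
For a rough sense of what the newly enabled NPY rule family catches (the snippet below is illustrative, not from this repository): NPY002 reports legacy module-level np.random calls and points toward drawing from an explicit Generator instead.

import numpy as np

# Flagged by Ruff NPY002 (legacy global-state API):
np.random.seed(0)
noise_legacy = np.random.normal(size=4)

# Preferred form under NPY002: create a Generator and draw from it.
rng = np.random.default_rng(0)
noise = rng.normal(size=4)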
