Standardize output file naming

amas0 · amas0 · commit 02e627d478aa · 2025-11-06T13:15:43.000-05:00
diff --git a/cmdstanpy/stanfit/runset.py b/cmdstanpy/stanfit/runset.py
@@ -38,62 +38,59 @@ def __init__(
         self._args = args
         self._chains = chains
         self._one_process_per_chain = one_process_per_chain
-        if one_process_per_chain:
-            self._num_procs = chains
-        else:
-            self._num_procs = 1
+        self._num_procs = chains if one_process_per_chain else 1
         self._retcodes = [-1 for _ in range(self._num_procs)]
         self._timeout_flags = [False for _ in range(self._num_procs)]
         if chain_ids is None:
             chain_ids = [i + 1 for i in range(chains)]
         self._chain_ids = chain_ids
 
         if args.output_dir is not None:
-            self._output_dir = args.output_dir
-        else:
-            # make a per-run subdirectory of our master temp directory
-            self._output_dir = tempfile.mkdtemp(
-                prefix=args.model_name, dir=_TMPDIR
-            )
+            self._outdir = args.output_dir
+        else:  # make a per-run subdirectory of our master temp directory
+            self._outdir = tempfile.mkdtemp(prefix=args.model_name, dir=_TMPDIR)
 
         # output files prefix: ``<model_name>-<YYYYMMDDHHMM>_<chain_id>``
         self._base_outfile = (
             f'{args.model_name}-{datetime.now().strftime(time_fmt)}'
         )
-        # per-process outputs
-        self._stdout_files = [''] * self._num_procs
-        self._profile_files = [''] * self._num_procs  # optional
+        self._stdout_files, self._profile_files = [], []
+        self._csv_files, self._diagnostic_files = [], []
+
+        # per-process output files
         if one_process_per_chain:
-            for i in range(chains):
-                self._stdout_files[i] = self.file_path("-stdout.txt", id=i)
-                if args.save_profile:
-                    self._profile_files[i] = self.file_path(
-                        ".csv", extra="-profile", id=chain_ids[i]
-                    )
+            self._stdout_files = [
+                self.gen_file_name(".txt", extra="stdout", id=id)
+                for id in self._chain_ids
+            ]
+            if args.save_profile:
+                self._profile_files = [
+                    self.gen_file_name(".csv", extra="profile", id=id)
+                    for id in self._chain_ids
+                ]
         else:
-            self._stdout_files[0] = self.file_path("-stdout.txt")
+            self._stdout_files = [self.gen_file_name(".txt", extra="stdout")]
             if args.save_profile:
-                self._profile_files[0] = self.file_path(
-                    ".csv", extra="-profile"
-                )
+                self._profile_files = [
+                    self.gen_file_name(".csv", extra="profile")
+                ]
 
         # per-chain output files
-        self._csv_files: list[str] = [''] * chains
-        self._diagnostic_files = [''] * chains  # optional
-
         if chains == 1:
-            self._csv_files[0] = self.file_path(".csv")
+            self._csv_files = [self.gen_file_name(".csv")]
             if args.save_latent_dynamics:
-                self._diagnostic_files[0] = self.file_path(
-                    ".csv", extra="-diagnostic"
-                )
+                self._diagnostic_files = [
+                    self.gen_file_name(".csv", extra="diagnostic")
+                ]
         else:
-            for i in range(chains):
-                self._csv_files[i] = self.file_path(".csv", id=chain_ids[i])
-                if args.save_latent_dynamics:
-                    self._diagnostic_files[i] = self.file_path(
-                        ".csv", extra="-diagnostic", id=chain_ids[i]
-                    )
+            self._csv_files = [
+                self.gen_file_name(".csv", id=id) for id in self._chain_ids
+            ]
+            if args.save_latent_dynamics:
+                self._diagnostic_files = [
+                    self.gen_file_name(".csv", extra="diagnostic", id=id)
+                    for id in self._chain_ids
+                ]
 
     def __repr__(self) -> str:
         repr = 'RunSet: chains={}, chain_ids={}, num_processes={}'.format(
@@ -173,14 +170,14 @@ def cmd(self, idx: int) -> list[str]:
         else:
             return self._args.compose_command(
                 idx,
-                csv_file=self.file_path('.csv'),
+                csv_file=self.gen_file_name('.csv'),
                 diagnostic_file=(
-                    self.file_path(".csv", extra="-diagnostic")
+                    self.gen_file_name(".csv", extra="diagnostic")
                     if self._args.save_latent_dynamics
                     else None
                 ),
                 profile_file=(
-                    self.file_path(".csv", extra="-profile")
+                    self.gen_file_name(".csv", extra="profile")
                     if self._args.save_profile
                     else None
                 ),
@@ -216,16 +213,22 @@ def profile_files(self) -> list[str]:
         """List of paths to CmdStan profiler files."""
         return self._profile_files
 
-    # pylint: disable=invalid-name
-    def file_path(
+    def gen_file_name(
         self, suffix: str, *, extra: str = "", id: int | None = None
     ) -> str:
-        if id is not None:
-            suffix = f"_{id}{suffix}"
-        file = os.path.join(
-            self._output_dir, f"{self._base_outfile}{extra}{suffix}"
-        )
-        return file
+        """Return the path of an output file named per the CmdStan pattern."""
+        match (id, extra):
+            case (None, ""):  # neither id nor extra: base name + suffix
+                file = f"{self._base_outfile}{suffix}"
+            case (None, extra) if extra != "":  # extra only
+                file = f"{self._base_outfile}_{extra}{suffix}"
+            case (id, ""):  # id only; id=None was already matched above
+                file = f"{self._base_outfile}_{id}{suffix}"
+            case (id, extra) if extra != "":  # id first, then extra
+                file = f"{self._base_outfile}_{id}_{extra}{suffix}"
+            case _:  # not reachable when extra is a str; kept as a guard
+                raise ValueError("Cannot construct valid file name")
+        return os.path.join(self._outdir, file)  # placed in the run's outdir
 
     def _retcode(self, idx: int) -> int:
         """Get retcode for process[idx]."""
diff --git a/test/test_generate_quantities.py b/test/test_generate_quantities.py
@@ -533,7 +533,7 @@ def test_serialization() -> None:
     fit1 = model.generate_quantities(data=jdata, previous_fit=fit_sampling)
 
     dumped = pickle.dumps(fit1)
-    shutil.rmtree(fit1.runset._output_dir)
+    shutil.rmtree(fit1.runset._outdir)
     fit2: CmdStanGQ[CmdStanMCMC] = pickle.loads(dumped)
     variables1 = fit1.stan_variables()
     variables2 = fit2.stan_variables()
diff --git a/test/test_optimize.py b/test/test_optimize.py
@@ -664,7 +664,7 @@ def test_serialization() -> None:
         history_size=5,
     )
     dumped = pickle.dumps(mle1)
-    shutil.rmtree(mle1.runset._output_dir)
+    shutil.rmtree(mle1.runset._outdir)
     mle2: CmdStanMLE = pickle.loads(dumped)
     np.testing.assert_array_equal(
         mle1.optimized_params_np, mle2.optimized_params_np
diff --git a/test/test_sample.py b/test/test_sample.py
@@ -2135,7 +2135,7 @@ def test_serialization(stanfile: str = 'bernoulli.stan') -> None:
     )
     # Dump the result (which assembles draws) and delete the source files.
     dumped = pickle.dumps(bern_fit1)
-    shutil.rmtree(bern_fit1.runset._output_dir)
+    shutil.rmtree(bern_fit1.runset._outdir)
     # Load the serialized result and compare results.
     bern_fit2: CmdStanMCMC = pickle.loads(dumped)
     variables1 = bern_fit1.stan_variables()
diff --git a/test/test_variational.py b/test/test_variational.py
@@ -335,7 +335,7 @@ def test_serialization() -> None:
     model = CmdStanModel(stan_file=stan)
     variational1 = model.variational(algorithm='meanfield', seed=999999)
     dumped = pickle.dumps(variational1)
-    shutil.rmtree(variational1.runset._output_dir)
+    shutil.rmtree(variational1.runset._outdir)
     variational2: CmdStanVB = pickle.loads(dumped)
     np.testing.assert_array_equal(
         variational1.variational_sample, variational2.variational_sample

Original file line number	Diff line number	Diff line change
`@@ -664,7 +664,7 @@ def test_serialization() -> None:`
`664`	`664`	`history_size=5,`
`665`	`665`	`)`
`666`	`666`	`dumped = pickle.dumps(mle1)`
`667`		`- shutil.rmtree(mle1.runset._output_dir)`
	`667`	`+ shutil.rmtree(mle1.runset._outdir)`
`668`	`668`	`mle2: CmdStanMLE = pickle.loads(dumped)`
`669`	`669`	`np.testing.assert_array_equal(`
`670`	`670`	`mle1.optimized_params_np, mle2.optimized_params_np`
Original file line number	Diff line number	Diff line change
`@@ -2135,7 +2135,7 @@ def test_serialization(stanfile: str = 'bernoulli.stan') -> None:`
`2135`	`2135`	`)`
`2136`	`2136`	`# Dump the result (which assembles draws) and delete the source files.`
`2137`	`2137`	`dumped = pickle.dumps(bern_fit1)`
`2138`		`- shutil.rmtree(bern_fit1.runset._output_dir)`
	`2138`	`+ shutil.rmtree(bern_fit1.runset._outdir)`
`2139`	`2139`	`# Load the serialized result and compare results.`
`2140`	`2140`	`bern_fit2: CmdStanMCMC = pickle.loads(dumped)`
`2141`	`2141`	`variables1 = bern_fit1.stan_variables()`