Skip to content

Commit 4502aef

Browse files
committed
Remove functionality and tests for parsing metric info from CSV
1 parent 29ddbfd commit 4502aef

File tree

3 files changed

+0
-329
lines changed

3 files changed

+0
-329
lines changed

cmdstanpy/utils/stancsv.py

Lines changed: 0 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -103,44 +103,6 @@ def csv_bytes_list_to_numpy(
103103
return out
104104

105105

106-
def parse_hmc_adaptation_lines(
107-
comment_lines: list[bytes],
108-
) -> tuple[float | None, npt.NDArray[np.float64] | None]:
109-
"""Extracts step size/mass matrix information from the Stan CSV comment
110-
lines by parsing the adaptation section. If the diag_e metric is used,
111-
the returned mass matrix will be a 1D array of the diagnoal elements,
112-
if the dense_e metric is used, it will be a 2D array representing the
113-
entire matrix, and if unit_e is used then None will be returned.
114-
115-
Returns a (step_size, mass_matrix) tuple"""
116-
step_size, mass_matrix = None, None
117-
118-
cleaned_lines = (ln.lstrip(b"# ") for ln in comment_lines)
119-
in_matrix_block = False
120-
diag_e_metric = False
121-
matrix_lines = []
122-
for line in cleaned_lines:
123-
if in_matrix_block and line.strip():
124-
# Stop when we get to timing block
125-
if line.startswith(b"Elapsed Time"):
126-
break
127-
matrix_lines.append(line)
128-
elif line.startswith(b"Step size"):
129-
_, ss_str = line.split(b" = ")
130-
step_size = float(ss_str)
131-
elif line.startswith(b"Diagonal") or line.startswith(b"Elements"):
132-
in_matrix_block = True
133-
elif line.startswith(b"No free"):
134-
break
135-
elif b"diag_e" in line:
136-
diag_e_metric = True
137-
if matrix_lines:
138-
mass_matrix = csv_bytes_list_to_numpy(matrix_lines)
139-
if diag_e_metric and mass_matrix.shape[0] == 1:
140-
mass_matrix = mass_matrix[0]
141-
return step_size, mass_matrix
142-
143-
144106
def extract_key_val_pairs(
145107
comment_lines: list[bytes], remove_default_text: bool = True
146108
) -> Iterator[tuple[str, str]]:
@@ -346,67 +308,6 @@ def column_count(ln: bytes) -> int:
346308
)
347309

348310

349-
def raise_on_invalid_adaptation_block(comment_lines: list[bytes]) -> None:
    """Validate the adaptation block in the Stan CSV comment lines.

    Checks, in order, that a ``metric =`` entry and an "Adaptation
    terminated" line are present, that the next line reports a step size
    parseable as a float, and (for diag_e / dense_e) that the mass matrix
    header matches the metric. For dense_e, every matrix row must also have
    as many comma-separated columns as the first row, and there must be as
    many rows as columns.

    :param comment_lines: Comment lines of a Stan CSV file, as bytes.
    :raises ValueError: If the adaptation block or metric is missing, the
        step size is missing or invalid, the mass matrix specification is
        missing or inconsistent with the metric, or the block is truncated.
    """
    matrix_error = "invalid or missing mass matrix specification"

    def column_count(ln: bytes) -> int:
        return ln.count(b",") + 1

    # NOTE(review): numbering starts at 2 as in the original code;
    # presumably this lines up reported numbers with the file -- confirm.
    ln_iter = enumerate(comment_lines, start=2)

    def next_line(message: str) -> tuple[int, bytes]:
        # A truncated block must surface as ValueError like every other
        # failure here, not as a bare StopIteration escaping from next().
        try:
            return next(ln_iter)
        except StopIteration:
            raise ValueError(message) from None

    metric = None
    for _, line in ln_iter:
        if b"metric =" in line:
            _, val = line.split(b" = ")
            metric = val.replace(b"(Default)", b"").strip().decode()
        if b"Adaptation terminated" in line:
            break
    else:  # No adaptation block found
        raise ValueError("No adaptation block found, expecting metric")

    if metric is None:
        raise ValueError("No reported metric found")
    # At this point iterator should be in the adaptation block

    # Ensure step size exists and is valid float
    num, line = next_line("expecting step size, found end of comment lines")
    if not line.startswith(b"# Step size"):
        raise ValueError(
            f"line {num}: expecting step size, found:\n\t \"{line.decode()}\""
        )
    # partition() keeps a line without " = " on the "invalid step size"
    # path instead of failing with a tuple-unpacking error message.
    _, _, step_size = line.partition(b" = ")
    try:
        float(step_size.strip())
    except ValueError as exc:
        raise ValueError(
            f"line {num}: invalid step size: {step_size.decode()}"
        ) from exc

    # The unit metric has no mass matrix to validate
    if metric == "unit_e":
        return

    # Ensure mass matrix valid
    num, line = next_line(matrix_error)
    if not (
        (metric == "diag_e" and line.startswith(b"# Diagonal elements of "))
        or (metric == "dense_e" and line.startswith(b"# Elements of inverse"))
    ):
        raise ValueError(f"line {num}: {matrix_error}")

    # Validating mass matrix shape
    _, line = next_line(matrix_error)
    num_unconstrained_params = column_count(line)
    if metric == "diag_e":
        return
    # The first row has been read; the remaining rows must all exist and
    # have the same number of columns as the first.
    for _ in range(1, num_unconstrained_params):
        num, line = next_line(matrix_error)
        if column_count(line) != num_unconstrained_params:
            raise ValueError(f"line {num}: {matrix_error}")
409-
410311
def parse_timing_lines(
411312
comment_lines: list[bytes],
412313
) -> dict[str, float]:
@@ -489,7 +390,6 @@ def parse_sampler_metadata_from_csv(
489390
and header
490391
and not is_sneaky_fixed_param(header)
491392
):
492-
raise_on_invalid_adaptation_block(comments)
493393
max_depth: int = config["max_depth"] # type: ignore
494394
max_tree_hits, divs = extract_max_treedepth_and_divergence_counts(
495395
header, draws, max_depth, num_warmup

test/test_stancsv.py

Lines changed: 0 additions & 201 deletions
Original file line numberDiff line numberDiff line change
@@ -105,75 +105,6 @@ def test_parse_comments_header_and_draws() -> None:
105105
assert draws_lines == [b"3\n"]
106106

107107

108-
def test_parsing_adaptation_lines() -> None:
    """The step size and a one-element diagonal block are both parsed."""
    lines = [
        b"# Adaptation terminated\n",
        b"# Step size = 0.787025\n",
        b"# Diagonal elements of inverse mass matrix:\n",
        b"# 1\n",
        b"# Elapsed Time\n",
    ]
    step_size, mass_matrix = stancsv.parse_hmc_adaptation_lines(lines)
    assert step_size == 0.787025
    assert mass_matrix == 1
120-
121-
122-
def test_parsing_adaptation_lines_diagonal() -> None:
    """A diagonal adaptation block is returned as a 1D array."""
    adaptation_block = [
        b"diag_e",  # Will be present in the Stan CSV config
        b"# Adaptation terminated\n",
        b"# Step size = 0.787025\n",
        b"# Diagonal elements of inverse mass matrix:\n",
        b"# 1,2,3\n",
    ]
    parsed = stancsv.parse_hmc_adaptation_lines(adaptation_block)
    step_size, metric = parsed
    assert step_size == 0.787025
    assert metric is not None
    assert np.array_equal(metric, np.array([1, 2, 3]))
134-
135-
136-
def test_parsing_adaptation_lines_dense() -> None:
    """A dense adaptation block is returned as the full 2D matrix."""
    expected_metric = np.array(
        [
            [2.84091, 0.230843, 0.0509365],
            [0.230843, 3.92459, 0.126989],
            [0.0509365, 0.126989, 3.82718],
        ],
        dtype=np.float64,
    )
    adaptation_block = [
        b"# Adaptation terminated\n",
        b"# Step size = 0.775147\n",
        b"# Elements of inverse mass matrix:\n",
        b"# 2.84091, 0.230843, 0.0509365\n",
        b"# 0.230843, 3.92459, 0.126989\n",
        b"# 0.0509365, 0.126989, 3.82718\n",
    ]
    step_size, metric = stancsv.parse_hmc_adaptation_lines(adaptation_block)
    assert step_size == 0.775147
    assert metric is not None
    assert np.array_equal(metric, expected_metric)
157-
158-
159-
def test_parsing_adaptation_lines_missing_everything() -> None:
    """No step size line and no matrix rows gives (None, None)."""
    header_only = [
        b"# Adaptation terminated\n",
        b"# Elements of inverse mass matrix:\n",
    ]
    result = stancsv.parse_hmc_adaptation_lines(header_only)
    assert result == (None, None)
165-
166-
167-
def test_parsing_adaptation_lines_no_free_params() -> None:
    """A "No free parameters" block yields no mass matrix."""
    unit_metric_block = [
        b"# Adaptation terminated\n",
        b"# Step size = 1.77497\n",
        b"# No free parameters for unit metric\n",
    ]
    parsed = stancsv.parse_hmc_adaptation_lines(unit_metric_block)
    _, metric = parsed
    assert metric is None
175-
176-
177108
def test_csv_polars_and_numpy_equiv() -> None:
178109
lines = [
179110
b"-6.76206,1,0.787025,1,1,0,6.81411,0.229458\n",
@@ -512,138 +443,6 @@ def test_inconsistent_draws_shape_empty() -> None:
512443
stancsv.raise_on_inconsistent_draws_shape("", [])
513444

514445

515-
def test_invalid_adaptation_block_good() -> None:
    """The comments of bernoulli_output_1.csv pass validation."""
    comments, *_ = stancsv.parse_comments_header_and_draws(
        os.path.join(DATAFILES_PATH, "bernoulli_output_1.csv")
    )
    stancsv.raise_on_invalid_adaptation_block(comments)
519-
520-
521-
def test_invalid_adaptation_block_missing() -> None:
    """Lines without an "Adaptation terminated" marker are rejected."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta\n"
    )
    no_adaptation_block = [
        b"# metric = diag_e (Default)\n",
        csv_header,
        b"-6.76206,1,0.787025,1,1,0,6.81411,0.229458\n",
        b"# \n",
        b"# Elapsed Time: 0.001332 seconds (Warm-up)\n",
    ]
    with pytest.raises(ValueError, match="expecting metric"):
        stancsv.raise_on_invalid_adaptation_block(no_adaptation_block)
534-
535-
536-
def test_invalid_adaptation_block_no_metric() -> None:
    """An adaptation block with no "metric =" entry is rejected."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta\n"
    )
    metricless_lines = [
        csv_header,
        b"# Adaptation terminated\n",
        b"# Step size = 0.787025\n",
        b"# Diagonal elements of inverse mass matrix:\n",
        b"# 1\n",
    ]
    with pytest.raises(ValueError, match="No reported metric"):
        stancsv.raise_on_invalid_adaptation_block(metricless_lines)
549-
550-
551-
def test_invalid_adaptation_block_invalid_step_size() -> None:
    """A step size that is not a float is rejected."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta\n"
    )
    bad_step_size_lines = [
        b"# metric = diag_e (Default)\n",
        csv_header,
        b"# Adaptation terminated\n",
        b"# Step size = bad\n",
        b"# Diagonal elements of inverse mass matrix:\n",
        b"# 1\n",
    ]
    with pytest.raises(ValueError, match="invalid step size"):
        stancsv.raise_on_invalid_adaptation_block(bad_step_size_lines)
565-
566-
567-
def test_invalid_adaptation_block_mismatched_structure() -> None:
    """A diag_e metric followed by a dense matrix header is rejected."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta\n"
    )
    mismatched_lines = [
        b"# metric = diag_e (Default)\n",
        csv_header,
        b"# Adaptation terminated\n",
        b"# Step size = 0.787025\n",
        b"# Elements of inverse mass matrix:\n",
        b"# 1\n",
    ]
    with pytest.raises(ValueError, match="invalid or missing"):
        stancsv.raise_on_invalid_adaptation_block(mismatched_lines)
581-
582-
583-
def test_invalid_adaptation_block_missing_step_size() -> None:
    """An adaptation block without a step size line is rejected."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta\n"
    )
    no_step_size_lines = [
        b"# metric = diag_e (Default)\n",
        csv_header,
        b"# Adaptation terminated\n",
        b"# Diagonal elements of inverse mass matrix:\n",
        b"# 1\n",
    ]
    with pytest.raises(ValueError, match="expecting step size"):
        stancsv.raise_on_invalid_adaptation_block(no_step_size_lines)
596-
597-
598-
def test_invalid_adaptation_block_unit_e() -> None:
    """A unit_e block with a "No free parameters" line passes validation."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta\n"
    )
    unit_metric_lines = [
        b"# metric = unit_e\n",
        csv_header,
        b"# Adaptation terminated\n",
        b"# Step size = 1.77497\n",
        b"# No free parameters for unit metric\n",
    ]
    stancsv.raise_on_invalid_adaptation_block(unit_metric_lines)
610-
611-
612-
def test_invalid_adaptation_block_dense_e_valid() -> None:
    """A dense_e block with a square 3x3 matrix passes validation."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta.1,theta.2,theta.3\n"
    )
    dense_lines = [
        b"# metric = dense_e\n",
        csv_header,
        b"# Adaptation terminated\n",
        b"# Step size = 0.775147\n",
        b"# Elements of inverse mass matrix:\n",
        b"# 2.84091, 0.230843, 0.0509365\n",
        b"# 0.230843, 3.92459, 0.126989\n",
        b"# 0.0509365, 0.126989, 3.82718\n",
    ]
    stancsv.raise_on_invalid_adaptation_block(dense_lines)
627-
628-
629-
def test_invalid_adaptation_block_dense_e_invalid() -> None:
    """A dense_e block whose rows differ in column count is rejected."""
    csv_header = (
        b"lp__,accept_stat__,stepsize__,treedepth__,"
        b"n_leapfrog__,divergent__,energy__,theta.1,theta.2,theta.3\n"
    )
    ragged_dense_lines = [
        b"# metric = dense_e\n",
        csv_header,
        b"# Adaptation terminated\n",
        b"# Step size = 0.775147\n",
        b"# Elements of inverse mass matrix:\n",
        b"# 2.84091, 0.230843, 0.0509365\n",
        b"# 2.84091, 0.230843\n",
        b"# 0.230843, 3.92459\n",
    ]
    with pytest.raises(ValueError, match="invalid or missing"):
        stancsv.raise_on_invalid_adaptation_block(ragged_dense_lines)
645-
646-
647446
def test_parsing_timing_lines() -> None:
648447
lines = [
649448
b"# \n",

test/test_utils.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -337,34 +337,6 @@ def test_check_sampler_csv_4() -> None:
337337
check_sampler_csv(csv_bad)
338338

339339

340-
def test_check_sampler_csv_metric_1() -> None:
    """output_bad_metric_1.csv fails with an 'expecting metric' error."""
    expected_message = 'expecting metric'
    bad_csv_path = os.path.join(DATAFILES_PATH, 'output_bad_metric_1.csv')
    with raises_nested(Exception, expected_message):
        check_sampler_csv(bad_csv_path)
344-
345-
346-
def test_check_sampler_csv_metric_2() -> None:
    """output_bad_metric_2.csv fails with an 'invalid step size' error."""
    expected_message = 'invalid step size'
    bad_csv_path = os.path.join(DATAFILES_PATH, 'output_bad_metric_2.csv')
    with raises_nested(Exception, expected_message):
        check_sampler_csv(bad_csv_path)
350-
351-
352-
def test_check_sampler_csv_metric_3() -> None:
    """output_bad_metric_3.csv fails with a mass matrix error."""
    expected_message = 'invalid or missing mass matrix specification'
    bad_csv_path = os.path.join(DATAFILES_PATH, 'output_bad_metric_3.csv')
    with raises_nested(Exception, expected_message):
        check_sampler_csv(bad_csv_path)
358-
359-
360-
def test_check_sampler_csv_metric_4() -> None:
    """output_bad_metric_4.csv fails with a mass matrix error."""
    expected_message = 'invalid or missing mass matrix specification'
    bad_csv_path = os.path.join(DATAFILES_PATH, 'output_bad_metric_4.csv')
    with raises_nested(Exception, expected_message):
        check_sampler_csv(bad_csv_path)
366-
367-
368340
def test_check_sampler_csv_thin() -> None:
369341
stan = os.path.join(DATAFILES_PATH, 'bernoulli.stan')
370342
bern_model = CmdStanModel(stan_file=stan)

0 commit comments

Comments
 (0)