Remove parsing/validation of HMC adaptation info (step size, metric) from Stan CSV, plus tests

amas0 · amas0 · commit 4502aefc9d68 · 2025-12-02T15:27:39.000-05:00
diff --git a/cmdstanpy/utils/stancsv.py b/cmdstanpy/utils/stancsv.py
@@ -103,44 +103,6 @@ def csv_bytes_list_to_numpy(
     return out
 
 
-def parse_hmc_adaptation_lines(
-    comment_lines: list[bytes],
-) -> tuple[float | None, npt.NDArray[np.float64] | None]:
-    """Extracts step size/mass matrix information from the Stan CSV comment
-    lines by parsing the adaptation section. If the diag_e metric is used,
-    the returned mass matrix will be a 1D array of the diagnoal elements,
-    if the dense_e metric is used, it will be a 2D array representing the
-    entire matrix, and if unit_e is used then None will be returned.
-
-    Returns a (step_size, mass_matrix) tuple"""
-    step_size, mass_matrix = None, None
-
-    cleaned_lines = (ln.lstrip(b"# ") for ln in comment_lines)
-    in_matrix_block = False
-    diag_e_metric = False
-    matrix_lines = []
-    for line in cleaned_lines:
-        if in_matrix_block and line.strip():
-            # Stop when we get to timing block
-            if line.startswith(b"Elapsed Time"):
-                break
-            matrix_lines.append(line)
-        elif line.startswith(b"Step size"):
-            _, ss_str = line.split(b" = ")
-            step_size = float(ss_str)
-        elif line.startswith(b"Diagonal") or line.startswith(b"Elements"):
-            in_matrix_block = True
-        elif line.startswith(b"No free"):
-            break
-        elif b"diag_e" in line:
-            diag_e_metric = True
-    if matrix_lines:
-        mass_matrix = csv_bytes_list_to_numpy(matrix_lines)
-        if diag_e_metric and mass_matrix.shape[0] == 1:
-            mass_matrix = mass_matrix[0]
-    return step_size, mass_matrix
-
-
 def extract_key_val_pairs(
     comment_lines: list[bytes], remove_default_text: bool = True
 ) -> Iterator[tuple[str, str]]:
@@ -346,67 +308,6 @@ def column_count(ln: bytes) -> int:
             )
 
 
-def raise_on_invalid_adaptation_block(comment_lines: list[bytes]) -> None:
-    """Throws ValueErrors if the parsed adaptation block is invalid, e.g.
-    the metric information is not present, consistent with the rest of
-    the file, or the step size info cannot be processed."""
-
-    def column_count(ln: bytes) -> int:
-        return ln.count(b",") + 1
-
-    ln_iter = enumerate(comment_lines, start=2)
-    metric = None
-    for _, line in ln_iter:
-        if b"metric =" in line:
-            _, val = line.split(b" = ")
-            metric = val.replace(b"(Default)", b"").strip().decode()
-        if b"Adaptation terminated" in line:
-            break
-    else:  # No adaptation block found
-        raise ValueError("No adaptation block found, expecting metric")
-
-    if metric is None:
-        raise ValueError("No reported metric found")
-    # At this point iterator should be in the adaptation block
-
-    # Ensure step size exists and is valid float
-    num, line = next(ln_iter)
-    if not line.startswith(b"# Step size"):
-        raise ValueError(
-            f"line {num}: expecting step size, found:\n\t \"{line.decode()}\""
-        )
-    _, step_size = line.split(b" = ")
-    try:
-        float(step_size.strip())
-    except ValueError as exc:
-        raise ValueError(
-            f"line {num}: invalid step size: {step_size.decode()}"
-        ) from exc
-
-    # Ensure mass matrix valid
-    num, line = next(ln_iter)
-    if metric == "unit_e":
-        return
-    if not (
-        (metric == "diag_e" and line.startswith(b"# Diagonal elements of "))
-        or (metric == "dense_e" and line.startswith(b"# Elements of inverse"))
-    ):
-        raise ValueError(
-            f"line {num}: invalid or missing mass matrix specification"
-        )
-
-    # Validating mass matrix shape
-    _, line = next(ln_iter)
-    num_unconstrained_params = column_count(line)
-    if metric == "diag_e":
-        return
-    for (num, line), _ in zip(ln_iter, range(1, num_unconstrained_params)):
-        if column_count(line) != num_unconstrained_params:
-            raise ValueError(
-                f"line {num}: invalid or missing mass matrix specification"
-            )
-
-
 def parse_timing_lines(
     comment_lines: list[bytes],
 ) -> dict[str, float]:
@@ -489,7 +390,6 @@ def parse_sampler_metadata_from_csv(
             and header
             and not is_sneaky_fixed_param(header)
         ):
-            raise_on_invalid_adaptation_block(comments)
             max_depth: int = config["max_depth"]  # type: ignore
             max_tree_hits, divs = extract_max_treedepth_and_divergence_counts(
                 header, draws, max_depth, num_warmup
diff --git a/test/test_stancsv.py b/test/test_stancsv.py
@@ -105,75 +105,6 @@ def test_parse_comments_header_and_draws() -> None:
     assert draws_lines == [b"3\n"]
 
 
-def test_parsing_adaptation_lines() -> None:
-    lines = [
-        b"# Adaptation terminated\n",
-        b"# Step size = 0.787025\n",
-        b"# Diagonal elements of inverse mass matrix:\n",
-        b"# 1\n",
-        b"# Elapsed Time\n",
-    ]
-    step_size, mass_matrix = stancsv.parse_hmc_adaptation_lines(lines)
-    assert step_size == 0.787025
-    print(mass_matrix)
-    assert mass_matrix == 1
-
-
-def test_parsing_adaptation_lines_diagonal() -> None:
-    lines = [
-        b"diag_e",  # Will be present in the Stan CSV config
-        b"# Adaptation terminated\n",
-        b"# Step size = 0.787025\n",
-        b"# Diagonal elements of inverse mass matrix:\n",
-        b"# 1,2,3\n",
-    ]
-    step_size, mass_matrix = stancsv.parse_hmc_adaptation_lines(lines)
-    assert step_size == 0.787025
-    assert mass_matrix is not None
-    assert np.array_equal(mass_matrix, np.array([1, 2, 3]))
-
-
-def test_parsing_adaptation_lines_dense() -> None:
-    lines = [
-        b"# Adaptation terminated\n",
-        b"# Step size = 0.775147\n",
-        b"# Elements of inverse mass matrix:\n",
-        b"# 2.84091, 0.230843, 0.0509365\n",
-        b"# 0.230843, 3.92459, 0.126989\n",
-        b"# 0.0509365, 0.126989, 3.82718\n",
-    ]
-    step_size, mass_matrix = stancsv.parse_hmc_adaptation_lines(lines)
-    expected = np.array(
-        [
-            [2.84091, 0.230843, 0.0509365],
-            [0.230843, 3.92459, 0.126989],
-            [0.0509365, 0.126989, 3.82718],
-        ],
-        dtype=np.float64,
-    )
-    assert step_size == 0.775147
-    assert mass_matrix is not None
-    assert np.array_equal(mass_matrix, expected)
-
-
-def test_parsing_adaptation_lines_missing_everything() -> None:
-    lines = [
-        b"# Adaptation terminated\n",
-        b"# Elements of inverse mass matrix:\n",
-    ]
-    assert stancsv.parse_hmc_adaptation_lines(lines) == (None, None)
-
-
-def test_parsing_adaptation_lines_no_free_params() -> None:
-    lines = [
-        b"# Adaptation terminated\n",
-        b"# Step size = 1.77497\n",
-        b"# No free parameters for unit metric\n",
-    ]
-    _, mass_matrix = stancsv.parse_hmc_adaptation_lines(lines)
-    assert mass_matrix is None
-
-
 def test_csv_polars_and_numpy_equiv() -> None:
     lines = [
         b"-6.76206,1,0.787025,1,1,0,6.81411,0.229458\n",
@@ -512,138 +443,6 @@ def test_inconsistent_draws_shape_empty() -> None:
     stancsv.raise_on_inconsistent_draws_shape("", [])
 
 
-def test_invalid_adaptation_block_good() -> None:
-    csv_path = os.path.join(DATAFILES_PATH, "bernoulli_output_1.csv")
-    comments, *_ = stancsv.parse_comments_header_and_draws(csv_path)
-    stancsv.raise_on_invalid_adaptation_block(comments)
-
-
-def test_invalid_adaptation_block_missing() -> None:
-    lines = [
-        b"#         metric = diag_e (Default)\n",
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta\n"
-        ),
-        b"-6.76206,1,0.787025,1,1,0,6.81411,0.229458\n",
-        b"# \n",
-        b"#  Elapsed Time: 0.001332 seconds (Warm-up)\n",
-    ]
-    with pytest.raises(ValueError, match="expecting metric"):
-        stancsv.raise_on_invalid_adaptation_block(lines)
-
-
-def test_invalid_adaptation_block_no_metric() -> None:
-    lines = [
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta\n"
-        ),
-        b"# Adaptation terminated\n",
-        b"# Step size = 0.787025\n",
-        b"# Diagonal elements of inverse mass matrix:\n",
-        b"# 1\n",
-    ]
-    with pytest.raises(ValueError, match="No reported metric"):
-        stancsv.raise_on_invalid_adaptation_block(lines)
-
-
-def test_invalid_adaptation_block_invalid_step_size() -> None:
-    lines = [
-        b"#         metric = diag_e (Default)\n",
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta\n"
-        ),
-        b"# Adaptation terminated\n",
-        b"# Step size = bad\n",
-        b"# Diagonal elements of inverse mass matrix:\n",
-        b"# 1\n",
-    ]
-    with pytest.raises(ValueError, match="invalid step size"):
-        stancsv.raise_on_invalid_adaptation_block(lines)
-
-
-def test_invalid_adaptation_block_mismatched_structure() -> None:
-    lines = [
-        b"#         metric = diag_e (Default)\n",
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta\n"
-        ),
-        b"# Adaptation terminated\n",
-        b"# Step size = 0.787025\n",
-        b"# Elements of inverse mass matrix:\n",
-        b"# 1\n",
-    ]
-    with pytest.raises(ValueError, match="invalid or missing"):
-        stancsv.raise_on_invalid_adaptation_block(lines)
-
-
-def test_invalid_adaptation_block_missing_step_size() -> None:
-    lines = [
-        b"#         metric = diag_e (Default)\n",
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta\n"
-        ),
-        b"# Adaptation terminated\n",
-        b"# Diagonal elements of inverse mass matrix:\n",
-        b"# 1\n",
-    ]
-    with pytest.raises(ValueError, match="expecting step size"):
-        stancsv.raise_on_invalid_adaptation_block(lines)
-
-
-def test_invalid_adaptation_block_unit_e() -> None:
-    lines = [
-        b"#         metric = unit_e\n",
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta\n"
-        ),
-        b"# Adaptation terminated\n",
-        b"# Step size = 1.77497\n",
-        b"# No free parameters for unit metric\n",
-    ]
-    stancsv.raise_on_invalid_adaptation_block(lines)
-
-
-def test_invalid_adaptation_block_dense_e_valid() -> None:
-    lines = [
-        b"#         metric = dense_e\n",
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta.1,theta.2,theta.3\n"
-        ),
-        b"# Adaptation terminated\n",
-        b"# Step size = 0.775147\n",
-        b"# Elements of inverse mass matrix:\n",
-        b"# 2.84091, 0.230843, 0.0509365\n",
-        b"# 0.230843, 3.92459, 0.126989\n",
-        b"# 0.0509365, 0.126989, 3.82718\n",
-    ]
-    stancsv.raise_on_invalid_adaptation_block(lines)
-
-
-def test_invalid_adaptation_block_dense_e_invalid() -> None:
-    lines = [
-        b"#         metric = dense_e\n",
-        (
-            b"lp__,accept_stat__,stepsize__,treedepth__,"
-            b"n_leapfrog__,divergent__,energy__,theta.1,theta.2,theta.3\n"
-        ),
-        b"# Adaptation terminated\n",
-        b"# Step size = 0.775147\n",
-        b"# Elements of inverse mass matrix:\n",
-        b"# 2.84091, 0.230843, 0.0509365\n",
-        b"# 2.84091, 0.230843\n",
-        b"# 0.230843, 3.92459\n",
-    ]
-    with pytest.raises(ValueError, match="invalid or missing"):
-        stancsv.raise_on_invalid_adaptation_block(lines)
-
-
 def test_parsing_timing_lines() -> None:
     lines = [
         b"# \n",
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -337,34 +337,6 @@ def test_check_sampler_csv_4() -> None:
         check_sampler_csv(csv_bad)
 
 
-def test_check_sampler_csv_metric_1() -> None:
-    csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_1.csv')
-    with raises_nested(Exception, 'expecting metric'):
-        check_sampler_csv(csv_bad)
-
-
-def test_check_sampler_csv_metric_2() -> None:
-    csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_2.csv')
-    with raises_nested(Exception, 'invalid step size'):
-        check_sampler_csv(csv_bad)
-
-
-def test_check_sampler_csv_metric_3() -> None:
-    csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_3.csv')
-    with raises_nested(
-        Exception, 'invalid or missing mass matrix specification'
-    ):
-        check_sampler_csv(csv_bad)
-
-
-def test_check_sampler_csv_metric_4() -> None:
-    csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_4.csv')
-    with raises_nested(
-        Exception, 'invalid or missing mass matrix specification'
-    ):
-        check_sampler_csv(csv_bad)
-
-
 def test_check_sampler_csv_thin() -> None:
     stan = os.path.join(DATAFILES_PATH, 'bernoulli.stan')
     bern_model = CmdStanModel(stan_file=stan)