From 9aa1d75d4c8a699bd4381a66ca2893ef3bf557bf Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Mon, 4 Aug 2025 09:23:53 +0200 Subject: [PATCH 1/2] chore: use DIST_NAME in docs - easier to read than NORMALIZED_DIST_NAME - addresses part of #986 --- docs/config.md | 2 +- docs/integrations.md | 8 ++++---- docs/overrides.md | 6 +++--- docs/usage.md | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/config.md b/docs/config.md index e4a77ef0..9a46f607 100644 --- a/docs/config.md +++ b/docs/config.md @@ -157,7 +157,7 @@ Callables or other Python objects have to be passed in `setup.py` (via the `use_ it is strongly recommended to use distribution-specific pretend versions (see below). -`SETUPTOOLS_SCM_PRETEND_VERSION_FOR_${NORMALIZED_DIST_NAME}` +`SETUPTOOLS_SCM_PRETEND_VERSION_FOR_${DIST_NAME}` : used as the primary source for the version number, in which case it will be an unparsed string. Specifying distribution-specific pretend versions will diff --git a/docs/integrations.md b/docs/integrations.md index e94a6ea0..69b18486 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -47,7 +47,7 @@ build: - export SETUPTOOLS_SCM_OVERRIDES_FOR_${READTHEDOCS_PROJECT//-/_}='{scm.git.pre_parse="fail_on_shallow"}' ``` -This configuration uses the `SETUPTOOLS_SCM_OVERRIDES_FOR_${NORMALIZED_DIST_NAME}` environment variable to override the `scm.git.pre_parse` setting specifically for your project when building on ReadTheDocs, forcing setuptools-scm to fail with a clear error if the repository is shallow. +This configuration uses the `SETUPTOOLS_SCM_OVERRIDES_FOR_${DIST_NAME}` environment variable to override the `scm.git.pre_parse` setting specifically for your project when building on ReadTheDocs, forcing setuptools-scm to fail with a clear error if the repository is shallow. 
## CI/CD and Package Publishing @@ -67,7 +67,7 @@ These local version components (`+g1a2b3c4d5`, `+dirty`) prevent uploading to Py #### The Solution -Use the `SETUPTOOLS_SCM_OVERRIDES_FOR_${NORMALIZED_DIST_NAME}` environment variable to override the `local_scheme` to `no-local-version` when building for upload to PyPI. +Use the `SETUPTOOLS_SCM_OVERRIDES_FOR_${DIST_NAME}` environment variable to override the `local_scheme` to `no-local-version` when building for upload to PyPI. ### GitHub Actions Example @@ -287,9 +287,9 @@ publish-release: #### Environment Variable Format -The environment variable `SETUPTOOLS_SCM_OVERRIDES_FOR_${NORMALIZED_DIST_NAME}` must be set where: +The environment variable `SETUPTOOLS_SCM_OVERRIDES_FOR_${DIST_NAME}` must be set where: -1. **`${NORMALIZED_DIST_NAME}`** is your package name normalized according to PEP 503: +1. **`${DIST_NAME}`** is your package name normalized according to PEP 503: - Convert to uppercase - Replace hyphens and dots with underscores - Examples: `my-package` → `MY_PACKAGE`, `my.package` → `MY_PACKAGE` diff --git a/docs/overrides.md b/docs/overrides.md index 1a0f2d54..4d136db2 100644 --- a/docs/overrides.md +++ b/docs/overrides.md @@ -7,7 +7,7 @@ setuptools-scm provides a mechanism to override the version number build time. the environment variable `SETUPTOOLS_SCM_PRETEND_VERSION` is used as the override source for the version number unparsed string. -to be specific about the package this applies for, one can use `SETUPTOOLS_SCM_PRETEND_VERSION_FOR_${NORMALIZED_DIST_NAME}` +to be specific about the package this applies for, one can use `SETUPTOOLS_SCM_PRETEND_VERSION_FOR_${DIST_NAME}` where the dist name normalization follows adapted PEP 503 semantics. ## pretend metadata @@ -17,7 +17,7 @@ setuptools-scm provides a mechanism to override individual version metadata fiel The environment variable `SETUPTOOLS_SCM_PRETEND_METADATA` accepts a TOML inline table with field overrides for the ScmVersion object. 
-To be specific about the package this applies for, one can use `SETUPTOOLS_SCM_PRETEND_METADATA_FOR_${NORMALIZED_DIST_NAME}` +To be specific about the package this applies for, one can use `SETUPTOOLS_SCM_PRETEND_METADATA_FOR_${DIST_NAME}` where the dist name normalization follows adapted PEP 503 semantics. ### Supported fields @@ -82,7 +82,7 @@ export SETUPTOOLS_SCM_PRETEND_METADATA='{node="g1337beef", distance=4}' ## config overrides -setuptools-scm parses the environment variable `SETUPTOOLS_SCM_OVERRIDES_FOR_${NORMALIZED_DIST_NAME}` +setuptools-scm parses the environment variable `SETUPTOOLS_SCM_OVERRIDES_FOR_${DIST_NAME}` as a toml inline map to override the configuration data from `pyproject.toml`. ## subprocess timeouts diff --git a/docs/usage.md b/docs/usage.md index efa922cb..7938b4e6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -221,7 +221,7 @@ Note that running this Dockerfile requires docker with BuildKit enabled To avoid BuildKit and mounting of the .git folder altogether, one can also pass the desired version as a build argument. -Note that `SETUPTOOLS_SCM_PRETEND_VERSION_FOR_${NORMALIZED_DIST_NAME}` +Note that `SETUPTOOLS_SCM_PRETEND_VERSION_FOR_${DIST_NAME}` is preferred over `SETUPTOOLS_SCM_PRETEND_VERSION`. 
From becaeb4c75552c8241dc861b085d0c91fb04d595 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Mon, 4 Aug 2025 10:20:31 +0200 Subject: [PATCH 2/2] harden environment override finding - match non-normalized suffixes - show close matches on miss for typos --- src/setuptools_scm/_overrides.py | 140 +++++++++++++++++- testing/test_overrides.py | 246 +++++++++++++++++++++++++++++++ 2 files changed, 378 insertions(+), 8 deletions(-) create mode 100644 testing/test_overrides.py diff --git a/src/setuptools_scm/_overrides.py b/src/setuptools_scm/_overrides.py index 5621534f..4e06b7a7 100644 --- a/src/setuptools_scm/_overrides.py +++ b/src/setuptools_scm/_overrides.py @@ -2,9 +2,12 @@ import dataclasses import os -import re +from difflib import get_close_matches from typing import Any +from typing import Mapping + +from packaging.utils import canonicalize_name from . import _config from . import _log @@ -19,18 +22,139 @@ PRETEND_METADATA_KEY_NAMED = PRETEND_METADATA_KEY + "_FOR_{name}" +def _search_env_vars_with_prefix( + prefix: str, dist_name: str, env: Mapping[str, str] +) -> list[tuple[str, str]]: + """Search environment variables with a given prefix for potential dist name matches. 
+ + Args: + prefix: The environment variable prefix (e.g., "SETUPTOOLS_SCM_PRETEND_VERSION_FOR_") + dist_name: The original dist name to match against + env: Environment dictionary to search in + + Returns: + List of (env_var_name, env_var_value) tuples for potential matches + """ + # Get the canonical name for comparison + canonical_dist_name = canonicalize_name(dist_name) + + matches = [] + for env_var, value in env.items(): + if env_var.startswith(prefix): + suffix = env_var[len(prefix) :] + # Normalize the suffix and compare to canonical dist name + try: + normalized_suffix = canonicalize_name(suffix.lower().replace("_", "-")) + if normalized_suffix == canonical_dist_name: + matches.append((env_var, value)) + except Exception: + # If normalization fails for any reason, skip this env var + continue + + return matches + + +def _find_close_env_var_matches( + prefix: str, expected_suffix: str, env: Mapping[str, str], threshold: float = 0.6 +) -> list[str]: + """Find environment variables with similar suffixes that might be typos. 
+ + Args: + prefix: The environment variable prefix + expected_suffix: The expected suffix (canonicalized dist name in env var format) + env: Environment dictionary to search in + threshold: Similarity threshold for matches (0.0 to 1.0) + + Returns: + List of environment variable names that are close matches + """ + candidates = [] + for env_var in env: + if env_var.startswith(prefix): + suffix = env_var[len(prefix) :] + candidates.append(suffix) + + # Use difflib to find close matches + close_matches = get_close_matches( + expected_suffix, candidates, n=3, cutoff=threshold + ) + + return [f"{prefix}{match}" for match in close_matches if match != expected_suffix] + + def read_named_env( - *, tool: str = "SETUPTOOLS_SCM", name: str, dist_name: str | None + *, + tool: str = "SETUPTOOLS_SCM", + name: str, + dist_name: str | None, + env: Mapping[str, str] = os.environ, ) -> str | None: - """ """ + """Read a named environment variable, with fallback search for dist-specific variants. + + This function first tries the standard normalized environment variable name. + If that's not found and a dist_name is provided, it searches for alternative + normalizations and warns about potential issues. + + Args: + tool: The tool prefix (default: "SETUPTOOLS_SCM") + name: The environment variable name component + dist_name: The distribution name for dist-specific variables + env: Environment dictionary to search in (defaults to os.environ) + + Returns: + The environment variable value if found, None otherwise + """ + + # First try the generic version + generic_val = env.get(f"{tool}_{name}") + if dist_name is not None: - # Normalize the dist name as per PEP 503. 
- normalized_dist_name = re.sub(r"[-_.]+", "-", dist_name) - env_var_dist_name = normalized_dist_name.replace("-", "_").upper() - val = os.environ.get(f"{tool}_{name}_FOR_{env_var_dist_name}") + # Normalize the dist name using packaging.utils.canonicalize_name + canonical_dist_name = canonicalize_name(dist_name) + env_var_dist_name = canonical_dist_name.replace("-", "_").upper() + expected_env_var = f"{tool}_{name}_FOR_{env_var_dist_name}" + + # Try the standard normalized name first + val = env.get(expected_env_var) if val is not None: return val - return os.environ.get(f"{tool}_{name}") + + # If not found, search for alternative normalizations + prefix = f"{tool}_{name}_FOR_" + alternative_matches = _search_env_vars_with_prefix(prefix, dist_name, env) + + if alternative_matches: + # Found alternative matches - use the first one but warn + env_var, value = alternative_matches[0] + log.warning( + "Found environment variable '%s' for dist name '%s', " + "but expected '%s'. Consider using the standard normalized name.", + env_var, + dist_name, + expected_env_var, + ) + if len(alternative_matches) > 1: + other_vars = [var for var, _ in alternative_matches[1:]] + log.warning( + "Multiple alternative environment variables found: %s. Using '%s'.", + other_vars, + env_var, + ) + return value + + # No exact or alternative matches found - look for potential typos + close_matches = _find_close_env_var_matches(prefix, env_var_dist_name, env) + if close_matches: + log.warning( + "Environment variable '%s' not found for dist name '%s' " + "(canonicalized as '%s'). Did you mean one of these? 
%s", + expected_env_var, + dist_name, + canonical_dist_name, + close_matches, + ) + + return generic_val def _read_pretended_metadata_for( diff --git a/testing/test_overrides.py b/testing/test_overrides.py new file mode 100644 index 00000000..afba5339 --- /dev/null +++ b/testing/test_overrides.py @@ -0,0 +1,246 @@ +from __future__ import annotations + +import logging + +import pytest + +from setuptools_scm._overrides import _find_close_env_var_matches +from setuptools_scm._overrides import _search_env_vars_with_prefix +from setuptools_scm._overrides import read_named_env + + +class TestSearchEnvVarsWithPrefix: + """Test the _search_env_vars_with_prefix helper function.""" + + def test_exact_match(self) -> None: + """Test finding exact normalized matches.""" + env = {"SETUPTOOLS_SCM_TEST_FOR_MY_PACKAGE": "value1"} + + matches = _search_env_vars_with_prefix( + "SETUPTOOLS_SCM_TEST_FOR_", "my-package", env + ) + + assert len(matches) == 1 + assert matches[0] == ("SETUPTOOLS_SCM_TEST_FOR_MY_PACKAGE", "value1") + + def test_multiple_normalizations(self) -> None: + """Test finding various normalization patterns.""" + # Set up different normalization patterns + env = { + "SETUPTOOLS_SCM_TEST_FOR_MY_AWESOME_PKG": "value1", + "SETUPTOOLS_SCM_TEST_FOR_MYAWESOMEPKG": "value2", + "SETUPTOOLS_SCM_TEST_FOR_MY_AWESOME-PKG": "value3", # duplicate + } + + matches = _search_env_vars_with_prefix( + "SETUPTOOLS_SCM_TEST_FOR_", "my-awesome.pkg", env + ) + + # Should find the variants that match our normalization patterns + assert len(matches) >= 1 + env_vars = [var for var, _ in matches] + assert "SETUPTOOLS_SCM_TEST_FOR_MY_AWESOME_PKG" in env_vars + + def test_no_matches(self) -> None: + """Test when no matches are found.""" + # Set up unrelated env vars + env = { + "OTHER_VAR": "value", + "SETUPTOOLS_SCM_OTHER_FOR_SOMETHING": "value", + } + + matches = _search_env_vars_with_prefix( + "SETUPTOOLS_SCM_TEST_FOR_", "nonexistent", env + ) + + assert len(matches) == 0 + + def 
test_case_variations(self) -> None: + """Test that case variations are handled.""" + env = {"SETUPTOOLS_SCM_TEST_FOR_MYPACKAGE": "value1"} + + matches = _search_env_vars_with_prefix( + "SETUPTOOLS_SCM_TEST_FOR_", "MyPackage", env + ) + + assert len(matches) == 1 + assert matches[0][1] == "value1" + + +class TestFindCloseEnvVarMatches: + """Test the _find_close_env_var_matches helper function.""" + + def test_close_matches(self) -> None: + """Test finding close matches for potential typos.""" + env = { + "SETUPTOOLS_SCM_TEST_FOR_MY_PACKAG": "typo1", # missing 'e' + "SETUPTOOLS_SCM_TEST_FOR_MY_PAKAGE": "typo2", # 'c' -> 'k' + "SETUPTOOLS_SCM_TEST_FOR_OTHER_PKG": "unrelated", + } + + close_matches = _find_close_env_var_matches( + "SETUPTOOLS_SCM_TEST_FOR_", "MY_PACKAGE", env + ) + + # Should find the close matches but not the unrelated one + assert "SETUPTOOLS_SCM_TEST_FOR_MY_PACKAG" in close_matches + assert "SETUPTOOLS_SCM_TEST_FOR_MY_PAKAGE" in close_matches + assert "SETUPTOOLS_SCM_TEST_FOR_OTHER_PKG" not in close_matches + + def test_threshold(self) -> None: + """Test that threshold filtering works.""" + env = {"SETUPTOOLS_SCM_TEST_FOR_COMPLETELY_DIFFERENT": "unrelated"} + + close_matches = _find_close_env_var_matches( + "SETUPTOOLS_SCM_TEST_FOR_", "MY_PACKAGE", env, threshold=0.8 + ) + + # With high threshold, completely different string shouldn't match + assert len(close_matches) == 0 + + def test_no_close_matches(self) -> None: + """Test when no close matches exist.""" + env: dict[str, str] = {} + close_matches = _find_close_env_var_matches( + "SETUPTOOLS_SCM_TEST_FOR_", "MY_PACKAGE", env + ) + + assert len(close_matches) == 0 + + +class TestReadNamedEnvEnhanced: + """Test the enhanced read_named_env function.""" + + def test_standard_behavior_unchanged(self) -> None: + """Test that standard behavior still works.""" + # Generic env var + env = {"SETUPTOOLS_SCM_TEST": "generic_value"} + assert read_named_env(name="TEST", dist_name=None, env=env) == 
"generic_value" + + # Dist-specific env var (standard normalization) + env = {"SETUPTOOLS_SCM_TEST_FOR_MY_PACKAGE": "specific_value"} + assert ( + read_named_env(name="TEST", dist_name="my-package", env=env) + == "specific_value" + ) + + def test_alternative_normalization_found( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """Test finding alternative normalizations with warnings.""" + # Set up an alternative normalization pattern (user uses dots instead of canonical hyphens) + env = {"SETUPTOOLS_SCM_TEST_FOR_MY.PACKAGE": "alt_value"} + + with caplog.at_level(logging.WARNING): + result = read_named_env(name="TEST", dist_name="my.package", env=env) + + assert result == "alt_value" + assert "Found environment variable" in caplog.text + assert "but expected" in caplog.text + + def test_multiple_alternatives_warning( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """Test warning when multiple alternative normalizations exist.""" + # Set up multiple alternatives that represent the same canonical package name + # but use different normalizations in the env var + env = { + "SETUPTOOLS_SCM_TEST_FOR_MY.PACKAGE": "alt1", # dots instead of hyphens + "SETUPTOOLS_SCM_TEST_FOR_MY-PACKAGE": "alt2", # dashes instead of underscores + "SETUPTOOLS_SCM_TEST_FOR_my.package": "alt3", # lowercase + } + + with caplog.at_level(logging.WARNING): + result = read_named_env(name="TEST", dist_name="my.package", env=env) + + assert result in ["alt1", "alt2", "alt3"] # Should use one of them + assert "Multiple alternative environment variables found" in caplog.text + + def test_typo_suggestions(self, caplog: pytest.LogCaptureFixture) -> None: + """Test suggestions for potential typos.""" + # Set up a close but not exact match + env = {"SETUPTOOLS_SCM_TEST_FOR_MY_PACKAG": "typo_value"} + + with caplog.at_level(logging.WARNING): + result = read_named_env(name="TEST", dist_name="my-package", env=env) + + # Should return None (generic fallback) but warn about close matches + assert 
result is None + assert "Did you mean one of these?" in caplog.text + assert "SETUPTOOLS_SCM_TEST_FOR_MY_PACKAG" in caplog.text + + def test_fallback_to_generic(self) -> None: + """Test fallback to generic env var when dist-specific not found.""" + env = {"SETUPTOOLS_SCM_TEST": "generic_fallback"} + + result = read_named_env(name="TEST", dist_name="nonexistent-package", env=env) + + assert result == "generic_fallback" + + def test_no_generic_fallback(self) -> None: + """Test behavior when neither dist-specific nor generic env vars exist.""" + env: dict[str, str] = {} + result = read_named_env(name="TEST", dist_name="some-package", env=env) + + assert result is None + + def test_dist_specific_overrides_generic(self) -> None: + """Test that dist-specific env vars override generic ones.""" + env = { + "SETUPTOOLS_SCM_TEST": "generic", + "SETUPTOOLS_SCM_TEST_FOR_MY_PACKAGE": "specific", + } + + result = read_named_env(name="TEST", dist_name="my-package", env=env) + + assert result == "specific" + + def test_custom_tool_prefix(self) -> None: + """Test that custom tool prefixes work.""" + env = {"CUSTOM_TOOL_TEST_FOR_MY_PACKAGE": "custom_value"} + + result = read_named_env( + tool="CUSTOM_TOOL", name="TEST", dist_name="my-package", env=env + ) + + assert result == "custom_value" + + def test_complex_dist_name_normalization( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """Test complex dist name normalization scenarios.""" + # User uses a non-canonical format (keeping underscores instead of canonical hyphens) + # The canonical form of "complex.dist-name_with.dots" is "complex-dist-name-with-dots" + # which becomes "COMPLEX_DIST_NAME_WITH_DOTS" as env var + # But user set it with mixed format: + env = {"SETUPTOOLS_SCM_TEST_FOR_COMPLEX.DIST_NAME_WITH.DOTS": "value"} + + with caplog.at_level(logging.WARNING): + result = read_named_env( + name="TEST", dist_name="complex.dist-name_with.dots", env=env + ) + + assert result == "value" + assert "Found environment 
variable" in caplog.text + + def test_lowercase_environment_variable( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """Test that lowercase environment variables are found as alternatives.""" + env = {"SETUPTOOLS_SCM_TEST_FOR_my.package": "lowercase_value"} + + with caplog.at_level(logging.WARNING): + result = read_named_env(name="TEST", dist_name="my.package", env=env) + + assert result == "lowercase_value" + assert "Found environment variable" in caplog.text + assert "but expected" in caplog.text + + def test_edge_case_empty_dist_name(self) -> None: + """Test edge case with empty dist name.""" + env = {"SETUPTOOLS_SCM_TEST": "generic"} + + result = read_named_env(name="TEST", dist_name="", env=env) + + # Should still try dist-specific lookup but fall back to generic + assert result == "generic"