Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20250823-155551.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fixes issue with package locking, where using environment variables would cause lockfile regeneration in different environments
time: 2025-08-23T15:55:51.916929-07:00
custom:
Author: ttusing
Issue: "11953"
14 changes: 14 additions & 0 deletions core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,27 @@ class GitPackage(Package):
subdirectory: Optional[str] = None
unrendered: Dict[str, Any] = field(default_factory=dict)
name: Optional[str] = None
exclude_env_vars_from_hash: Optional[bool] = field(
default=None, metadata={"alias": "exclude-env-vars-from-hash"}
)

def get_revisions(self) -> List[str]:
if self.revision is None:
return []
else:
return [str(self.revision)]

def to_dict_for_hash(self) -> Dict[str, Any]:
"""Create a dict representation for hash calculation that can optionally exclude env vars"""
data = self.to_dict()
if self.exclude_env_vars_from_hash:
# Use unrendered git URL if available to exclude env vars from hash
if "git" in self.unrendered:
data["git"] = self.unrendered["git"]
# Remove the exclude-env-vars-from-hash flag itself from the hash
data.pop("exclude-env-vars-from-hash", None)
return data


@dataclass
class PrivatePackage(Package):
Expand Down
8 changes: 8 additions & 0 deletions core/dbt/events/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,14 @@ def message(self) -> str:
return f"Found duplicate package in packages.yml, removing: {self.removed_package}"


class DepsLockfileRegenerating(WarnLevel):
def code(self):
return "M034"

def message(self) -> str:
return f"Package lockfile is out of sync with packages.yml. Regenerating lockfile at: {self.lock_filepath}"


class DepsScrubbedPackageName(WarnLevel):
def code(self):
return "M035"
Expand Down
11 changes: 10 additions & 1 deletion core/dbt/task/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
DepsFoundDuplicatePackage,
DepsInstallInfo,
DepsListSubdirectory,
DepsLockfileRegenerating,
DepsLockUpdating,
DepsNoPackagesFound,
DepsNotifyUpdatesAvailable,
Expand Down Expand Up @@ -50,7 +51,14 @@ def _create_sha1_hash(packages: List[PackageSpec]) -> str:
Returns:
str: SHA1 hash of the packages list
"""
package_strs = [json.dumps(package.to_dict(), sort_keys=True) for package in packages]
package_strs = []
for package in packages:
if hasattr(package, "to_dict_for_hash"):
package_dict = package.to_dict_for_hash()
else:
package_dict = package.to_dict()
package_strs.append(json.dumps(package_dict, sort_keys=True))

package_strs = sorted(package_strs)

return sha1("\n".join(package_strs).encode("utf-8")).hexdigest()
Expand Down Expand Up @@ -220,6 +228,7 @@ def run(self) -> None:
current_hash = _create_sha1_hash(self.project.packages.packages)
previous_hash = load_yml_dict(lock_file_path).get(PACKAGE_LOCK_HASH_KEY, None)
if previous_hash != current_hash:
fire_event(DepsLockfileRegenerating(lock_filepath=lock_file_path))
self.lock()

# Early return when 'dbt deps --lock'
Expand Down
290 changes: 290 additions & 0 deletions tests/unit/contracts/test_git_package_env_var_exclusion.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ttusing this file is empty -- was there intended to be content in here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed - also fixed my commit history for the CLA bot I think, though it looks like I need to sign those commits. will address that along with any feedback!

Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
import json
from hashlib import sha1

from dbt.contracts.project import GitPackage
from dbt.task.deps import _create_sha1_hash
from tests.unit.utils import ContractTestCase


class TestGitPackageEnvVarExclusion(ContractTestCase):
ContractType = GitPackage

def test_git_package_with_exclude_env_vars_from_hash_true(self):
"""Test GitPackage with exclude-env-vars-from-hash set to True"""
dct = {
"git": "https://github.com/user/repo.git",
"revision": "v1.0.0",
"exclude-env-vars-from-hash": True,
}
package = self.ContractType(
git="https://github.com/user/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=True,
)
self.assert_from_dict(package, dct)

def test_git_package_with_exclude_env_vars_from_hash_false(self):
"""Test GitPackage with exclude-env-vars-from-hash set to False"""
dct = {
"git": "https://github.com/user/repo.git",
"revision": "v1.0.0",
"exclude-env-vars-from-hash": False,
}
package = self.ContractType(
git="https://github.com/user/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=False,
)
self.assert_from_dict(package, dct)

def test_git_package_without_exclude_env_vars_from_hash(self):
"""Test GitPackage without exclude-env-vars-from-hash (default behavior)"""
dct = {
"git": "https://github.com/user/repo.git",
"revision": "v1.0.0",
}
package = self.ContractType(
git="https://github.com/user/repo.git",
revision="v1.0.0",
)
self.assert_from_dict(package, dct)

def test_to_dict_for_hash_excludes_env_vars_when_flag_is_true(self):
"""Test that to_dict_for_hash excludes env vars when exclude_env_vars_from_hash is True"""
# Create a package with unrendered git URL containing env vars
package = GitPackage(
git="https://github.com/${GITHUB_USER}/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=True,
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

# Get the dict for hash calculation
hash_dict = package.to_dict_for_hash()

# Should use the unrendered git URL and exclude the flag itself
self.assertEqual(hash_dict["git"], "https://github.com/${GITHUB_USER}/repo.git")
self.assertEqual(hash_dict["revision"], "v1.0.0")
self.assertNotIn("exclude-env-vars-from-hash", hash_dict)

def test_to_dict_for_hash_includes_env_vars_when_flag_is_false(self):
"""Test that to_dict_for_hash includes env vars when exclude_env_vars_from_hash is False"""
# Create a package with rendered git URL
package = GitPackage(
git="https://github.com/actualuser/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=False,
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

# Get the dict for hash calculation
hash_dict = package.to_dict_for_hash()

# Should use the rendered git URL and include the flag
self.assertEqual(hash_dict["git"], "https://github.com/actualuser/repo.git")
self.assertEqual(hash_dict["revision"], "v1.0.0")
self.assertEqual(hash_dict["exclude-env-vars-from-hash"], False)

def test_to_dict_for_hash_includes_env_vars_when_flag_is_none(self):
"""Test that to_dict_for_hash includes env vars when exclude_env_vars_from_hash is None (default)"""
# Create a package with rendered git URL
package = GitPackage(
git="https://github.com/actualuser/repo.git",
revision="v1.0.0",
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

# Get the dict for hash calculation (should use regular to_dict)
hash_dict = package.to_dict_for_hash()

# Should use the rendered git URL since flag is None (falsy)
self.assertEqual(hash_dict["git"], "https://github.com/actualuser/repo.git")
self.assertEqual(hash_dict["revision"], "v1.0.0")
self.assertIsNone(hash_dict.get("exclude-env-vars-from-hash"))

def test_hash_calculation_with_exclude_env_vars_true(self):
"""Test that _create_sha1_hash uses to_dict_for_hash when available and excludes env vars"""
# Create two packages: one with env vars resolved, one without
package_with_env_vars = GitPackage(
git="https://github.com/actualuser/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=True,
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

package_without_env_vars = GitPackage(
git="https://github.com/differentuser/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=True,
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

# Both packages should produce the same hash since they use unrendered URLs
hash1 = _create_sha1_hash([package_with_env_vars])
hash2 = _create_sha1_hash([package_without_env_vars])

self.assertEqual(hash1, hash2, "Hashes should be identical when using unrendered URLs")

def test_hash_calculation_with_exclude_env_vars_false(self):
"""Test that _create_sha1_hash includes env vars when exclude_env_vars_from_hash is False"""
# Create two packages with different resolved git URLs
package1 = GitPackage(
git="https://github.com/user1/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=False,
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

package2 = GitPackage(
git="https://github.com/user2/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=False,
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

# These packages should produce different hashes since they use rendered URLs
hash1 = _create_sha1_hash([package1])
hash2 = _create_sha1_hash([package2])

self.assertNotEqual(hash1, hash2, "Hashes should be different when using rendered URLs")

def test_hash_calculation_backwards_compatibility(self):
"""Test that packages without to_dict_for_hash method still work (backwards compatibility)"""

# Create a simple mock package without to_dict_for_hash method
class SimplePackage:
def to_dict(self):
return {"git": "https://github.com/user/repo.git", "revision": "v1.0.0"}

simple_package = SimplePackage()

# This should not raise an error and should use to_dict instead
hash_result = _create_sha1_hash([simple_package])

# Verify the hash is calculated correctly
expected_dict = {"git": "https://github.com/user/repo.git", "revision": "v1.0.0"}
expected_str = json.dumps(expected_dict, sort_keys=True)
expected_hash = sha1(expected_str.encode("utf-8")).hexdigest()

self.assertEqual(hash_result, expected_hash)

def test_package_with_subdirectory_and_exclude_env_vars(self):
"""Test GitPackage with subdirectory and exclude-env-vars-from-hash"""
package = GitPackage(
git="https://github.com/actualuser/repo.git",
revision="v1.0.0",
subdirectory="subdir",
exclude_env_vars_from_hash=True,
unrendered={"git": "https://github.com/${GITHUB_USER}/repo.git"},
)

hash_dict = package.to_dict_for_hash()

# Should include subdirectory and use unrendered git URL
self.assertEqual(hash_dict["git"], "https://github.com/${GITHUB_USER}/repo.git")
self.assertEqual(hash_dict["revision"], "v1.0.0")
self.assertEqual(hash_dict["subdirectory"], "subdir")
self.assertNotIn("exclude-env-vars-from-hash", hash_dict)

def test_package_without_unrendered_git_url(self):
"""Test GitPackage with exclude_env_vars_from_hash=True but no unrendered git URL"""
package = GitPackage(
git="https://github.com/user/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=True,
# No unrendered dict or git key
)

hash_dict = package.to_dict_for_hash()

# Should use the regular git URL since no unrendered version is available
self.assertEqual(hash_dict["git"], "https://github.com/user/repo.git")
self.assertEqual(hash_dict["revision"], "v1.0.0")
self.assertNotIn("exclude-env-vars-from-hash", hash_dict)

def test_yaml_alias_support(self):
"""Test that the YAML alias 'exclude-env-vars-from-hash' works correctly"""
# Test creating from dict using the alias
dct_with_alias = {
"git": "https://github.com/user/repo.git",
"revision": "v1.0.0",
"exclude-env-vars-from-hash": True,
}
package = GitPackage.from_dict(dct_with_alias)

# The field should be accessible via the Python attribute name
self.assertTrue(package.exclude_env_vars_from_hash)

# Converting back to dict should use the alias
result_dict = package.to_dict()
self.assertIn("exclude-env-vars-from-hash", result_dict)
self.assertTrue(result_dict["exclude-env-vars-from-hash"])

def test_hash_consistency_across_environments(self):
"""Test that packages with same unrendered URLs produce consistent hashes"""
# Simulate packages from different environments with different resolved URLs
package_env1 = GitPackage(
git="https://github.com/user1/repo.git", # Different resolved URLs
revision="v1.0.0",
exclude_env_vars_from_hash=True,
unrendered={
"git": "https://github.com/${GITHUB_USER}/repo.git"
}, # Same unrendered URL
)

package_env2 = GitPackage(
git="https://github.com/user2/repo.git", # Different resolved URLs
revision="v1.0.0",
exclude_env_vars_from_hash=True,
unrendered={
"git": "https://github.com/${GITHUB_USER}/repo.git"
}, # Same unrendered URL
)

# Both should produce the same hash
hash1 = _create_sha1_hash([package_env1])
hash2 = _create_sha1_hash([package_env2])

self.assertEqual(
hash1, hash2, "Packages with same unrendered URLs should have consistent hashes"
)

def test_mixed_package_types_in_hash(self):
"""Test hash calculation with mixed package types (some with to_dict_for_hash, some without)"""
# GitPackage with to_dict_for_hash method
git_package = GitPackage(
git="https://github.com/user/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=True,
unrendered={"git": "https://github.com/${USER}/repo.git"},
)

# Mock package without to_dict_for_hash method
class SimplePackage:
def to_dict(self):
return {"tarball": "https://example.com/package.tar.gz", "name": "simple_package"}

simple_package = SimplePackage()

# Should handle mixed package types without error
hash_result = _create_sha1_hash([git_package, simple_package])

# Verify it's a valid SHA1 hash (40 hex characters)
self.assertEqual(len(hash_result), 40)
self.assertTrue(all(c in "0123456789abcdef" for c in hash_result))

def test_empty_unrendered_dict(self):
"""Test GitPackage with exclude_env_vars_from_hash=True but empty unrendered dict"""
package = GitPackage(
git="https://github.com/user/repo.git",
revision="v1.0.0",
exclude_env_vars_from_hash=True,
unrendered={}, # Empty dict, no git key
)

hash_dict = package.to_dict_for_hash()

# Should use the regular git URL since unrendered["git"] doesn't exist
self.assertEqual(hash_dict["git"], "https://github.com/user/repo.git")
self.assertEqual(hash_dict["revision"], "v1.0.0")
self.assertNotIn("exclude-env-vars-from-hash", hash_dict)