Skip to content

Commit e50d98b

Browse files
SNOW-2337918 Fix for duplicated dependencies in Snowpark (#2609)
* SNOW-2337918 Fix for duplicated dependencies in Snowpark * update RELEASE-NOTES.md * fit this in 3.12 * bring empty line back * one more empty line... we should have linter for Release Notes... * removed redundant tests and obvious comments; drop local imports * make real integration test
1 parent afc7a49 commit e50d98b

File tree

8 files changed

+345
-9
lines changed

8 files changed

+345
-9
lines changed

RELEASE-NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
* Fixed issues when pasting content with trailing new lines.
4141
* Improved output handling with streaming
4242
* Bumped `snowflake-connector-python` to 3.17.3
43+
* Fixed `snow snowpark deploy` failing on duplicated packages
4344
* Extend `Decimal` precision to 38
4445

4546

src/snowflake/cli/_plugins/snowpark/package/anaconda_packages.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,37 @@ def write_requirements_file_in_snowflake_format(
153153
):
154154
"""Saves requirements to a file in format accepted by Snowflake SQL commands."""
155155
log.info("Writing requirements into file %s", file_path.path)
156-
formatted_requirements = []
156+
157+
# Deduplicate requirements by package name, keeping the first occurrence
158+
seen_packages = set()
159+
deduplicated_requirements = []
160+
duplicate_packages = set()
161+
157162
for requirement in requirements:
158163
if requirement.name and requirement.name in self._packages:
159-
snowflake_name = self._packages[requirement.name].snowflake_name
160-
formatted_requirements.append(
161-
snowflake_name + requirement.formatted_specs
162-
)
164+
if requirement.name in seen_packages:
165+
duplicate_packages.add(requirement.name)
166+
log.warning(
167+
"Duplicate package '%s' found in Anaconda requirements. "
168+
"Ignoring: %s",
169+
requirement.name,
170+
requirement.name_and_version,
171+
)
172+
else:
173+
seen_packages.add(requirement.name)
174+
deduplicated_requirements.append(requirement)
175+
176+
if duplicate_packages:
177+
log.warning(
178+
"Found duplicate Anaconda packages: %s. "
179+
"Consider consolidating package versions in requirements.txt.",
180+
", ".join(sorted(duplicate_packages)),
181+
)
182+
183+
formatted_requirements = []
184+
for requirement in deduplicated_requirements:
185+
snowflake_name = self._packages[requirement.name].snowflake_name
186+
formatted_requirements.append(snowflake_name + requirement.formatted_specs)
163187

164188
if formatted_requirements:
165189
file_path.write_text("\n".join(formatted_requirements))

src/snowflake/cli/_plugins/snowpark/package_utils.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,14 +255,55 @@ def split_downloaded_dependencies(
255255
anaconda_packages: AnacondaPackages,
256256
skip_version_check: bool,
257257
) -> SplitDownloadedDependenciesResult:
258-
packages_metadata: Dict[str, WheelMetadata] = {
259-
meta.name: meta
258+
# Build metadata for all downloaded wheels
259+
all_wheels_metadata = [
260+
meta
260261
for meta in (
261262
WheelMetadata.from_wheel(wheel_path)
262263
for wheel_path in downloads_dir.glob("*.whl")
263264
)
264265
if meta is not None
265-
}
266+
]
267+
268+
# Detect and handle duplicate packages
269+
packages_metadata: Dict[str, WheelMetadata] = {}
270+
duplicate_packages = set()
271+
272+
for meta in all_wheels_metadata:
273+
if meta.name in packages_metadata:
274+
duplicate_packages.add(meta.name)
275+
log.warning(
276+
"Multiple versions of package '%s' found in dependencies. "
277+
"Using: %s, Ignoring: %s",
278+
meta.name,
279+
packages_metadata[meta.name].wheel_path.name,
280+
meta.wheel_path.name,
281+
)
282+
else:
283+
packages_metadata[meta.name] = meta
284+
285+
if duplicate_packages:
286+
log.warning(
287+
"Found duplicate packages: %s. This may cause deployment issues. "
288+
"Consider pinning package versions in requirements.txt to avoid conflicts.",
289+
", ".join(sorted(duplicate_packages)),
290+
)
291+
292+
# Remove duplicate wheel files to prevent them from being extracted
293+
for meta in all_wheels_metadata:
294+
if (
295+
meta.name in duplicate_packages
296+
and meta not in packages_metadata.values()
297+
):
298+
try:
299+
meta.wheel_path.unlink()
300+
log.debug("Removed duplicate wheel file: %s", meta.wheel_path.name)
301+
except Exception as e:
302+
log.warning(
303+
"Failed to remove duplicate wheel file %s: %s",
304+
meta.wheel_path.name,
305+
e,
306+
)
266307
available_in_snowflake_dependencies: Dict = {}
267308
unavailable_dependencies: Dict = {}
268309

tests/snowpark/test_build.py

Lines changed: 230 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,23 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
from typing import Set
15-
from unittest.mock import patch
15+
from unittest.mock import MagicMock, patch
1616
from zipfile import ZipFile
1717

1818
import pytest
19+
from snowflake.cli._plugins.snowpark.models import (
20+
Requirement,
21+
WheelMetadata,
22+
)
23+
from snowflake.cli._plugins.snowpark.package.anaconda_packages import (
24+
AnacondaPackages,
25+
AvailablePackage,
26+
)
1927
from snowflake.cli._plugins.snowpark.package_utils import (
2028
DownloadUnavailablePackagesResult,
29+
split_downloaded_dependencies,
2130
)
31+
from snowflake.cli.api.secure_path import SecurePath
2232

2333

2434
@patch("snowflake.cli._plugins.snowpark.package_utils.download_unavailable_packages")
@@ -66,3 +76,222 @@ def test_build_with_glob_patterns_in_artifacts(
6676
def _assert_zip_contains(app_zip: str, expected_files: Set[str]):
6777
zip_file = ZipFile(app_zip)
6878
assert set(zip_file.namelist()) == expected_files
79+
80+
81+
@patch("snowflake.cli._plugins.snowpark.package_utils.log")
def test_split_downloaded_dependencies_handles_duplicates(mock_log, tmp_path):
    """Check that two wheels of one package collapse into a single dependency.

    Regression test: when several versions of the same package were downloaded
    (e.g., httpx-0.27.0.whl and httpx-0.28.1.whl), both used to be included in
    dependencies.zip, and Snowflake deployment failed with
    'Package specified with multiple versions'.

    Besides counting the survivors, the test checks that the wheel left on disk
    is exactly the one reported as "Using", so deleting the wrong file is caught.
    """
    downloads_dir = tmp_path / "downloads"
    downloads_dir.mkdir()

    wheel_names = (
        "httpx-0.27.0-py3-none-any.whl",
        "httpx-0.28.1-py3-none-any.whl",
    )
    for wheel_name in wheel_names:
        (downloads_dir / wheel_name).touch()

    requirements_file = tmp_path / "requirements.txt"
    requirements_file.write_text("httpx\n")

    def mock_from_wheel(wheel_path):
        # The wheels are empty placeholder files, so metadata is fabricated;
        # both files describe the same package and differ only by path.
        return WheelMetadata(name="httpx", wheel_path=wheel_path, dependencies=[])

    with patch.object(WheelMetadata, "from_wheel", side_effect=mock_from_wheel):
        mock_anaconda = MagicMock(spec=AnacondaPackages)
        mock_anaconda.is_package_available.return_value = False

        result = split_downloaded_dependencies(
            requirements_file=SecurePath(requirements_file),
            downloads_dir=downloads_dir,
            anaconda_packages=mock_anaconda,
            skip_version_check=False,
        )

    # At least the per-package warning and the summary warning must be logged
    assert mock_log.warning.call_count >= 2

    # First warning: which wheel is used and which one is ignored
    first_call = mock_log.warning.call_args_list[0]
    assert "Multiple versions of package '%s' found" in first_call.args[0]
    package_name, used_wheel, ignored_wheel = first_call.args[1:4]
    assert package_name == "httpx"
    # Glob order is not guaranteed, so only require that each wheel plays one role
    assert {used_wheel, ignored_wheel} == set(wheel_names)

    # Second warning: summary of all duplicated packages
    second_call = mock_log.warning.call_args_list[1]
    assert "Found duplicate packages: %s" in second_call.args[0]
    assert second_call.args[1] == "httpx"

    # Only one version of httpx may be present in the result
    httpx_packages = [
        pkg
        for pkg in result.unavailable_dependencies_wheels
        if pkg.requirement.name == "httpx"
    ]
    assert (
        len(httpx_packages) == 1
    ), f"Expected 1 httpx package, got {len(httpx_packages)}"

    # The ignored wheel must be gone and the used one must still be on disk
    remaining_wheels = [path.name for path in downloads_dir.glob("httpx-*.whl")]
    assert (
        len(remaining_wheels) == 1
    ), f"Expected 1 remaining wheel file, got {len(remaining_wheels)}"
    assert remaining_wheels == [
        used_wheel
    ], f"Expected {used_wheel} to be kept, found {remaining_wheels}"
150+
151+
152+
@patch("snowflake.cli._plugins.snowpark.package.anaconda_packages.log")
def test_write_requirements_file_deduplicates_anaconda_packages(mock_log, tmp_path):
    """Check that repeated requirements for one package are written only once.

    Regression test: two entries for the same package (e.g., 'httpx==0.28.1'
    and 'httpx>=0.20.0') used to both land in requirements.snowflake.txt,
    which caused Snowflake deployment issues. The first entry must win.
    """
    anaconda_packages = AnacondaPackages(
        {
            "httpx": AvailablePackage(
                snowflake_name="httpx", versions={"0.28.1", "0.27.0"}
            )
        }
    )
    requirements = [
        Requirement.parse_line(spec) for spec in ("httpx==0.28.1", "httpx>=0.20.0")
    ]
    output_file = tmp_path / "requirements.snowflake.txt"

    anaconda_packages.write_requirements_file_in_snowflake_format(
        file_path=SecurePath(output_file), requirements=requirements
    )

    # At least the per-package warning and the summary warning are expected
    assert mock_log.warning.call_count >= 2
    duplicate_warning, summary_warning = mock_log.warning.call_args_list[:2]

    # Per-package warning names the package and the requirement that was dropped
    assert (
        "Duplicate package '%s' found in Anaconda requirements"
        in duplicate_warning.args[0]
    )
    assert duplicate_warning.args[1] == "httpx"
    assert duplicate_warning.args[2] == "httpx>=0.20.0"

    # Summary warning lists every duplicated package
    assert "Found duplicate Anaconda packages: %s" in summary_warning.args[0]
    assert summary_warning.args[1] == "httpx"

    # The file must contain a single httpx entry: the first requirement
    stripped_lines = (
        raw.strip() for raw in output_file.read_text().strip().split("\n")
    )
    httpx_lines = [line for line in stripped_lines if line and "httpx" in line]
    assert (
        len(httpx_lines) == 1
    ), f"Expected 1 httpx line, got {len(httpx_lines)}: {httpx_lines}"
    assert httpx_lines[0] == "httpx==0.28.1"
201+
202+
203+
def test_similar_package_names_not_treated_as_duplicates():
    """Check that a shared name prefix does not make two packages equal.

    'httpx' and 'httpx-retries' must resolve to different package names, both
    when parsed from requirement lines and when derived from wheel filenames,
    so that duplicate detection never merges them.
    """
    httpx_requirement = Requirement.parse_line("httpx==0.28.1")
    retries_requirement = Requirement.parse_line("httpx-retries==0.4.2")

    assert httpx_requirement.name == "httpx"
    # The hyphen in the requirement name is expected to become an underscore
    assert retries_requirement.name == "httpx_retries"
    assert httpx_requirement.name != retries_requirement.name

    httpx_wheel_name = WheelMetadata._get_name_from_wheel_filename(  # noqa: SLF001
        "httpx-0.28.1-py3-none-any.whl"
    )
    retries_wheel_name = WheelMetadata._get_name_from_wheel_filename(  # noqa: SLF001
        "httpx_retries-0.4.2-py3-none-any.whl"
    )

    assert httpx_wheel_name == "httpx"
    assert retries_wheel_name == "httpx_retries"
    assert httpx_wheel_name != retries_wheel_name
225+
226+
227+
@patch("snowflake.cli._plugins.snowpark.package_utils.log")
def test_multiple_different_packages_no_duplicates_detected(mock_log, tmp_path):
    """Check that distinct packages are never reported as duplicates.

    Regression test: legitimate different packages (httpx, httpx-retries,
    requests) must all be kept, no duplicate warning may be logged, and no
    wheel file may be removed.
    """
    downloads_dir = tmp_path / "downloads"
    downloads_dir.mkdir()

    # Wheel filename fragment -> package name reported by the fake metadata
    package_by_wheel_fragment = {
        "httpx-0.28.1": "httpx",
        "httpx_retries-0.4.2": "httpx_retries",
        "requests-2.31.0": "requests",
    }
    for wheel_filename in (
        "httpx-0.28.1-py3-none-any.whl",
        "httpx_retries-0.4.2-py3-none-any.whl",
        "requests-2.31.0-py3-none-any.whl",
    ):
        (downloads_dir / wheel_filename).touch()

    requirements_file = tmp_path / "requirements.txt"
    requirements_file.write_text("httpx\nhttpx-retries\nrequests\n")

    def mock_from_wheel(wheel_path):
        # First matching fragment wins; unknown wheels yield no metadata
        for fragment, package in package_by_wheel_fragment.items():
            if fragment in wheel_path.name:
                return WheelMetadata(
                    name=package, wheel_path=wheel_path, dependencies=[]
                )
        return None

    with patch.object(WheelMetadata, "from_wheel", side_effect=mock_from_wheel):
        mock_anaconda = MagicMock(spec=AnacondaPackages)
        mock_anaconda.is_package_available.return_value = False

        result = split_downloaded_dependencies(
            requirements_file=SecurePath(requirements_file),
            downloads_dir=downloads_dir,
            anaconda_packages=mock_anaconda,
            skip_version_check=False,
        )

    # None of the logged warnings may be about duplicates
    duplicate_warnings = []
    for logged_call in mock_log.warning.call_args_list:
        call = str(logged_call)
        if "Multiple versions of package" in call or "Found duplicate packages" in call:
            duplicate_warnings.append(call)
    assert (
        len(duplicate_warnings) == 0
    ), f"Unexpected duplicate warnings: {duplicate_warnings}"

    # All three packages must be present in the result
    package_names = {
        pkg.requirement.name for pkg in result.unavailable_dependencies_wheels
    }
    for expected_name in ("httpx", "httpx_retries", "requests"):
        assert expected_name in package_names
    assert len(package_names) == 3

    # Nothing may have been deleted from the downloads directory
    remaining_wheels = list(downloads_dir.glob("*.whl"))
    assert len(remaining_wheels) == 3
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def main():
    """Return the fixed greeting produced by this test project's handler."""
    greeting = "hello"
    return greeting
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
httpx>=0.20.0
2+
httpx-retries==0.4.2
3+
aiohttp>=3.8.0
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Test project definition: a single Snowpark function backed by app.main.
definition_version: 1
snowpark:
  project_name: "test_snowpark_project"
  stage_name: "test_stage"
  src: "app.py"
  functions:
    - name: func1
      handler: "app.main"
      signature: ""
      returns: string
      # Quoted on purpose: an unquoted 3.10 is resolved by YAML as the float 3.1,
      # which would silently turn the Python version into "3.1".
      runtime: "3.10"

0 commit comments

Comments
 (0)