-
Notifications
You must be signed in to change notification settings - Fork 1.2k
fix: sanitize git clone repo input url #5234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,11 +12,12 @@ | |
# language governing permissions and limitations under the License. | ||
from __future__ import absolute_import | ||
|
||
import pytest | ||
import os | ||
from pathlib import Path | ||
import subprocess | ||
from mock import patch, ANY | ||
from pathlib import Path | ||
|
||
import pytest | ||
from mock import ANY, patch | ||
|
||
from sagemaker import git_utils | ||
|
||
|
@@ -494,3 +495,212 @@ def test_git_clone_repo_codecommit_https_creds_not_stored_locally(tempdir, mkdte | |
with pytest.raises(subprocess.CalledProcessError) as error: | ||
git_utils.git_clone_repo(git_config, entry_point) | ||
assert "returned non-zero exit status" in str(error.value) | ||
|
||
|
||
class TestGitUrlSanitization: | ||
"""Test cases for Git URL sanitization to prevent injection attacks.""" | ||
|
||
def test_sanitize_git_url_valid_https_urls(self): | ||
"""Test that valid HTTPS URLs pass sanitization.""" | ||
valid_urls = [ | ||
"https://github.com/user/repo.git", | ||
"https://gitlab.com/user/repo.git", | ||
"https://[email protected]/user/repo.git", | ||
"https://user:[email protected]/user/repo.git", | ||
"http://internal-git.company.com/repo.git", | ||
] | ||
|
||
for url in valid_urls: | ||
# Should not raise any exception | ||
result = git_utils._sanitize_git_url(url) | ||
assert result == url | ||
|
||
def test_sanitize_git_url_valid_ssh_urls(self): | ||
"""Test that valid SSH URLs pass sanitization.""" | ||
valid_urls = [ | ||
"[email protected]:user/repo.git", | ||
"[email protected]:user/repo.git", | ||
"ssh://[email protected]/user/repo.git", | ||
"ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/", # 0 @ symbols - valid for ssh:// | ||
"[email protected]:repo.git", | ||
] | ||
|
||
for url in valid_urls: | ||
# Should not raise any exception | ||
result = git_utils._sanitize_git_url(url) | ||
assert result == url | ||
|
||
def test_sanitize_git_url_blocks_multiple_at_https(self): | ||
"""Test that HTTPS URLs with multiple @ symbols are blocked.""" | ||
malicious_urls = [ | ||
"https://[email protected]@github.com/repo.git", | ||
"https://[email protected]@gitlab.com/user/repo.git", | ||
"https://a@b@[email protected]/repo.git", | ||
"https://user@[email protected]/legit/repo.git", | ||
] | ||
|
||
for url in malicious_urls: | ||
with pytest.raises(ValueError) as error: | ||
git_utils._sanitize_git_url(url) | ||
assert "multiple @ symbols detected" in str(error.value) | ||
|
||
def test_sanitize_git_url_blocks_multiple_at_ssh(self): | ||
"""Test that SSH URLs with multiple @ symbols are blocked.""" | ||
malicious_urls = [ | ||
"[email protected]@github.com:repo.git", | ||
"git@[email protected]:user/repo.git", | ||
"ssh://git@[email protected]/repo.git", | ||
"git@a@b@c:repo.git", | ||
] | ||
|
||
for url in malicious_urls: | ||
with pytest.raises(ValueError) as error: | ||
git_utils._sanitize_git_url(url) | ||
# git@ URLs should give "exactly one @ symbol" error | ||
# ssh:// URLs should give "multiple @ symbols detected" error | ||
assert any( | ||
phrase in str(error.value) | ||
for phrase in ["multiple @ symbols detected", "exactly one @ symbol"] | ||
) | ||
|
||
def test_sanitize_git_url_blocks_invalid_schemes_and_git_at_format(self): | ||
"""Test that invalid schemes and git@ format violations are blocked.""" | ||
# Test unsupported schemes | ||
unsupported_scheme_urls = [ | ||
"git-github.com:user/repo.git", # Doesn't start with git@, ssh://, http://, https:// | ||
] | ||
|
||
for url in unsupported_scheme_urls: | ||
with pytest.raises(ValueError) as error: | ||
git_utils._sanitize_git_url(url) | ||
assert "Unsupported URL scheme" in str(error.value) | ||
|
||
# Test git@ URLs with wrong @ count | ||
invalid_git_at_urls = [ | ||
"[email protected]@evil.com:repo.git", # 2 @ symbols | ||
] | ||
|
||
for url in invalid_git_at_urls: | ||
with pytest.raises(ValueError) as error: | ||
git_utils._sanitize_git_url(url) | ||
assert "exactly one @ symbol" in str(error.value) | ||
|
||
def test_sanitize_git_url_blocks_url_encoding_obfuscation(self): | ||
"""Test that URL-encoded obfuscation attempts are blocked.""" | ||
obfuscated_urls = [ | ||
"https://github.com%25evil.com/repo.git", | ||
"https://[email protected]%40attacker.com/repo.git", | ||
"https://github.com%2Fevil.com/repo.git", | ||
"https://github.com%3Aevil.com/repo.git", | ||
] | ||
|
||
for url in obfuscated_urls: | ||
with pytest.raises(ValueError) as error: | ||
git_utils._sanitize_git_url(url) | ||
# The error could be either suspicious encoding or invalid characters | ||
assert any( | ||
phrase in str(error.value) | ||
for phrase in ["Suspicious URL encoding detected", "Invalid characters in hostname"] | ||
) | ||
|
||
def test_sanitize_git_url_blocks_invalid_hostname_chars(self): | ||
"""Test that hostnames with invalid characters are blocked.""" | ||
invalid_urls = [ | ||
"https://github<script>.com/repo.git", | ||
"https://github>.com/repo.git", | ||
"https://github[].com/repo.git", | ||
"https://github{}.com/repo.git", | ||
] | ||
|
||
for url in invalid_urls: | ||
with pytest.raises(ValueError) as error: | ||
git_utils._sanitize_git_url(url) | ||
# The error could be various types due to URL parsing edge cases | ||
assert any( | ||
phrase in str(error.value) | ||
for phrase in [ | ||
"Invalid characters in hostname", | ||
"Failed to parse URL", | ||
"does not appear to be an IPv4 or IPv6 address", | ||
] | ||
) | ||
|
||
def test_sanitize_git_url_blocks_unsupported_schemes(self): | ||
"""Test that unsupported URL schemes are blocked.""" | ||
unsupported_urls = [ | ||
"ftp://github.com/repo.git", | ||
"file:///local/repo.git", | ||
"javascript:alert('xss')", | ||
"data:text/html,<script>alert('xss')</script>", | ||
] | ||
|
||
for url in unsupported_urls: | ||
with pytest.raises(ValueError) as error: | ||
git_utils._sanitize_git_url(url) | ||
assert "Unsupported URL scheme" in str(error.value) | ||
|
||
def test_git_clone_repo_blocks_malicious_https_url(self): | ||
"""Test that git_clone_repo blocks malicious HTTPS URLs.""" | ||
malicious_git_config = { | ||
"repo": "https://[email protected]@github.com/legit/repo.git", | ||
"branch": "main", | ||
} | ||
entry_point = "train.py" | ||
|
||
with pytest.raises(ValueError) as error: | ||
git_utils.git_clone_repo(malicious_git_config, entry_point) | ||
assert "multiple @ symbols detected" in str(error.value) | ||
|
||
def test_git_clone_repo_blocks_malicious_ssh_url(self): | ||
"""Test that git_clone_repo blocks malicious SSH URLs.""" | ||
malicious_git_config = { | ||
"repo": "git@[email protected]:sage-maker/temp-sev2.git", | ||
"branch": "main", | ||
} | ||
entry_point = "train.py" | ||
|
||
with pytest.raises(ValueError) as error: | ||
git_utils.git_clone_repo(malicious_git_config, entry_point) | ||
assert "exactly one @ symbol" in str(error.value) | ||
|
||
def test_git_clone_repo_blocks_url_encoded_attack(self): | ||
"""Test that git_clone_repo blocks URL-encoded attacks.""" | ||
malicious_git_config = { | ||
"repo": "https://github.com%40attacker.com/repo.git", | ||
"branch": "main", | ||
} | ||
entry_point = "train.py" | ||
|
||
with pytest.raises(ValueError) as error: | ||
git_utils.git_clone_repo(malicious_git_config, entry_point) | ||
assert "Suspicious URL encoding detected" in str(error.value) | ||
|
||
def test_sanitize_git_url_comprehensive_attack_scenarios(self): | ||
attack_scenarios = [ | ||
# Original PoC attack | ||
"https://USER@YOUR_NGROK_OR_LOCALHOST/[email protected]%25legit%25repo.git", | ||
# Variations of the attack | ||
"https://user@[email protected]/legit/repo.git", | ||
"[email protected]@github.com:user/repo.git", | ||
"ssh://[email protected]@github.com/repo.git", | ||
# URL encoding variations | ||
"https://github.com%40evil.com/repo.git", | ||
"https://[email protected]%2Fevil.com/repo.git", | ||
] | ||
|
||
entry_point = "train.py" | ||
|
||
for malicious_url in attack_scenarios: | ||
git_config = {"repo": malicious_url} | ||
with pytest.raises(ValueError) as error: | ||
git_utils.git_clone_repo(git_config, entry_point) | ||
# Should be blocked by sanitization | ||
assert any( | ||
phrase in str(error.value) | ||
for phrase in [ | ||
"multiple @ symbols detected", | ||
"exactly one @ symbol", | ||
"Suspicious URL encoding detected", | ||
"Invalid characters in hostname", | ||
] | ||
) |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any sanitization happening here or is this just validation ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It shouldn't fix anything on the user's behalf so I think validation is a better word