@@ -501,6 +501,7 @@ def test_git_clone_repo_codecommit_https_creds_not_stored_locally(tempdir, mkdte
501
501
# URL Sanitization Tests - Security vulnerability prevention
502
502
# ============================================================================
503
503
504
+
504
505
class TestGitUrlSanitization :
505
506
"""Test cases for Git URL sanitization to prevent injection attacks."""
506
507
@@ -513,7 +514,7 @@ def test_sanitize_git_url_valid_https_urls(self):
513
514
"https://user:[email protected] /user/repo.git" ,
514
515
"http://internal-git.company.com/repo.git" ,
515
516
]
516
-
517
+
517
518
for url in valid_urls :
518
519
# Should not raise any exception
519
520
result = git_utils ._sanitize_git_url (url )
@@ -528,7 +529,7 @@ def test_sanitize_git_url_valid_ssh_urls(self):
528
529
"ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/" , # 0 @ symbols - valid for ssh://
529
530
530
531
]
531
-
532
+
532
533
for url in valid_urls :
533
534
# Should not raise any exception
534
535
result = git_utils ._sanitize_git_url (url )
@@ -542,7 +543,7 @@ def test_sanitize_git_url_blocks_multiple_at_https(self):
542
543
"https://a@b@[email protected] /repo.git" ,
543
544
"https://user@[email protected] /legit/repo.git" ,
544
545
]
545
-
546
+
546
547
for url in malicious_urls :
547
548
with pytest .raises (ValueError ) as error :
548
549
git_utils ._sanitize_git_url (url )
@@ -556,34 +557,34 @@ def test_sanitize_git_url_blocks_multiple_at_ssh(self):
556
557
"ssh://git@[email protected] /repo.git" ,
557
558
"git@a@b@c:repo.git" ,
558
559
]
559
-
560
+
560
561
for url in malicious_urls :
561
562
with pytest .raises (ValueError ) as error :
562
563
git_utils ._sanitize_git_url (url )
563
564
# git@ URLs should give "exactly one @ symbol" error
564
565
# ssh:// URLs should give "multiple @ symbols detected" error
565
- assert any (phrase in str ( error . value ) for phrase in [
566
- "multiple @ symbols detected" ,
567
- " exactly one @ symbol"
568
- ] )
566
+ assert any (
567
+ phrase in str ( error . value )
568
+ for phrase in [ "multiple @ symbols detected" , " exactly one @ symbol"]
569
+ )
569
570
570
571
def test_sanitize_git_url_blocks_invalid_schemes_and_git_at_format (self ):
571
572
"""Test that invalid schemes and git@ format violations are blocked."""
572
573
# Test unsupported schemes
573
574
unsupported_scheme_urls = [
574
575
"git-github.com:user/repo.git" , # Doesn't start with git@, ssh://, http://, https://
575
576
]
576
-
577
+
577
578
for url in unsupported_scheme_urls :
578
579
with pytest .raises (ValueError ) as error :
579
580
git_utils ._sanitize_git_url (url )
580
581
assert "Unsupported URL scheme" in str (error .value )
581
-
582
+
582
583
# Test git@ URLs with wrong @ count
583
584
invalid_git_at_urls = [
584
585
"[email protected] @evil.com:repo.git" ,
# 2 @ symbols
585
586
]
586
-
587
+
587
588
for url in invalid_git_at_urls :
588
589
with pytest .raises (ValueError ) as error :
589
590
git_utils ._sanitize_git_url (url )
@@ -597,15 +598,15 @@ def test_sanitize_git_url_blocks_url_encoding_obfuscation(self):
597
598
"https://github.com%2Fevil.com/repo.git" ,
598
599
"https://github.com%3Aevil.com/repo.git" ,
599
600
]
600
-
601
+
601
602
for url in obfuscated_urls :
602
603
with pytest .raises (ValueError ) as error :
603
604
git_utils ._sanitize_git_url (url )
604
605
# The error could be either suspicious encoding or invalid characters
605
- assert any (phrase in str ( error . value ) for phrase in [
606
- "Suspicious URL encoding detected" ,
607
- " Invalid characters in hostname"
608
- ] )
606
+ assert any (
607
+ phrase in str ( error . value )
608
+ for phrase in [ "Suspicious URL encoding detected" , " Invalid characters in hostname"]
609
+ )
609
610
610
611
def test_sanitize_git_url_blocks_invalid_hostname_chars (self ):
611
612
"""Test that hostnames with invalid characters are blocked."""
@@ -615,16 +616,19 @@ def test_sanitize_git_url_blocks_invalid_hostname_chars(self):
615
616
"https://github[].com/repo.git" ,
616
617
"https://github{}.com/repo.git" ,
617
618
]
618
-
619
+
619
620
for url in invalid_urls :
620
621
with pytest .raises (ValueError ) as error :
621
622
git_utils ._sanitize_git_url (url )
622
623
# The error could be various types due to URL parsing edge cases
623
- assert any (phrase in str (error .value ) for phrase in [
624
- "Invalid characters in hostname" ,
625
- "Failed to parse URL" ,
626
- "does not appear to be an IPv4 or IPv6 address"
627
- ])
624
+ assert any (
625
+ phrase in str (error .value )
626
+ for phrase in [
627
+ "Invalid characters in hostname" ,
628
+ "Failed to parse URL" ,
629
+ "does not appear to be an IPv4 or IPv6 address" ,
630
+ ]
631
+ )
628
632
629
633
def test_sanitize_git_url_blocks_unsupported_schemes (self ):
630
634
"""Test that unsupported URL schemes are blocked."""
@@ -634,7 +638,7 @@ def test_sanitize_git_url_blocks_unsupported_schemes(self):
634
638
"javascript:alert('xss')" ,
635
639
"data:text/html,<script>alert('xss')</script>" ,
636
640
]
637
-
641
+
638
642
for url in unsupported_urls :
639
643
with pytest .raises (ValueError ) as error :
640
644
git_utils ._sanitize_git_url (url )
@@ -644,10 +648,10 @@ def test_git_clone_repo_blocks_malicious_https_url(self):
644
648
"""Test that git_clone_repo blocks malicious HTTPS URLs."""
645
649
malicious_git_config = {
646
650
"repo" :
"https://[email protected] @github.com/legit/repo.git" ,
647
- "branch" : "main"
651
+ "branch" : "main" ,
648
652
}
649
653
entry_point = "train.py"
650
-
654
+
651
655
with pytest .raises (ValueError ) as error :
652
656
git_utils .git_clone_repo (malicious_git_config , entry_point )
653
657
assert "multiple @ symbols detected" in str (error .value )
@@ -656,10 +660,10 @@ def test_git_clone_repo_blocks_malicious_ssh_url(self):
656
660
"""Test that git_clone_repo blocks malicious SSH URLs."""
657
661
malicious_git_config = {
658
662
"repo" :
"git@[email protected] :sage-maker/temp-sev2.git" ,
659
- "branch" : "main"
663
+ "branch" : "main" ,
660
664
}
661
665
entry_point = "train.py"
662
-
666
+
663
667
with pytest .raises (ValueError ) as error :
664
668
git_utils .git_clone_repo (malicious_git_config , entry_point )
665
669
assert "exactly one @ symbol" in str (error .value )
@@ -668,10 +672,10 @@ def test_git_clone_repo_blocks_url_encoded_attack(self):
668
672
"""Test that git_clone_repo blocks URL-encoded attacks."""
669
673
malicious_git_config = {
670
674
"repo" : "https://github.com%40attacker.com/repo.git" ,
671
- "branch" : "main"
675
+ "branch" : "main" ,
672
676
}
673
677
entry_point = "train.py"
674
-
678
+
675
679
with pytest .raises (ValueError ) as error :
676
680
git_utils .git_clone_repo (malicious_git_config , entry_point )
677
681
assert "Suspicious URL encoding detected" in str (error .value )
@@ -690,17 +694,20 @@ def test_sanitize_git_url_comprehensive_attack_scenarios(self):
690
694
"https://github.com%40evil.com/repo.git" ,
691
695
"https://[email protected] %2Fevil.com/repo.git" ,
692
696
]
693
-
697
+
694
698
entry_point = "train.py"
695
-
699
+
696
700
for malicious_url in attack_scenarios :
697
701
git_config = {"repo" : malicious_url }
698
702
with pytest .raises (ValueError ) as error :
699
703
git_utils .git_clone_repo (git_config , entry_point )
700
704
# Should be blocked by sanitization
701
- assert any (phrase in str (error .value ) for phrase in [
702
- "multiple @ symbols detected" ,
703
- "exactly one @ symbol" ,
704
- "Suspicious URL encoding detected" ,
705
- "Invalid characters in hostname"
706
- ])
705
+ assert any (
706
+ phrase in str (error .value )
707
+ for phrase in [
708
+ "multiple @ symbols detected" ,
709
+ "exactly one @ symbol" ,
710
+ "Suspicious URL encoding detected" ,
711
+ "Invalid characters in hostname" ,
712
+ ]
713
+ )
0 commit comments