428428 ("source-zonka-feedback" , "5.17.0" ),
429429]
430430
431+ RECHECK_EXCLUSION_LIST = False
432+
433+ USE_GIT_SPARSE_CHECKOUT = False
434+
431435CONNECTOR_REGISTRY_URL = "https://connectors.airbyte.com/files/registries/v0/oss_registry.json"
432436MANIFEST_URL_TEMPLATE = (
433437 "https://connectors.airbyte.com/files/metadata/airbyte/{connector_name}/latest/manifest.yaml"
@@ -462,8 +466,18 @@ def schema_validator() -> ValidateAdheresToSchema:
@pytest.fixture(scope="session")
def manifest_connector_names() -> List[str]:
    """Cached list of manifest-only connector names to avoid repeated registry calls.

    When ``USE_GIT_SPARSE_CHECKOUT`` is enabled, the names are the keys of the
    mapping returned by ``download_manifests_via_git()``. If that call raises,
    or returns an empty mapping, the names are taken from
    ``get_manifest_only_connectors()`` instead.

    Returns:
        The connector names to validate.
    """
    if USE_GIT_SPARSE_CHECKOUT:
        # Use git sparse-checkout to get all available manifest connectors
        try:
            manifests = download_manifests_via_git()
        except Exception as e:
            logger.warning(f"Git sparse-checkout failed, falling back to registry: {e}")
        else:
            if manifests:
                return list(manifests)
            # download_manifests_via_git() signals clone/timeout failures by
            # returning an empty dict rather than raising, so an empty result
            # must also trigger the registry fallback.
            logger.warning("Git sparse-checkout returned no manifests, falling back to registry")

    # Registry path: used when sparse-checkout is disabled or did not produce manifests.
    return [connector_name for connector_name, _ in get_manifest_only_connectors()]
467481
468482
469483def load_declarative_component_schema () -> Dict [str , Any ]:
@@ -504,6 +518,10 @@ def get_manifest_only_connectors() -> List[Tuple[str, str]]:
504518 pytest .fail (f"Failed to fetch connector registry: { e } " )
505519
506520
521+ # Global cache for git-downloaded manifests
522+ _git_manifest_cache : Dict [str , Tuple [str , str ]] = {}
523+
524+
507525def download_manifest (
508526 connector_name : str , download_failures : List [Tuple [str , str ]]
509527) -> Tuple [str , str ]:
@@ -514,6 +532,19 @@ def download_manifest(
514532 Tuple of (manifest_content, cdk_version) where cdk_version is extracted
515533 from the manifest's version field.
516534 """
535+ global _git_manifest_cache
536+
537+ if USE_GIT_SPARSE_CHECKOUT and not _git_manifest_cache :
538+ try :
539+ logger .info ("Initializing git sparse-checkout cache..." )
540+ _git_manifest_cache = download_manifests_via_git ()
541+ logger .info (f"Cached { len (_git_manifest_cache )} manifests from git" )
542+ except Exception as e :
543+ logger .warning (f"Git sparse-checkout failed, using HTTP fallback: { e } " )
544+
545+ if connector_name in _git_manifest_cache :
546+ return _git_manifest_cache [connector_name ]
547+
517548 url = MANIFEST_URL_TEMPLATE .format (connector_name = connector_name )
518549 try :
519550 response = requests .get (url , timeout = 30 )
@@ -542,20 +573,24 @@ def download_manifests_via_git() -> Dict[str, Tuple[str, str]]:
542573 repo_path = Path (temp_dir ) / "airbyte"
543574
544575 try :
576+ logger .info ("Cloning airbyte repo with sparse-checkout..." )
545577 subprocess .run (
546578 [
547579 "git" ,
548580 "clone" ,
549581 "--filter=blob:none" ,
550582 "--sparse" ,
583+ "--depth=1" ,
551584 "https://github.com/airbytehq/airbyte.git" ,
552585 str (repo_path ),
553586 ],
554587 check = True ,
555588 capture_output = True ,
556589 text = True ,
590+ timeout = 120 ,
557591 )
558592
593+ logger .info ("Setting sparse-checkout pattern..." )
559594 subprocess .run (
560595 [
561596 "git" ,
@@ -568,12 +603,19 @@ def download_manifests_via_git() -> Dict[str, Tuple[str, str]]:
568603 check = True ,
569604 capture_output = True ,
570605 text = True ,
606+ timeout = 30 ,
571607 )
572608
573- manifest_files = repo_path .glob ("airbyte-integrations/connectors/*/manifest.yaml" )
609+ logger .info ("Processing manifest files..." )
610+ manifest_files = list (repo_path .glob ("airbyte-integrations/connectors/*/manifest.yaml" ))
611+ logger .info (f"Found { len (manifest_files )} manifest files" )
574612
575- for manifest_path in manifest_files :
613+ for i , manifest_path in enumerate ( manifest_files ) :
576614 connector_name = manifest_path .parent .name
615+ if i % 50 == 0 :
616+ logger .info (
617+ f"Processing manifest { i + 1 } /{ len (manifest_files )} : { connector_name } "
618+ )
577619 try :
578620 with open (manifest_path , "r" ) as f :
579621 manifest_content = f .read ()
@@ -584,10 +626,19 @@ def download_manifests_via_git() -> Dict[str, Tuple[str, str]]:
584626 except Exception as e :
585627 logger .warning (f"Failed to process manifest for { connector_name } : { e } " )
586628
629+ except subprocess .TimeoutExpired :
630+ logger .error ("Git sparse-checkout timed out. Falling back to HTTP downloads." )
631+ return {}
587632 except subprocess .CalledProcessError as e :
588633 logger .warning (f"Git sparse-checkout failed: { e } . Falling back to HTTP downloads." )
589634 return {}
635+ except Exception as e :
636+ logger .error (
637+ f"Unexpected error in git sparse-checkout: { e } . Falling back to HTTP downloads."
638+ )
639+ return {}
590640
641+ logger .info (f"Successfully cached { len (manifests )} manifests from git" )
591642 return manifests
592643
593644
@@ -622,11 +673,17 @@ def test_manifest_validates_against_schema(
622673 except Exception as e :
623674 pytest .fail (f"Failed to download manifest for { connector_name } : { e } " )
624675
625- if (connector_name , cdk_version ) in EXCLUDED_CONNECTORS :
626- pytest .skip (
627- f"Skipping { connector_name } - connector declares it is compatible with "
628- f"CDK version { cdk_version } but is known to fail validation"
629- )
676+ is_excluded = (connector_name , cdk_version ) in EXCLUDED_CONNECTORS
677+
678+ if RECHECK_EXCLUSION_LIST :
679+ expected_to_fail = is_excluded
680+ else :
681+ # Normal mode: skip excluded connectors
682+ if is_excluded :
683+ pytest .skip (
684+ f"Skipping { connector_name } - connector declares it is compatible with "
685+ f"CDK version { cdk_version } but is known to fail validation"
686+ )
630687
631688 try :
632689 manifest_dict = yaml .safe_load (manifest_content )
@@ -639,14 +696,28 @@ def test_manifest_validates_against_schema(
639696 schema_validator .validate (manifest_dict )
640697 validation_successes .append ((connector_name , cdk_version ))
641698 logger .info (f"✓ { connector_name } (CDK { cdk_version } ) - validation passed" )
699+
700+ if RECHECK_EXCLUSION_LIST and expected_to_fail :
701+ pytest .fail (
702+ f"EXCLUSION LIST ERROR: { connector_name } (CDK { cdk_version } ) was expected to fail "
703+ f"but passed validation. Remove from EXCLUDED_CONNECTORS."
704+ )
705+
642706 except ValueError as e :
643707 error_msg = (
644708 f"Manifest validation failed for { connector_name } "
645709 f"(connector declares it is compatible with CDK version { cdk_version } ): { e } "
646710 )
647711 validation_failures .append ((connector_name , cdk_version , str (e )))
648712 logger .error (f"✗ { connector_name } (CDK { cdk_version } ) - validation failed: { e } " )
649- pytest .fail (error_msg )
713+
714+ if RECHECK_EXCLUSION_LIST and not expected_to_fail :
715+ pytest .fail (
716+ f"EXCLUSION LIST ERROR: { connector_name } (CDK { cdk_version } ) was expected to pass "
717+ f"but failed validation. Add to EXCLUDED_CONNECTORS: { error_msg } "
718+ )
719+ elif not RECHECK_EXCLUSION_LIST :
720+ pytest .fail (error_msg )
650721
651722
652723def test_schema_loads_successfully () -> None :
0 commit comments