@@ -53,13 +53,13 @@ async def index_github_source_repos(
5353 stripped_domain = strip_domain (domain )
5454 job_id = str (uuid .uuid4 ())
5555
56- result = await db .execute (
56+ domain_root_entry = await db .execute (
5757 select (IndexSourceDb ).where (
5858 IndexSourceDb .domain == stripped_domain , IndexSourceDb .source_type == SourceType .GITHUB_DOMAIN_ROOT
5959 )
6060 )
6161
62- domain_root_entry = result .scalar_one_or_none ()
62+ domain_root_entry = domain_root_entry .scalar_one_or_none ()
6363 if domain_root_entry is None :
6464 domain_root_entry = IndexSourceDb (
6565 domain = stripped_domain ,
@@ -71,7 +71,20 @@ async def index_github_source_repos(
7171 db .add (domain_root_entry )
7272 await db .flush ()
7373
74+ existing_repo_entries = await db .execute (
75+ select (IndexSourceDb ).where (
76+ IndexSourceDb .domain == stripped_domain ,
77+ IndexSourceDb .source_type == SourceType .GITHUB ,
78+ IndexSourceDb .source_identifier .in_ (request .repo_urls ),
79+ )
80+ )
81+ existing_repo_urls = {repo .source_identifier for repo in existing_repo_entries .scalars ().all ()}
82+
7483 for repo_url in request .repo_urls :
84+ if repo_url in existing_repo_urls :
85+ logger .info (f"Repository { repo_url } already exists for domain { stripped_domain } , skipping" )
86+ continue
87+
7588 index_source = IndexSourceDb (
7689 domain = stripped_domain ,
7790 source_type = SourceType .GITHUB ,
0 commit comments