Skip to content

Commit a7ce32d

Browse files
authored
feat: dedupe repo entries per domain (#4686)
1 parent 51e6f66 commit a7ce32d

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

servers/fai/src/fai/routes/sources/github.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,13 +53,13 @@ async def index_github_source_repos(
     stripped_domain = strip_domain(domain)
     job_id = str(uuid.uuid4())

-    result = await db.execute(
+    domain_root_entry = await db.execute(
         select(IndexSourceDb).where(
             IndexSourceDb.domain == stripped_domain, IndexSourceDb.source_type == SourceType.GITHUB_DOMAIN_ROOT
         )
     )

-    domain_root_entry = result.scalar_one_or_none()
+    domain_root_entry = domain_root_entry.scalar_one_or_none()
     if domain_root_entry is None:
         domain_root_entry = IndexSourceDb(
             domain=stripped_domain,
@@ -71,7 +71,20 @@ async def index_github_source_repos(
         db.add(domain_root_entry)
         await db.flush()

+    existing_repo_entries = await db.execute(
+        select(IndexSourceDb).where(
+            IndexSourceDb.domain == stripped_domain,
+            IndexSourceDb.source_type == SourceType.GITHUB,
+            IndexSourceDb.source_identifier.in_(request.repo_urls),
+        )
+    )
+    existing_repo_urls = {repo.source_identifier for repo in existing_repo_entries.scalars().all()}
+
     for repo_url in request.repo_urls:
+        if repo_url in existing_repo_urls:
+            logger.info(f"Repository {repo_url} already exists for domain {stripped_domain}, skipping")
+            continue
+
         index_source = IndexSourceDb(
             domain=stripped_domain,
             source_type=SourceType.GITHUB,

0 commit comments

Comments (0)