From 8b25fd28a4aa0884c58144a3cd0759d5969ade2b Mon Sep 17 00:00:00 2001 From: Ucanbarlic Date: Mon, 2 Sep 2024 18:24:53 +0200 Subject: [PATCH 01/10] Implement forked from feature --- Sources/App/Commands/Ingest.swift | 33 ++++++- Sources/App/Core/Github.swift | 8 ++ .../079/UpdateRepositoryAddFork.swift | 35 +++++++ Sources/App/Models/Repository.swift | 17 ++-- Sources/App/configure.swift | 3 + Tests/AppTests/IngestorTests.swift | 93 ++++++++++++++++++- .../AppTests/Mocks/GithubMetadata+mock.swift | 8 +- Tests/AppTests/RepositoryTests.swift | 13 --- 8 files changed, 185 insertions(+), 25 deletions(-) create mode 100644 Sources/App/Migrations/079/UpdateRepositoryAddFork.swift diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index d4e66f75b..363f7b990 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -124,13 +124,30 @@ func ingest(client: Client, Current.logger().warning("storeS3Readme failed") s3Readme = .error("\(error)") } + + var fork: Fork? + do { + if let parentUrl = metadata.repository?.normalizedParentUrl { + if let packageId = try await Package.query(on: database) + .filter(\.$url == parentUrl) + .first()?.id { + fork = .parentId(packageId) + } else { + fork = .parentURL(parentUrl) + } + } + } catch { + Current.logger().warning("updating forked from failed") + } try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, - s3Readme: s3Readme) + s3Readme: s3Readme, + forkedFrom: fork) + return pkg } @@ -177,7 +194,8 @@ func updateRepository(on database: Database, metadata: Github.Metadata, licenseInfo: Github.License?, readmeInfo: Github.Readme?, - s3Readme: S3Readme?) async throws { + s3Readme: S3Readme?, + forkedFrom: Fork? = nil) async throws { guard let repoMetadata = metadata.repository else { if repository.$package.value == nil { try await repository.$package.load(on: database) @@ -208,6 +226,7 @@ func updateRepository(on database: Database, repository.releases = repoMetadata.releases.nodes.map(Release.init(from:)) repository.stars = repoMetadata.stargazerCount repository.summary = repoMetadata.description + repository.forkedFrom = forkedFrom try await repository.save(on: database) } @@ -224,3 +243,13 @@ private extension Github.Metadata.Repository { var repositoryOwner: String? { owner.login } var repositoryName: String? { name } } + +private extension Github.Metadata.Repository { + // Returns a normalized version of the URL. Adding a `.git` if not present. + var normalizedParentUrl: String? { + guard let url = parent.url else { return nil } + guard !url.hasSuffix(".git") else { return url } + let normalizedUrl = url + ".git" + return normalizedUrl + } +} diff --git a/Sources/App/Core/Github.swift b/Sources/App/Core/Github.swift index 359957138..3ce50122e 100644 --- a/Sources/App/Core/Github.swift +++ b/Sources/App/Core/Github.swift @@ -284,6 +284,9 @@ extension Github { homepageUrl isArchived isFork + parent { + url + } isInOrganization licenseInfo { name @@ -348,6 +351,7 @@ extension Github { var isArchived: Bool // periphery:ignore var isFork: Bool + var parent: Parent var isInOrganization: Bool var licenseInfo: LicenseInfo? var mergedPullRequests: IssueNodes @@ -459,6 +463,10 @@ extension Github { } } } + + struct Parent: Decodable, Equatable { + var url: String? + } } } diff --git a/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift b/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift new file mode 100644 index 000000000..42177e8aa --- /dev/null +++ b/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift @@ -0,0 +1,35 @@ +// Copyright Dave Verwer, Sven A. Schmidt, and other contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Fluent +import SQLKit + + +struct UpdateRepositoryAddFork: AsyncMigration { + func prepare(on database: Database) async throws { + try await database.schema("repositories") + .field("forked_from", .json) + // delete old `forked_from_id` field + .deleteField("forked_from_id") + .update() + } + + func revert(on database: Database) async throws { + try await database.schema("repositories") + .deleteField("forked_from") + .field("forked_from_id", .uuid, + .references("repositories", "id")).unique(on: "forked_from_id") + .update() + } +} diff --git a/Sources/App/Models/Repository.swift b/Sources/App/Models/Repository.swift index 245448285..0b7160ee8 100644 --- a/Sources/App/Models/Repository.swift +++ b/Sources/App/Models/Repository.swift @@ -34,9 +34,6 @@ final class Repository: @unchecked Sendable, Model, Content { // reference fields - @OptionalParent(key: "forked_from_id") // TODO: remove or implement - var forkedFrom: Repository? - @Parent(key: "package_id") var package: Package @@ -122,6 +119,9 @@ final class Repository: @unchecked Sendable, Model, Content { @Field(key: "summary") var summary: String? + + @Field(key: "forked_from") + var forkedFrom: Fork? // initializers @@ -135,7 +135,7 @@ final class Repository: @unchecked Sendable, Model, Content { firstCommitDate: Date? = nil, forks: Int = 0, fundingLinks: [FundingLink] = [], - forkedFrom: Repository? = nil, + forkedFrom: Fork? = nil, homepageUrl: String? = nil, isArchived: Bool = false, isInOrganization: Bool = false, @@ -164,9 +164,7 @@ final class Repository: @unchecked Sendable, Model, Content { self.commitCount = commitCount self.firstCommitDate = firstCommitDate self.forks = forks - if let forkId = forkedFrom?.id { - self.$forkedFrom.id = forkId - } + self.forkedFrom = forkedFrom self.fundingLinks = fundingLinks self.homepageUrl = homepageUrl self.isArchived = isArchived @@ -273,3 +271,8 @@ enum S3Readme: Codable, Equatable { } } } + +enum Fork: Codable, Equatable { + case parentId(Package.Id) + case parentURL(String) +} diff --git a/Sources/App/configure.swift b/Sources/App/configure.swift index 156020e4c..9e367d417 100644 --- a/Sources/App/configure.swift +++ b/Sources/App/configure.swift @@ -334,6 +334,9 @@ public func configure(_ app: Application) async throws -> String { do { // Migration 078 - Add `build_date` and `commit_hash` to `builds` app.migrations.add(UpdateBuildAddBuildDateCommitHash()) } + do { // Migration 079 - Add `fork` to `repositories` + app.migrations.add(UpdateRepositoryAddFork()) + } app.asyncCommands.use(Analyze.Command(), as: "analyze") app.asyncCommands.use(CreateRestfileCommand(), as: "create-restfile") diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index fb4efedc0..ac39e5660 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -125,7 +125,8 @@ class IngestorTests: AppTestCase { Date(timeIntervalSince1970: 1), ], license: .mit, - openIssues: 1, + openIssues: 1, + parentUrl: nil, openPullRequests: 2, owner: "foo", pullRequestsClosedAtDates: [ @@ -208,6 +209,7 @@ class IngestorTests: AppTestCase { issuesClosedAtDates: [], license: .mit, openIssues: 1, + parentUrl: nil, openPullRequests: 2, owner: "foo", pullRequestsClosedAtDates: [], @@ -353,6 +355,7 @@ class IngestorTests: AppTestCase { issuesClosedAtDates: [], license: .mit, openIssues: 0, + parentUrl: nil, openPullRequests: 0, owner: "owner", pullRequestsClosedAtDates: [], @@ -594,4 +597,92 @@ class IngestorTests: AppTestCase { let postMigrationFetchedRepo = try await XCTUnwrapAsync(try await Repository.query(on: app.db).first()) XCTAssertEqual(postMigrationFetchedRepo.s3Readme, .cached(s3ObjectUrl: "object-url", githubEtag: "")) } + + func test_ingest_storeForkedFromPackageInSPI() async throws { + let pkg = Package(url: "https://github.com/foo/bar.git".url, + processingStage: .analysis) + let forkedPkg = Package( + url: "https://github.com/taz/bar.git", + processingStage: .reconciliation + ) + try await pkg.save(on: app.db) + try await forkedPkg.save(on: app.db) + Current.fetchMetadata = { _, owner, repository in + .mock(owner: owner, repository: repository, parentUrl: "https://github.com/foo/bar.git") + } + + // MUT + try await ingest(client: app.client, database: app.db, mode: .limit(1)) + + guard let forkedId = forkedPkg.id else { + XCTFail("Failed to get forked package id") + return + } + + guard let id = pkg.id else { + XCTFail("Failed to get package id") + return + } + + let repo = try await Repository + .query(on: app.db) + .filter(\Repository.$package.$id == forkedId).first() + + XCTAssertNotNil(repo?.forkedFrom) + + XCTAssertEqual(repo?.forkedFrom, .parentId(id)) + + } + + func test_ingest_storeForkedFromPackageNotInSPI() async throws { + let forkedPkg = Package( + url: "https://github.com/taz/bar.git", + processingStage: .reconciliation + ) + try await forkedPkg.save(on: app.db) + Current.fetchMetadata = { _, owner, repository in + .mock(owner: owner, repository: repository, parentUrl: "https://github.com/foo/bar.git") + } + + // MUT + try await ingest(client: app.client, database: app.db, mode: .limit(1)) + + guard let forkedId = forkedPkg.id else { + XCTFail("Failed to get forked package id") + return + } + + let repo = try await Repository + .query(on: app.db) + .filter(\Repository.$package.$id == forkedId).first() + + XCTAssertNotNil(repo?.forkedFrom) + + XCTAssertEqual(repo?.forkedFrom, .parentURL("https://github.com/foo/bar.git")) + } + + func test_ingest_storeForkedFromShouldNeNil() async throws { + let forkedPkg = Package( + url: "https://github.com/taz/bar.git", + processingStage: .reconciliation + ) + try await forkedPkg.save(on: app.db) + Current.fetchMetadata = { _, owner, repository in + .mock(owner: owner, repository: repository, parentUrl: nil) + } + + // MUT + try await ingest(client: app.client, database: app.db, mode: .limit(1)) + + guard let forkedId = forkedPkg.id else { + XCTFail("Failed to get forked package id") + return + } + + let repo = try await Repository + .query(on: app.db) + .filter(\Repository.$package.$id == forkedId).first() + + XCTAssertNil(repo?.forkedFrom) + } } diff --git a/Tests/AppTests/Mocks/GithubMetadata+mock.swift b/Tests/AppTests/Mocks/GithubMetadata+mock.swift index fd7ba0e70..511dc88b2 100644 --- a/Tests/AppTests/Mocks/GithubMetadata+mock.swift +++ b/Tests/AppTests/Mocks/GithubMetadata+mock.swift @@ -25,6 +25,7 @@ extension Github.Metadata { issuesClosedAtDates: [], license: .mit, openIssues: 3, + parentUrl: nil, openPullRequests: 0, owner: "packageOwner", pullRequestsClosedAtDates: [], @@ -34,7 +35,7 @@ extension Github.Metadata { stars: 2, summary: "desc") - static func mock(owner: String, repository: String) -> Self { + static func mock(owner: String, repository: String, parentUrl: String? = nil) -> Self { return .init(defaultBranch: "main", forks: owner.count + repository.count, homepageUrl: nil, @@ -42,6 +43,7 @@ extension Github.Metadata { issuesClosedAtDates: [], license: .mit, openIssues: 3, + parentUrl: parentUrl, openPullRequests: 0, owner: owner, pullRequestsClosedAtDates: [], @@ -60,6 +62,7 @@ extension Github.Metadata { issuesClosedAtDates: [Date], license: License, openIssues: Int, + parentUrl: String?, openPullRequests: Int, owner: String, pullRequestsClosedAtDates: [Date], @@ -84,7 +87,8 @@ extension Github.Metadata { fundingLinks: fundingLinks, homepageUrl: homepageUrl, isArchived: false, - isFork: false, + isFork: false, + parent: .init(url: parentUrl), isInOrganization: isInOrganization, licenseInfo: .init(name: license.fullName, key: license.rawValue), mergedPullRequests: .init(closedAtDates: []), diff --git a/Tests/AppTests/RepositoryTests.swift b/Tests/AppTests/RepositoryTests.swift index 0d7c5827a..92e53fefa 100644 --- a/Tests/AppTests/RepositoryTests.swift +++ b/Tests/AppTests/RepositoryTests.swift @@ -178,19 +178,6 @@ final class RepositoryTests: AppTestCase { } } - func test_forkedFrom_relationship() async throws { - let p1 = Package(url: "p1") - try await p1.save(on: app.db) - let p2 = Package(url: "p2") - try await p2.save(on: app.db) - - // test forked from link - let parent = try Repository(package: p1) - try await parent.save(on: app.db) - let child = try Repository(package: p2, forkedFrom: parent) - try await child.save(on: app.db) - } - func test_delete_cascade() async throws { // delete package must delete repository let pkg = Package(id: UUID(), url: "1") From 301bb2da4ccc6b627fce3472f1a4ca5341c139ca Mon Sep 17 00:00:00 2001 From: Ucanbarlic Date: Mon, 2 Sep 2024 18:49:47 +0200 Subject: [PATCH 02/10] Move forked from logic to updateRepository method --- Sources/App/Commands/Ingest.swift | 38 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 363f7b990..bdd96e2e4 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -125,28 +125,12 @@ func ingest(client: Client, s3Readme = .error("\(error)") } - var fork: Fork? - do { - if let parentUrl = metadata.repository?.normalizedParentUrl { - if let packageId = try await Package.query(on: database) - .filter(\.$url == parentUrl) - .first()?.id { - fork = .parentId(packageId) - } else { - fork = .parentURL(parentUrl) - } - } - } catch { - Current.logger().warning("updating forked from failed") - } - try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, - s3Readme: s3Readme, - forkedFrom: fork) + s3Readme: s3Readme) return pkg } @@ -194,8 +178,7 @@ func updateRepository(on database: Database, metadata: Github.Metadata, licenseInfo: Github.License?, readmeInfo: Github.Readme?, - s3Readme: S3Readme?, - forkedFrom: Fork? = nil) async throws { + s3Readme: S3Readme?) async throws { guard let repoMetadata = metadata.repository else { if repository.$package.value == nil { try await repository.$package.load(on: database) @@ -203,6 +186,21 @@ func updateRepository(on database: Database, throw AppError.genericError(repository.package.id, "repository metadata is nil for package \(repository.name ?? "unknown")") } + + var fork: Fork? + do { + if let parentUrl = metadata.repository?.normalizedParentUrl { + if let packageId = try await Package.query(on: database) + .filter(\.$url == parentUrl) + .first()?.id { + fork = .parentId(packageId) + } else { + fork = .parentURL(parentUrl) + } + } + } catch { + Current.logger().warning("updating forked from failed") + } repository.defaultBranch = repoMetadata.defaultBranch repository.forks = repoMetadata.forkCount @@ -226,7 +224,7 @@ func updateRepository(on database: Database, repository.releases = repoMetadata.releases.nodes.map(Release.init(from:)) repository.stars = repoMetadata.stargazerCount repository.summary = repoMetadata.description - repository.forkedFrom = forkedFrom + repository.forkedFrom = fork try await repository.save(on: database) } From 315bf1b8d4d23651b786bc0bf69049ce8bc6c2a1 Mon Sep 17 00:00:00 2001 From: Ucanbarlic Date: Tue, 3 Sep 2024 14:05:53 +0200 Subject: [PATCH 03/10] Make filter case insensitive --- Sources/App/Commands/Ingest.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index bdd96e2e4..b94fa590f 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -191,7 +191,7 @@ func updateRepository(on database: Database, do { if let parentUrl = metadata.repository?.normalizedParentUrl { if let packageId = try await Package.query(on: database) - .filter(\.$url == parentUrl) + .filter(\.$url, .custom("ilike"), parentUrl) .first()?.id { fork = .parentId(packageId) } else { From b2d338a4f6caf08a681d06fbb752a68cd32baa13 Mon Sep 17 00:00:00 2001 From: Dave Verwer Date: Thu, 5 Sep 2024 07:12:30 +0100 Subject: [PATCH 04/10] Clean up. --- Sources/App/Commands/Ingest.swift | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index b94fa590f..d3a016e67 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -124,14 +124,13 @@ func ingest(client: Client, Current.logger().warning("storeS3Readme failed") s3Readme = .error("\(error)") } - + try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme) - return pkg } From 75e2ed335369253d25b380e75ae7611d85402a24 Mon Sep 17 00:00:00 2001 From: Dave Verwer Date: Thu, 5 Sep 2024 07:22:01 +0100 Subject: [PATCH 05/10] =?UTF-8?q?I=20don=E2=80=99t=20believe=20`parent`=20?= =?UTF-8?q?is=20a=20required=20field.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/App/Commands/Ingest.swift | 2 +- Sources/App/Core/Github.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index d3a016e67..d4b062757 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -244,7 +244,7 @@ private extension Github.Metadata.Repository { private extension Github.Metadata.Repository { // Returns a normalized version of the URL. Adding a `.git` if not present. var normalizedParentUrl: String? { - guard let url = parent.url else { return nil } + guard let url = parent?.url else { return nil } guard !url.hasSuffix(".git") else { return url } let normalizedUrl = url + ".git" return normalizedUrl diff --git a/Sources/App/Core/Github.swift b/Sources/App/Core/Github.swift index 3ce50122e..48aac40f4 100644 --- a/Sources/App/Core/Github.swift +++ b/Sources/App/Core/Github.swift @@ -351,7 +351,7 @@ extension Github { var isArchived: Bool // periphery:ignore var isFork: Bool - var parent: Parent + var parent: Parent? var isInOrganization: Bool var licenseInfo: LicenseInfo? var mergedPullRequests: IssueNodes From 91ed579d8e03c3f1509a45082fb6e00aa7b8c499 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 5 Sep 2024 09:14:23 +0200 Subject: [PATCH 06/10] Sort fields --- Sources/App/Models/Repository.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Sources/App/Models/Repository.swift b/Sources/App/Models/Repository.swift index 0b7160ee8..20ab78a28 100644 --- a/Sources/App/Models/Repository.swift +++ b/Sources/App/Models/Repository.swift @@ -50,6 +50,9 @@ final class Repository: @unchecked Sendable, Model, Content { @Field(key: "first_commit_date") var firstCommitDate: Date? + + @Field(key: "forked_from") + var forkedFrom: Fork? @Field(key: "forks") var forks: Int @@ -119,9 +122,6 @@ final class Repository: @unchecked Sendable, Model, Content { @Field(key: "summary") var summary: String? - - @Field(key: "forked_from") - var forkedFrom: Fork? // initializers From 41823a01949b2c7f29b59872dd67b205ffe8f631 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 5 Sep 2024 09:16:04 +0200 Subject: [PATCH 07/10] Rename migration to match field name --- ...itoryAddFork.swift => UpdateRepositoryAddForkedFrom.swift} | 2 +- Sources/App/configure.swift | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename Sources/App/Migrations/079/{UpdateRepositoryAddFork.swift => UpdateRepositoryAddForkedFrom.swift} (95%) diff --git a/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift b/Sources/App/Migrations/079/UpdateRepositoryAddForkedFrom.swift similarity index 95% rename from Sources/App/Migrations/079/UpdateRepositoryAddFork.swift rename to Sources/App/Migrations/079/UpdateRepositoryAddForkedFrom.swift index 42177e8aa..24af7caa9 100644 --- a/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift +++ b/Sources/App/Migrations/079/UpdateRepositoryAddForkedFrom.swift @@ -16,7 +16,7 @@ import Fluent import SQLKit -struct UpdateRepositoryAddFork: AsyncMigration { +struct UpdateRepositoryAddForkedFrom: AsyncMigration { func prepare(on database: Database) async throws { try await database.schema("repositories") .field("forked_from", .json) diff --git a/Sources/App/configure.swift b/Sources/App/configure.swift index 9e367d417..48f48c512 100644 --- a/Sources/App/configure.swift +++ b/Sources/App/configure.swift @@ -334,8 +334,8 @@ public func configure(_ app: Application) async throws -> String { do { // Migration 078 - Add `build_date` and `commit_hash` to `builds` app.migrations.add(UpdateBuildAddBuildDateCommitHash()) } - do { // Migration 079 - Add `fork` to `repositories` - app.migrations.add(UpdateRepositoryAddFork()) + do { // Migration 079 - Add `forked_from` to `repositories` + app.migrations.add(UpdateRepositoryAddForkedFrom()) } app.asyncCommands.use(Analyze.Command(), as: "analyze") From 908963f78e4305088dee466b2ad9ce759fb9508a Mon Sep 17 00:00:00 2001 From: Author Name Date: Thu, 5 Sep 2024 14:44:00 +0200 Subject: [PATCH 08/10] Move out to a seperate method --- Sources/App/Commands/Ingest.swift | 56 ++++++++------- Tests/AppTests/IngestorTests.swift | 107 ++++++++--------------------- 2 files changed, 61 insertions(+), 102 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index d4b062757..45ff5ea8b 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -124,13 +124,22 @@ func ingest(client: Client, Current.logger().warning("storeS3Readme failed") s3Readme = .error("\(error)") } + + let fork: Fork? + do { + fork = try await getFork(on: database, metadata: metadata) + } catch { + fork = nil + Current.logger().warning("updating forked from failed") + } try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, - s3Readme: s3Readme) + s3Readme: s3Readme, + fork: fork) return pkg } @@ -177,7 +186,8 @@ func updateRepository(on database: Database, metadata: Github.Metadata, licenseInfo: Github.License?, readmeInfo: Github.Readme?, - s3Readme: S3Readme?) async throws { + s3Readme: S3Readme?, + fork: Fork? = nil) async throws { guard let repoMetadata = metadata.repository else { if repository.$package.value == nil { try await repository.$package.load(on: database) @@ -185,21 +195,6 @@ func updateRepository(on database: Database, throw AppError.genericError(repository.package.id, "repository metadata is nil for package \(repository.name ?? "unknown")") } - - var fork: Fork? - do { - if let parentUrl = metadata.repository?.normalizedParentUrl { - if let packageId = try await Package.query(on: database) - .filter(\.$url, .custom("ilike"), parentUrl) - .first()?.id { - fork = .parentId(packageId) - } else { - fork = .parentURL(parentUrl) - } - } - } catch { - Current.logger().warning("updating forked from failed") - } repository.defaultBranch = repoMetadata.defaultBranch repository.forks = repoMetadata.forkCount @@ -228,6 +223,20 @@ func updateRepository(on database: Database, try await repository.save(on: database) } +func getFork(on database: Database, metadata: Github.Metadata) async throws -> Fork? { + guard let url = metadata.repository?.parent?.url, + let parentUrl = URL(string: url)?.normalizedParent?.absoluteString else { + return nil + } + + if let packageId = try await Package.query(on: database) + .filter(\.$url, .custom("ilike"), parentUrl) + .first()?.id { + return .parentId(packageId) + } else { + return .parentURL(parentUrl) + } +} // Helper to ensure the canonical source for these critical fields is the same in all the places where we need them private extension Github.Metadata { @@ -241,12 +250,11 @@ private extension Github.Metadata.Repository { var repositoryName: String? { name } } -private extension Github.Metadata.Repository { - // Returns a normalized version of the URL. Adding a `.git` if not present. - var normalizedParentUrl: String? { - guard let url = parent?.url else { return nil } - guard !url.hasSuffix(".git") else { return url } - let normalizedUrl = url + ".git" - return normalizedUrl +private extension URL { + var normalizedParent: Self? { + guard var components = URLComponents(url: self, resolvingAgainstBaseURL: false) else { return nil } + if components.scheme == "http" { components.scheme = "https" } + if !components.path.hasSuffix(".git") { components.path = components.path + ".git" } + return components.url! } } diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index ac39e5660..6a1f28c96 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -156,7 +156,8 @@ class IngestorTests: AppTestCase { html: "readme html", htmlUrl: "readme html url", imagesToCache: []), - s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag")) + s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"), + fork: .parentURL("https://github.com/foo/bar.git")) // validate do { @@ -165,6 +166,7 @@ class IngestorTests: AppTestCase { let repo = try await Repository.query(on: app.db).first().unwrap() XCTAssertEqual(repo.defaultBranch, "main") XCTAssertEqual(repo.forks, 1) + XCTAssertEqual(repo.forkedFrom, .parentURL("https://github.com/foo/bar.git")) XCTAssertEqual(repo.fundingLinks, [ .init(platform: .gitHub, url: "https://github.com/username"), .init(platform: .customUrl, url: "https://example.com/username1"), @@ -598,91 +600,40 @@ class IngestorTests: AppTestCase { XCTAssertEqual(postMigrationFetchedRepo.s3Readme, .cached(s3ObjectUrl: "object-url", githubEtag: "")) } - func test_ingest_storeForkedFromPackageInSPI() async throws { - let pkg = Package(url: "https://github.com/foo/bar.git".url, - processingStage: .analysis) - let forkedPkg = Package( - url: "https://github.com/taz/bar.git", - processingStage: .reconciliation - ) - try await pkg.save(on: app.db) - try await forkedPkg.save(on: app.db) - Current.fetchMetadata = { _, owner, repository in - .mock(owner: owner, repository: repository, parentUrl: "https://github.com/foo/bar.git") - } + func test_getFork() async throws { + try await Package(id: .id0, url: "https://github.com/foo/parent.git".url, processingStage: .analysis).save(on: app.db) + try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db) - // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(1)) - - guard let forkedId = forkedPkg.id else { - XCTFail("Failed to get forked package id") - return + var metadata = Github.Metadata.mock + + do { // test lookup when package is in the index + metadata.repository?.parent = .init(url: "https://github.com/foo/parent.git") + let fork = try await getFork(on: app.db, metadata: metadata) + XCTAssertEqual(fork, .parentId(.id0)) } - - guard let id = pkg.id else { - XCTFail("Failed to get package id") - return + + do { // test lookup when package is in the index but with different case in URL + metadata.repository?.parent = .init(url: "https://github.com/Foo/Parent.git") + let fork = try await getFork(on: app.db, metadata: metadata) + XCTAssertEqual(fork, .parentId(.id0)) } - let repo = try await Repository - .query(on: app.db) - .filter(\Repository.$package.$id == forkedId).first() - - XCTAssertNotNil(repo?.forkedFrom) - - XCTAssertEqual(repo?.forkedFrom, .parentId(id)) - - } - - func test_ingest_storeForkedFromPackageNotInSPI() async throws { - let forkedPkg = Package( - url: "https://github.com/taz/bar.git", - processingStage: .reconciliation - ) - try await forkedPkg.save(on: app.db) - Current.fetchMetadata = { _, owner, repository in - .mock(owner: owner, repository: repository, parentUrl: "https://github.com/foo/bar.git") + do { // test whem metadata repo url doesn't have `.git` at end + metadata.repository?.parent = .init(url: "https://github.com/Foo/Parent") + let fork = try await getFork(on: app.db, metadata: metadata) + XCTAssertEqual(fork, .parentId(.id0)) } - // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(1)) - - guard let forkedId = forkedPkg.id else { - XCTFail("Failed to get forked package id") - return - } - - let repo = try await Repository - .query(on: app.db) - .filter(\Repository.$package.$id == forkedId).first() - - XCTAssertNotNil(repo?.forkedFrom) - - XCTAssertEqual(repo?.forkedFrom, .parentURL("https://github.com/foo/bar.git")) - } - - func test_ingest_storeForkedFromShouldNeNil() async throws { - let forkedPkg = Package( - url: "https://github.com/taz/bar.git", - processingStage: .reconciliation - ) - try await forkedPkg.save(on: app.db) - Current.fetchMetadata = { _, owner, repository in - .mock(owner: owner, repository: repository, parentUrl: nil) + do { // test lookup when package is not in the index + metadata.repository?.parent = .init(url: "https://github.com/some/other.git") + let fork = try await getFork(on: app.db, metadata: metadata) + XCTAssertEqual(fork, .parentURL("https://github.com/some/other.git")) } - // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(1)) - - guard let forkedId = forkedPkg.id else { - XCTFail("Failed to get forked package id") - return + do { // test lookup when parent url is nil + metadata.repository?.parent = .init(url: nil) + let fork = try await getFork(on: app.db, metadata: metadata) + XCTAssertEqual(fork, nil) } - - let repo = try await Repository - .query(on: app.db) - .filter(\Repository.$package.$id == forkedId).first() - - XCTAssertNil(repo?.forkedFrom) } } From 0c04c1a77e048e1eefd8b1c80d9073d61678c349 Mon Sep 17 00:00:00 2001 From: Ucanbarlic Date: Fri, 6 Sep 2024 14:20:34 +0200 Subject: [PATCH 09/10] Resolve PR comments --- Sources/App/Commands/Ingest.swift | 31 +++++-------- Sources/App/Core/Extensions/URL+ext.swift | 24 ++++++++++ Tests/AppTests/IngestorTests.swift | 55 +++++++++++------------ 3 files changed, 61 insertions(+), 49 deletions(-) create mode 100644 Sources/App/Core/Extensions/URL+ext.swift diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 45ff5ea8b..0fc0ada1c 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -125,13 +125,7 @@ func ingest(client: Client, s3Readme = .error("\(error)") } - let fork: Fork? - do { - fork = try await getFork(on: database, metadata: metadata) - } catch { - fork = nil - Current.logger().warning("updating forked from failed") - } + let fork: Fork? = try? await getFork(on: database, parent: metadata.repository?.parent) try await updateRepository(on: database, for: repo, @@ -223,13 +217,10 @@ func updateRepository(on database: Database, try await repository.save(on: database) } -func getFork(on database: Database, metadata: Github.Metadata) async throws -> Fork? { - guard let url = metadata.repository?.parent?.url, - let parentUrl = URL(string: url)?.normalizedParent?.absoluteString else { - return nil - } +func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? { + guard let parentUrl = parent?.normalizedURL else { return nil } - if let packageId = try await Package.query(on: database) + if let packageId = try? await Package.query(on: database) .filter(\.$url, .custom("ilike"), parentUrl) .first()?.id { return .parentId(packageId) @@ -250,11 +241,13 @@ private extension Github.Metadata.Repository { var repositoryName: String? { name } } -private extension URL { - var normalizedParent: Self? { - guard var components = URLComponents(url: self, resolvingAgainstBaseURL: false) else { return nil } - if components.scheme == "http" { components.scheme = "https" } - if !components.path.hasSuffix(".git") { components.path = components.path + ".git" } - return components.url! +private extension Github.Metadata.Parent { + // Returns a normalized version of the URL. Adding a `.git` if not present. + var normalizedURL: String? { + guard let url else { return nil } + guard let normalizedURL = URL(string: url)?.normalized?.absoluteString else { + return nil + } + return normalizedURL } } diff --git a/Sources/App/Core/Extensions/URL+ext.swift b/Sources/App/Core/Extensions/URL+ext.swift new file mode 100644 index 000000000..8eddc47ec --- /dev/null +++ b/Sources/App/Core/Extensions/URL+ext.swift @@ -0,0 +1,24 @@ +// Copyright Dave Verwer, Sven A. Schmidt, and other contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +extension URL { + var normalized: Self? { + guard var components = URLComponents(url: self, resolvingAgainstBaseURL: false) else { return nil } + if components.scheme == "http" { components.scheme = "https" } + if !components.path.hasSuffix(".git") { components.path = components.path + ".git" } + return components.url! + } +} diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 6a1f28c96..50ad9164b 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -604,36 +604,31 @@ class IngestorTests: AppTestCase { try await Package(id: .id0, url: "https://github.com/foo/parent.git".url, processingStage: .analysis).save(on: app.db) try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db) - var metadata = Github.Metadata.mock - - do { // test lookup when package is in the index - metadata.repository?.parent = .init(url: "https://github.com/foo/parent.git") - let fork = try await getFork(on: app.db, metadata: metadata) - XCTAssertEqual(fork, .parentId(.id0)) - } - - do { // test lookup when package is in the index but with different case in URL - metadata.repository?.parent = .init(url: "https://github.com/Foo/Parent.git") - let fork = try await getFork(on: app.db, metadata: metadata) - XCTAssertEqual(fork, .parentId(.id0)) - } + var parent: Github.Metadata.Parent? - do { // test whem metadata repo url doesn't have `.git` at end - metadata.repository?.parent = .init(url: "https://github.com/Foo/Parent") - let fork = try await getFork(on: app.db, metadata: metadata) - XCTAssertEqual(fork, .parentId(.id0)) - } - - do { // test lookup when package is not in the index - metadata.repository?.parent = .init(url: "https://github.com/some/other.git") - let fork = try await getFork(on: app.db, metadata: metadata) - XCTAssertEqual(fork, .parentURL("https://github.com/some/other.git")) - } - - do { // test lookup when parent url is nil - metadata.repository?.parent = .init(url: nil) - let fork = try await getFork(on: app.db, metadata: metadata) - XCTAssertEqual(fork, nil) - } + // test lookup when package is in the index + parent = .init(url: "https://github.com/foo/parent.git") + let fork = await getFork(on: app.db, parent: parent) + XCTAssertEqual(fork, .parentId(.id0)) + + // test lookup when package is in the index but with different case in URL + parent = .init(url: "https://github.com/Foo/Parent.git") + let fork2 = await getFork(on: app.db, parent: parent) + XCTAssertEqual(fork2, .parentId(.id0)) + + // test whem metadata repo url doesn't have `.git` at end + parent = .init(url: "https://github.com/Foo/Parent") + let fork3 = await getFork(on: app.db, parent: parent) + XCTAssertEqual(fork3, .parentId(.id0)) + + // test lookup when package is not in the index + parent = .init(url: "https://github.com/some/other.git") + let fork4 = await getFork(on: app.db, parent: parent) + XCTAssertEqual(fork4, .parentURL("https://github.com/some/other.git")) + + // test lookup when parent url is nil + parent = nil + let fork5 = await getFork(on: app.db, parent: parent) + XCTAssertEqual(fork5, nil) } } From 21d22818d5aa21a94d99fadd460bf6b384466841 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sat, 7 Sep 2024 12:39:19 +0200 Subject: [PATCH 10/10] Fix warning, tidy up test --- Sources/App/Commands/Ingest.swift | 2 +- Tests/AppTests/IngestorTests.swift | 17 +++++------------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 0fc0ada1c..2278d1337 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -125,7 +125,7 @@ func ingest(client: Client, s3Readme = .error("\(error)") } - let fork: Fork? = try? await getFork(on: database, parent: metadata.repository?.parent) + let fork = await getFork(on: database, parent: metadata.repository?.parent) try await updateRepository(on: database, for: repo, diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 50ad9164b..01378c213 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -604,31 +604,24 @@ class IngestorTests: AppTestCase { try await Package(id: .id0, url: "https://github.com/foo/parent.git".url, processingStage: .analysis).save(on: app.db) try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db) - var parent: Github.Metadata.Parent? - // test lookup when package is in the index - parent = .init(url: "https://github.com/foo/parent.git") - let fork = await getFork(on: app.db, parent: parent) + let fork = await getFork(on: app.db, parent: .init(url: "https://github.com/foo/parent.git")) XCTAssertEqual(fork, .parentId(.id0)) // test lookup when package is in the index but with different case in URL - parent = .init(url: "https://github.com/Foo/Parent.git") - let fork2 = await getFork(on: app.db, parent: parent) + let fork2 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent.git")) XCTAssertEqual(fork2, .parentId(.id0)) // test whem metadata repo url doesn't have `.git` at end - parent = .init(url: "https://github.com/Foo/Parent") - let fork3 = await getFork(on: app.db, parent: parent) + let fork3 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent")) XCTAssertEqual(fork3, .parentId(.id0)) // test lookup when package is not in the index - parent = .init(url: "https://github.com/some/other.git") - let fork4 = await getFork(on: app.db, parent: parent) + let fork4 = await getFork(on: app.db, parent: .init(url: "https://github.com/some/other.git")) XCTAssertEqual(fork4, .parentURL("https://github.com/some/other.git")) // test lookup when parent url is nil - parent = nil - let fork5 = await getFork(on: app.db, parent: parent) + let fork5 = await getFork(on: app.db, parent: nil) XCTAssertEqual(fork5, nil) } }