diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index d4e66f75b..b94fa590f 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -124,13 +124,14 @@ func ingest(client: Client, Current.logger().warning("storeS3Readme failed") s3Readme = .error("\(error)") } - + try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme) + return pkg } @@ -185,6 +186,21 @@ func updateRepository(on database: Database, throw AppError.genericError(repository.package.id, "repository metadata is nil for package \(repository.name ?? "unknown")") } + + var fork: Fork? + do { + if let parentUrl = metadata.repository?.normalizedParentUrl { + if let packageId = try await Package.query(on: database) + .filter(\.$url, .custom("ilike"), parentUrl) + .first()?.id { + fork = .parentId(packageId) + } else { + fork = .parentURL(parentUrl) + } + } + } catch { + Current.logger().warning("updating forked from failed") + } repository.defaultBranch = repoMetadata.defaultBranch repository.forks = repoMetadata.forkCount @@ -208,6 +224,7 @@ func updateRepository(on database: Database, repository.releases = repoMetadata.releases.nodes.map(Release.init(from:)) repository.stars = repoMetadata.stargazerCount repository.summary = repoMetadata.description + repository.forkedFrom = fork try await repository.save(on: database) } @@ -224,3 +241,13 @@ private extension Github.Metadata.Repository { var repositoryOwner: String? { owner.login } var repositoryName: String? { name } } + +private extension Github.Metadata.Repository { + // Returns a normalized version of the URL. Adding a `.git` if not present. + var normalizedParentUrl: String? { + guard let url = parent.url else { return nil } + guard !url.hasSuffix(".git") else { return url } + let normalizedUrl = url + ".git" + return normalizedUrl + } +} diff --git a/Sources/App/Core/Github.swift b/Sources/App/Core/Github.swift index 359957138..3ce50122e 100644 --- a/Sources/App/Core/Github.swift +++ b/Sources/App/Core/Github.swift @@ -284,6 +284,9 @@ extension Github { homepageUrl isArchived isFork + parent { + url + } isInOrganization licenseInfo { name @@ -348,6 +351,7 @@ extension Github { var isArchived: Bool // periphery:ignore var isFork: Bool + var parent: Parent var isInOrganization: Bool var licenseInfo: LicenseInfo? var mergedPullRequests: IssueNodes @@ -459,6 +463,10 @@ extension Github { } } } + + struct Parent: Decodable, Equatable { + var url: String? + } } } diff --git a/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift b/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift new file mode 100644 index 000000000..42177e8aa --- /dev/null +++ b/Sources/App/Migrations/079/UpdateRepositoryAddFork.swift @@ -0,0 +1,35 @@ +// Copyright Dave Verwer, Sven A. Schmidt, and other contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Fluent +import SQLKit + + +struct UpdateRepositoryAddFork: AsyncMigration { + func prepare(on database: Database) async throws { + try await database.schema("repositories") + .field("forked_from", .json) + // delete old `forked_from_id` field + .deleteField("forked_from_id") + .update() + } + + func revert(on database: Database) async throws { + try await database.schema("repositories") + .deleteField("forked_from") + .field("forked_from_id", .uuid, + .references("repositories", "id")).unique(on: "forked_from_id") + .update() + } +} diff --git a/Sources/App/Models/Repository.swift b/Sources/App/Models/Repository.swift index 245448285..0b7160ee8 100644 --- a/Sources/App/Models/Repository.swift +++ b/Sources/App/Models/Repository.swift @@ -34,9 +34,6 @@ final class Repository: @unchecked Sendable, Model, Content { // reference fields - @OptionalParent(key: "forked_from_id") // TODO: remove or implement - var forkedFrom: Repository? - @Parent(key: "package_id") var package: Package @@ -122,6 +119,9 @@ final class Repository: @unchecked Sendable, Model, Content { @Field(key: "summary") var summary: String? + + @Field(key: "forked_from") + var forkedFrom: Fork? // initializers @@ -135,7 +135,7 @@ final class Repository: @unchecked Sendable, Model, Content { firstCommitDate: Date? = nil, forks: Int = 0, fundingLinks: [FundingLink] = [], - forkedFrom: Repository? = nil, + forkedFrom: Fork? = nil, homepageUrl: String? = nil, isArchived: Bool = false, isInOrganization: Bool = false, @@ -164,9 +164,7 @@ final class Repository: @unchecked Sendable, Model, Content { self.commitCount = commitCount self.firstCommitDate = firstCommitDate self.forks = forks - if let forkId = forkedFrom?.id { - self.$forkedFrom.id = forkId - } + self.forkedFrom = forkedFrom self.fundingLinks = fundingLinks self.homepageUrl = homepageUrl self.isArchived = isArchived @@ -273,3 +271,8 @@ enum S3Readme: Codable, Equatable { } } } + +enum Fork: Codable, Equatable { + case parentId(Package.Id) + case parentURL(String) +} diff --git a/Sources/App/configure.swift b/Sources/App/configure.swift index 156020e4c..9e367d417 100644 --- a/Sources/App/configure.swift +++ b/Sources/App/configure.swift @@ -334,6 +334,9 @@ public func configure(_ app: Application) async throws -> String { do { // Migration 078 - Add `build_date` and `commit_hash` to `builds` app.migrations.add(UpdateBuildAddBuildDateCommitHash()) } + do { // Migration 079 - Add `fork` to `repositories` + app.migrations.add(UpdateRepositoryAddFork()) + } app.asyncCommands.use(Analyze.Command(), as: "analyze") app.asyncCommands.use(CreateRestfileCommand(), as: "create-restfile") diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index fb4efedc0..ac39e5660 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -125,7 +125,8 @@ class IngestorTests: AppTestCase { Date(timeIntervalSince1970: 1), ], license: .mit, - openIssues: 1, + openIssues: 1, + parentUrl: nil, openPullRequests: 2, owner: "foo", pullRequestsClosedAtDates: [ @@ -208,6 +209,7 @@ class IngestorTests: AppTestCase { issuesClosedAtDates: [], license: .mit, openIssues: 1, + parentUrl: nil, openPullRequests: 2, owner: "foo", pullRequestsClosedAtDates: [], @@ -353,6 +355,7 @@ class IngestorTests: AppTestCase { issuesClosedAtDates: [], license: .mit, openIssues: 0, + parentUrl: nil, openPullRequests: 0, owner: "owner", pullRequestsClosedAtDates: [], @@ -594,4 +597,92 @@ class IngestorTests: AppTestCase { let postMigrationFetchedRepo = try await XCTUnwrapAsync(try await Repository.query(on: app.db).first()) XCTAssertEqual(postMigrationFetchedRepo.s3Readme, .cached(s3ObjectUrl: "object-url", githubEtag: "")) } + + func test_ingest_storeForkedFromPackageInSPI() async throws { + let pkg = Package(url: "https://github.com/foo/bar.git".url, + processingStage: .analysis) + let forkedPkg = Package( + url: "https://github.com/taz/bar.git", + processingStage: .reconciliation + ) + try await pkg.save(on: app.db) + try await forkedPkg.save(on: app.db) + Current.fetchMetadata = { _, owner, repository in + .mock(owner: owner, repository: repository, parentUrl: "https://github.com/foo/bar.git") + } + + // MUT + try await ingest(client: app.client, database: app.db, mode: .limit(1)) + + guard let forkedId = forkedPkg.id else { + XCTFail("Failed to get forked package id") + return + } + + guard let id = pkg.id else { + XCTFail("Failed to get package id") + return + } + + let repo = try await Repository + .query(on: app.db) + .filter(\Repository.$package.$id == forkedId).first() + + XCTAssertNotNil(repo?.forkedFrom) + + XCTAssertEqual(repo?.forkedFrom, .parentId(id)) + + } + + func test_ingest_storeForkedFromPackageNotInSPI() async throws { + let forkedPkg = Package( + url: "https://github.com/taz/bar.git", + processingStage: .reconciliation + ) + try await forkedPkg.save(on: app.db) + Current.fetchMetadata = { _, owner, repository in + .mock(owner: owner, repository: repository, parentUrl: "https://github.com/foo/bar.git") + } + + // MUT + try await ingest(client: app.client, database: app.db, mode: .limit(1)) + + guard let forkedId = forkedPkg.id else { + XCTFail("Failed to get forked package id") + return + } + + let repo = try await Repository + .query(on: app.db) + .filter(\Repository.$package.$id == forkedId).first() + + XCTAssertNotNil(repo?.forkedFrom) + + XCTAssertEqual(repo?.forkedFrom, .parentURL("https://github.com/foo/bar.git")) + } + + func test_ingest_storeForkedFromShouldNeNil() async throws { + let forkedPkg = Package( + url: "https://github.com/taz/bar.git", + processingStage: .reconciliation + ) + try await forkedPkg.save(on: app.db) + Current.fetchMetadata = { _, owner, repository in + .mock(owner: owner, repository: repository, parentUrl: nil) + } + + // MUT + try await ingest(client: app.client, database: app.db, mode: .limit(1)) + + guard let forkedId = forkedPkg.id else { + XCTFail("Failed to get forked package id") + return + } + + let repo = try await Repository + .query(on: app.db) + .filter(\Repository.$package.$id == forkedId).first() + + XCTAssertNil(repo?.forkedFrom) + } } diff --git a/Tests/AppTests/Mocks/GithubMetadata+mock.swift b/Tests/AppTests/Mocks/GithubMetadata+mock.swift index fd7ba0e70..511dc88b2 100644 --- a/Tests/AppTests/Mocks/GithubMetadata+mock.swift +++ b/Tests/AppTests/Mocks/GithubMetadata+mock.swift @@ -25,6 +25,7 @@ extension Github.Metadata { issuesClosedAtDates: [], license: .mit, openIssues: 3, + parentUrl: nil, openPullRequests: 0, owner: "packageOwner", pullRequestsClosedAtDates: [], @@ -34,7 +35,7 @@ extension Github.Metadata { stars: 2, summary: "desc") - static func mock(owner: String, repository: String) -> Self { + static func mock(owner: String, repository: String, parentUrl: String? = nil) -> Self { return .init(defaultBranch: "main", forks: owner.count + repository.count, homepageUrl: nil, @@ -42,6 +43,7 @@ extension Github.Metadata { issuesClosedAtDates: [], license: .mit, openIssues: 3, + parentUrl: parentUrl, openPullRequests: 0, owner: owner, pullRequestsClosedAtDates: [], @@ -60,6 +62,7 @@ extension Github.Metadata { issuesClosedAtDates: [Date], license: License, openIssues: Int, + parentUrl: String?, openPullRequests: Int, owner: String, pullRequestsClosedAtDates: [Date], @@ -84,7 +87,8 @@ extension Github.Metadata { fundingLinks: fundingLinks, homepageUrl: homepageUrl, isArchived: false, - isFork: false, + isFork: false, + parent: .init(url: parentUrl), isInOrganization: isInOrganization, licenseInfo: .init(name: license.fullName, key: license.rawValue), mergedPullRequests: .init(closedAtDates: []), diff --git a/Tests/AppTests/RepositoryTests.swift b/Tests/AppTests/RepositoryTests.swift index 0d7c5827a..92e53fefa 100644 --- a/Tests/AppTests/RepositoryTests.swift +++ b/Tests/AppTests/RepositoryTests.swift @@ -178,19 +178,6 @@ final class RepositoryTests: AppTestCase { } } - func test_forkedFrom_relationship() async throws { - let p1 = Package(url: "p1") - try await p1.save(on: app.db) - let p2 = Package(url: "p2") - try await p2.save(on: app.db) - - // test forked from link - let parent = try Repository(package: p1) - try await parent.save(on: app.db) - let child = try Repository(package: p2, forkedFrom: parent) - try await child.save(on: app.db) - } - func test_delete_cascade() async throws { // delete package must delete repository let pkg = Package(id: UUID(), url: "1")