diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index d4e66f75b..2278d1337 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -124,13 +124,16 @@ func ingest(client: Client, Current.logger().warning("storeS3Readme failed") s3Readme = .error("\(error)") } + + let fork = await getFork(on: database, parent: metadata.repository?.parent) try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, - s3Readme: s3Readme) + s3Readme: s3Readme, + fork: fork) return pkg } @@ -177,7 +180,8 @@ func updateRepository(on database: Database, metadata: Github.Metadata, licenseInfo: Github.License?, readmeInfo: Github.Readme?, - s3Readme: S3Readme?) async throws { + s3Readme: S3Readme?, + fork: Fork? = nil) async throws { guard let repoMetadata = metadata.repository else { if repository.$package.value == nil { try await repository.$package.load(on: database) @@ -208,10 +212,22 @@ func updateRepository(on database: Database, repository.releases = repoMetadata.releases.nodes.map(Release.init(from:)) repository.stars = repoMetadata.stargazerCount repository.summary = repoMetadata.description + repository.forkedFrom = fork try await repository.save(on: database) } +func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? { + guard let parentUrl = parent?.normalizedURL else { return nil } + + if let packageId = try? await Package.query(on: database) + .filter(\.$url, .custom("ilike"), parentUrl) + .first()?.id { + return .parentId(packageId) + } else { + return .parentURL(parentUrl) + } +} // Helper to ensure the canonical source for these critical fields is the same in all the places where we need them private extension Github.Metadata { @@ -224,3 +240,14 @@ private extension Github.Metadata.Repository { var repositoryOwner: String? { owner.login } var repositoryName: String? { name } } + +private extension Github.Metadata.Parent { + // Returns a normalized version of the URL. Adding a `.git` if not present. + var normalizedURL: String? { + guard let url else { return nil } + guard let normalizedURL = URL(string: url)?.normalized?.absoluteString else { + return nil + } + return normalizedURL + } +} diff --git a/Sources/App/Core/Extensions/URL+ext.swift b/Sources/App/Core/Extensions/URL+ext.swift new file mode 100644 index 000000000..8eddc47ec --- /dev/null +++ b/Sources/App/Core/Extensions/URL+ext.swift @@ -0,0 +1,24 @@ +// Copyright Dave Verwer, Sven A. Schmidt, and other contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +extension URL { + var normalized: Self? { + guard var components = URLComponents(url: self, resolvingAgainstBaseURL: false) else { return nil } + if components.scheme == "http" { components.scheme = "https" } + if !components.path.hasSuffix(".git") { components.path = components.path + ".git" } + return components.url! + } +} diff --git a/Sources/App/Core/Github.swift b/Sources/App/Core/Github.swift index 359957138..48aac40f4 100644 --- a/Sources/App/Core/Github.swift +++ b/Sources/App/Core/Github.swift @@ -284,6 +284,9 @@ extension Github { homepageUrl isArchived isFork + parent { + url + } isInOrganization licenseInfo { name @@ -348,6 +351,7 @@ extension Github { var isArchived: Bool // periphery:ignore var isFork: Bool + var parent: Parent? var isInOrganization: Bool var licenseInfo: LicenseInfo? var mergedPullRequests: IssueNodes @@ -459,6 +463,10 @@ extension Github { } } } + + struct Parent: Decodable, Equatable { + var url: String? + } } } diff --git a/Sources/App/Migrations/079/UpdateRepositoryAddForkedFrom.swift b/Sources/App/Migrations/079/UpdateRepositoryAddForkedFrom.swift new file mode 100644 index 000000000..24af7caa9 --- /dev/null +++ b/Sources/App/Migrations/079/UpdateRepositoryAddForkedFrom.swift @@ -0,0 +1,35 @@ +// Copyright Dave Verwer, Sven A. Schmidt, and other contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Fluent +import SQLKit + + +struct UpdateRepositoryAddForkedFrom: AsyncMigration { + func prepare(on database: Database) async throws { + try await database.schema("repositories") + .field("forked_from", .json) + // delete old `forked_from_id` field + .deleteField("forked_from_id") + .update() + } + + func revert(on database: Database) async throws { + try await database.schema("repositories") + .deleteField("forked_from") + .field("forked_from_id", .uuid, + .references("repositories", "id")).unique(on: "forked_from_id") + .update() + } +} diff --git a/Sources/App/Models/Repository.swift b/Sources/App/Models/Repository.swift index 245448285..20ab78a28 100644 --- a/Sources/App/Models/Repository.swift +++ b/Sources/App/Models/Repository.swift @@ -34,9 +34,6 @@ final class Repository: @unchecked Sendable, Model, Content { // reference fields - @OptionalParent(key: "forked_from_id") // TODO: remove or implement - var forkedFrom: Repository? - @Parent(key: "package_id") var package: Package @@ -53,6 +50,9 @@ final class Repository: @unchecked Sendable, Model, Content { @Field(key: "first_commit_date") var firstCommitDate: Date? + + @Field(key: "forked_from") + var forkedFrom: Fork? @Field(key: "forks") var forks: Int @@ -135,7 +135,7 @@ final class Repository: @unchecked Sendable, Model, Content { firstCommitDate: Date? = nil, forks: Int = 0, fundingLinks: [FundingLink] = [], - forkedFrom: Repository? = nil, + forkedFrom: Fork? = nil, homepageUrl: String? = nil, isArchived: Bool = false, isInOrganization: Bool = false, @@ -164,9 +164,7 @@ final class Repository: @unchecked Sendable, Model, Content { self.commitCount = commitCount self.firstCommitDate = firstCommitDate self.forks = forks - if let forkId = forkedFrom?.id { - self.$forkedFrom.id = forkId - } + self.forkedFrom = forkedFrom self.fundingLinks = fundingLinks self.homepageUrl = homepageUrl self.isArchived = isArchived @@ -273,3 +271,8 @@ enum S3Readme: Codable, Equatable { } } } + +enum Fork: Codable, Equatable { + case parentId(Package.Id) + case parentURL(String) +} diff --git a/Sources/App/configure.swift b/Sources/App/configure.swift index 156020e4c..48f48c512 100644 --- a/Sources/App/configure.swift +++ b/Sources/App/configure.swift @@ -334,6 +334,9 @@ public func configure(_ app: Application) async throws -> String { do { // Migration 078 - Add `build_date` and `commit_hash` to `builds` app.migrations.add(UpdateBuildAddBuildDateCommitHash()) } + do { // Migration 079 - Add `forked_from` to `repositories` + app.migrations.add(UpdateRepositoryAddForkedFrom()) + } app.asyncCommands.use(Analyze.Command(), as: "analyze") app.asyncCommands.use(CreateRestfileCommand(), as: "create-restfile") diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index fb4efedc0..01378c213 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -125,7 +125,8 @@ class IngestorTests: AppTestCase { Date(timeIntervalSince1970: 1), ], license: .mit, - openIssues: 1, + openIssues: 1, + parentUrl: nil, openPullRequests: 2, owner: "foo", pullRequestsClosedAtDates: [ @@ -155,7 +156,8 @@ class IngestorTests: AppTestCase { html: "readme html", htmlUrl: "readme html url", imagesToCache: []), - s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag")) + s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"), + fork: .parentURL("https://github.com/foo/bar.git")) // validate do { @@ -164,6 +166,7 @@ class IngestorTests: AppTestCase { let repo = try await Repository.query(on: app.db).first().unwrap() XCTAssertEqual(repo.defaultBranch, "main") XCTAssertEqual(repo.forks, 1) + XCTAssertEqual(repo.forkedFrom, .parentURL("https://github.com/foo/bar.git")) XCTAssertEqual(repo.fundingLinks, [ .init(platform: .gitHub, url: "https://github.com/username"), .init(platform: .customUrl, url: "https://example.com/username1"), @@ -208,6 +211,7 @@ class IngestorTests: AppTestCase { issuesClosedAtDates: [], license: .mit, openIssues: 1, + parentUrl: nil, openPullRequests: 2, owner: "foo", pullRequestsClosedAtDates: [], @@ -353,6 +357,7 @@ class IngestorTests: AppTestCase { issuesClosedAtDates: [], license: .mit, openIssues: 0, + parentUrl: nil, openPullRequests: 0, owner: "owner", pullRequestsClosedAtDates: [], @@ -594,4 +599,29 @@ class IngestorTests: AppTestCase { let postMigrationFetchedRepo = try await XCTUnwrapAsync(try await Repository.query(on: app.db).first()) XCTAssertEqual(postMigrationFetchedRepo.s3Readme, .cached(s3ObjectUrl: "object-url", githubEtag: "")) } + + func test_getFork() async throws { + try await Package(id: .id0, url: "https://github.com/foo/parent.git".url, processingStage: .analysis).save(on: app.db) + try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db) + + // test lookup when package is in the index + let fork = await getFork(on: app.db, parent: .init(url: "https://github.com/foo/parent.git")) + XCTAssertEqual(fork, .parentId(.id0)) + + // test lookup when package is in the index but with different case in URL + let fork2 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent.git")) + XCTAssertEqual(fork2, .parentId(.id0)) + + // test whem metadata repo url doesn't have `.git` at end + let fork3 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent")) + XCTAssertEqual(fork3, .parentId(.id0)) + + // test lookup when package is not in the index + let fork4 = await getFork(on: app.db, parent: .init(url: "https://github.com/some/other.git")) + XCTAssertEqual(fork4, .parentURL("https://github.com/some/other.git")) + + // test lookup when parent url is nil + let fork5 = await getFork(on: app.db, parent: nil) + XCTAssertEqual(fork5, nil) + } } diff --git a/Tests/AppTests/Mocks/GithubMetadata+mock.swift b/Tests/AppTests/Mocks/GithubMetadata+mock.swift index fd7ba0e70..511dc88b2 100644 --- a/Tests/AppTests/Mocks/GithubMetadata+mock.swift +++ b/Tests/AppTests/Mocks/GithubMetadata+mock.swift @@ -25,6 +25,7 @@ extension Github.Metadata { issuesClosedAtDates: [], license: .mit, openIssues: 3, + parentUrl: nil, openPullRequests: 0, owner: "packageOwner", pullRequestsClosedAtDates: [], @@ -34,7 +35,7 @@ extension Github.Metadata { stars: 2, summary: "desc") - static func mock(owner: String, repository: String) -> Self { + static func mock(owner: String, repository: String, parentUrl: String? = nil) -> Self { return .init(defaultBranch: "main", forks: owner.count + repository.count, homepageUrl: nil, @@ -42,6 +43,7 @@ extension Github.Metadata { issuesClosedAtDates: [], license: .mit, openIssues: 3, + parentUrl: parentUrl, openPullRequests: 0, owner: owner, pullRequestsClosedAtDates: [], @@ -60,6 +62,7 @@ extension Github.Metadata { issuesClosedAtDates: [Date], license: License, openIssues: Int, + parentUrl: String?, openPullRequests: Int, owner: String, pullRequestsClosedAtDates: [Date], @@ -84,7 +87,8 @@ extension Github.Metadata { fundingLinks: fundingLinks, homepageUrl: homepageUrl, isArchived: false, - isFork: false, + isFork: false, + parent: .init(url: parentUrl), isInOrganization: isInOrganization, licenseInfo: .init(name: license.fullName, key: license.rawValue), mergedPullRequests: .init(closedAtDates: []), diff --git a/Tests/AppTests/RepositoryTests.swift b/Tests/AppTests/RepositoryTests.swift index 0d7c5827a..92e53fefa 100644 --- a/Tests/AppTests/RepositoryTests.swift +++ b/Tests/AppTests/RepositoryTests.swift @@ -178,19 +178,6 @@ final class RepositoryTests: AppTestCase { } } - func test_forkedFrom_relationship() async throws { - let p1 = Package(url: "p1") - try await p1.save(on: app.db) - let p2 = Package(url: "p2") - try await p2.save(on: app.db) - - // test forked from link - let parent = try Repository(package: p1) - try await parent.save(on: app.db) - let child = try Repository(package: p2, forkedFrom: parent) - try await child.save(on: app.db) - } - func test_delete_cascade() async throws { // delete package must delete repository let pkg = Package(id: UUID(), url: "1")