Skip to content
31 changes: 29 additions & 2 deletions Sources/App/Commands/Ingest.swift
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,16 @@ func ingest(client: Client,
Current.logger().warning("storeS3Readme failed")
s3Readme = .error("\(error)")
}

let fork = await getFork(on: database, parent: metadata.repository?.parent)

try await updateRepository(on: database,
for: repo,
metadata: metadata,
licenseInfo: license,
readmeInfo: readme,
s3Readme: s3Readme)
s3Readme: s3Readme,
fork: fork)
return pkg
}

Expand Down Expand Up @@ -177,7 +180,8 @@ func updateRepository(on database: Database,
metadata: Github.Metadata,
licenseInfo: Github.License?,
readmeInfo: Github.Readme?,
s3Readme: S3Readme?) async throws {
s3Readme: S3Readme?,
fork: Fork? = nil) async throws {
guard let repoMetadata = metadata.repository else {
if repository.$package.value == nil {
try await repository.$package.load(on: database)
Expand Down Expand Up @@ -208,10 +212,22 @@ func updateRepository(on database: Database,
repository.releases = repoMetadata.releases.nodes.map(Release.init(from:))
repository.stars = repoMetadata.stargazerCount
repository.summary = repoMetadata.description
repository.forkedFrom = fork

try await repository.save(on: database)
}

func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? {
guard let parentUrl = parent?.normalizedURL else { return nil }

if let packageId = try? await Package.query(on: database)
.filter(\.$url, .custom("ilike"), parentUrl)
.first()?.id {
return .parentId(packageId)
} else {
return .parentURL(parentUrl)
}
}

// Helper to ensure the canonical source for these critical fields is the same in all the places where we need them
private extension Github.Metadata {
Expand All @@ -224,3 +240,14 @@ private extension Github.Metadata.Repository {
var repositoryOwner: String? { owner.login }
var repositoryName: String? { name }
}

private extension Github.Metadata.Parent {
// Returns a normalized version of the URL. Adding a `.git` if not present.
var normalizedURL: String? {
guard let url else { return nil }
guard let normalizedURL = URL(string: url)?.normalized?.absoluteString else {
return nil
}
return normalizedURL
}
}
24 changes: 24 additions & 0 deletions Sources/App/Core/Extensions/URL+ext.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright Dave Verwer, Sven A. Schmidt, and other contributors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

extension URL {
var normalized: Self? {
guard var components = URLComponents(url: self, resolvingAgainstBaseURL: false) else { return nil }
if components.scheme == "http" { components.scheme = "https" }
if !components.path.hasSuffix(".git") { components.path = components.path + ".git" }
return components.url!
}
}
8 changes: 8 additions & 0 deletions Sources/App/Core/Github.swift
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,9 @@ extension Github {
homepageUrl
isArchived
isFork
parent {
url
}
isInOrganization
licenseInfo {
name
Expand Down Expand Up @@ -348,6 +351,7 @@ extension Github {
var isArchived: Bool
// periphery:ignore
var isFork: Bool
var parent: Parent?
var isInOrganization: Bool
var licenseInfo: LicenseInfo?
var mergedPullRequests: IssueNodes
Expand Down Expand Up @@ -459,6 +463,10 @@ extension Github {
}
}
}

struct Parent: Decodable, Equatable {
var url: String?
}
}

}
Expand Down
35 changes: 35 additions & 0 deletions Sources/App/Migrations/079/UpdateRepositoryAddForkedFrom.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright Dave Verwer, Sven A. Schmidt, and other contributors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Fluent
import SQLKit


struct UpdateRepositoryAddForkedFrom: AsyncMigration {
func prepare(on database: Database) async throws {
try await database.schema("repositories")
.field("forked_from", .json)
// delete old `forked_from_id` field
.deleteField("forked_from_id")
.update()
}

func revert(on database: Database) async throws {
try await database.schema("repositories")
.deleteField("forked_from")
.field("forked_from_id", .uuid,
.references("repositories", "id")).unique(on: "forked_from_id")
.update()
}
}
17 changes: 10 additions & 7 deletions Sources/App/Models/Repository.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ final class Repository: @unchecked Sendable, Model, Content {

// reference fields

@OptionalParent(key: "forked_from_id") // TODO: remove or implement
var forkedFrom: Repository?

@Parent(key: "package_id")
var package: Package

Expand All @@ -53,6 +50,9 @@ final class Repository: @unchecked Sendable, Model, Content {

@Field(key: "first_commit_date")
var firstCommitDate: Date?

@Field(key: "forked_from")
var forkedFrom: Fork?

@Field(key: "forks")
var forks: Int
Expand Down Expand Up @@ -135,7 +135,7 @@ final class Repository: @unchecked Sendable, Model, Content {
firstCommitDate: Date? = nil,
forks: Int = 0,
fundingLinks: [FundingLink] = [],
forkedFrom: Repository? = nil,
forkedFrom: Fork? = nil,
homepageUrl: String? = nil,
isArchived: Bool = false,
isInOrganization: Bool = false,
Expand Down Expand Up @@ -164,9 +164,7 @@ final class Repository: @unchecked Sendable, Model, Content {
self.commitCount = commitCount
self.firstCommitDate = firstCommitDate
self.forks = forks
if let forkId = forkedFrom?.id {
self.$forkedFrom.id = forkId
}
self.forkedFrom = forkedFrom
self.fundingLinks = fundingLinks
self.homepageUrl = homepageUrl
self.isArchived = isArchived
Expand Down Expand Up @@ -273,3 +271,8 @@ enum S3Readme: Codable, Equatable {
}
}
}

enum Fork: Codable, Equatable {
case parentId(Package.Id)
case parentURL(String)
}
3 changes: 3 additions & 0 deletions Sources/App/configure.swift
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ public func configure(_ app: Application) async throws -> String {
do { // Migration 078 - Add `build_date` and `commit_hash` to `builds`
app.migrations.add(UpdateBuildAddBuildDateCommitHash())
}
do { // Migration 079 - Add `forked_from` to `repositories`
app.migrations.add(UpdateRepositoryAddForkedFrom())
}

app.asyncCommands.use(Analyze.Command(), as: "analyze")
app.asyncCommands.use(CreateRestfileCommand(), as: "create-restfile")
Expand Down
34 changes: 32 additions & 2 deletions Tests/AppTests/IngestorTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ class IngestorTests: AppTestCase {
Date(timeIntervalSince1970: 1),
],
license: .mit,
openIssues: 1,
openIssues: 1,
parentUrl: nil,
openPullRequests: 2,
owner: "foo",
pullRequestsClosedAtDates: [
Expand Down Expand Up @@ -155,7 +156,8 @@ class IngestorTests: AppTestCase {
html: "readme html",
htmlUrl: "readme html url",
imagesToCache: []),
s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"))
s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"),
fork: .parentURL("https://github.com/foo/bar.git"))

// validate
do {
Expand All @@ -164,6 +166,7 @@ class IngestorTests: AppTestCase {
let repo = try await Repository.query(on: app.db).first().unwrap()
XCTAssertEqual(repo.defaultBranch, "main")
XCTAssertEqual(repo.forks, 1)
XCTAssertEqual(repo.forkedFrom, .parentURL("https://github.com/foo/bar.git"))
XCTAssertEqual(repo.fundingLinks, [
.init(platform: .gitHub, url: "https://github.com/username"),
.init(platform: .customUrl, url: "https://example.com/username1"),
Expand Down Expand Up @@ -208,6 +211,7 @@ class IngestorTests: AppTestCase {
issuesClosedAtDates: [],
license: .mit,
openIssues: 1,
parentUrl: nil,
openPullRequests: 2,
owner: "foo",
pullRequestsClosedAtDates: [],
Expand Down Expand Up @@ -353,6 +357,7 @@ class IngestorTests: AppTestCase {
issuesClosedAtDates: [],
license: .mit,
openIssues: 0,
parentUrl: nil,
openPullRequests: 0,
owner: "owner",
pullRequestsClosedAtDates: [],
Expand Down Expand Up @@ -594,4 +599,29 @@ class IngestorTests: AppTestCase {
let postMigrationFetchedRepo = try await XCTUnwrapAsync(try await Repository.query(on: app.db).first())
XCTAssertEqual(postMigrationFetchedRepo.s3Readme, .cached(s3ObjectUrl: "object-url", githubEtag: ""))
}

func test_getFork() async throws {
try await Package(id: .id0, url: "https://github.com/foo/parent.git".url, processingStage: .analysis).save(on: app.db)
try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db)

// test lookup when package is in the index
let fork = await getFork(on: app.db, parent: .init(url: "https://github.com/foo/parent.git"))
XCTAssertEqual(fork, .parentId(.id0))

// test lookup when package is in the index but with different case in URL
let fork2 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent.git"))
XCTAssertEqual(fork2, .parentId(.id0))

// test whem metadata repo url doesn't have `.git` at end
let fork3 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent"))
XCTAssertEqual(fork3, .parentId(.id0))

// test lookup when package is not in the index
let fork4 = await getFork(on: app.db, parent: .init(url: "https://github.com/some/other.git"))
XCTAssertEqual(fork4, .parentURL("https://github.com/some/other.git"))

// test lookup when parent url is nil
let fork5 = await getFork(on: app.db, parent: nil)
XCTAssertEqual(fork5, nil)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By moving the main functionality to getFork, these tests become much easier to set up because we don't need to test all of ingest. Instead we just test getFork in isolation:

    func test_getFork() async throws {
        try await Package(id: .id0, url: "https://github.com/foo/parent.git".url, processingStage: .analysis).save(on: app.db)
        try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db)

        var metadata = Github.Metadata.mock

        do {  // test lookup when package is in the index
            metadata.repository?.parent = .init(url: "https://github.com/foo/parent.git")
            let fork = try await getFork(on: app.db, metadata: metadata)
            XCTAssertEqual(fork, .parentId(.id0))
        }

        do {  // test lookup when package is in the index but with different case in URL
            metadata.repository?.parent = .init(url: "https://github.com/Foo/Parent.git")
            let fork = try await getFork(on: app.db, metadata: metadata)
            XCTAssertEqual(fork, .parentId(.id0))
        }

        do {  // test lookup when package is not in the index
            metadata.repository?.parent = .init(url: "https://github.com/some/other.git")
            let fork = try await getFork(on: app.db, metadata: metadata)
            XCTAssertEqual(fork, .parentURL("https://github.com/some/other.git"))
        }

        do {  // test lookup when parent url is nil
            metadata.repository?.parent = .init(url: nil)
            let fork = try await getFork(on: app.db, metadata: metadata)
            XCTAssertEqual(fork, nil)
        }
    }

and then we simply extend a single test where we ensure fork is written to the db in updateRepository. For example, I'd modify test_updateRepository_update and add fork (call on or around line 152):

        try await updateRepository(on: app.db,
                                   for: repo,
                                   metadata: md,
                                   licenseInfo: .init(htmlUrl: "license url"),
                                   readmeInfo: .init(etag: "etag",
                                                     html: "readme html",
                                                     htmlUrl: "readme html url",
                                                     imagesToCache: []),
                                   s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"),
                                   fork: .parentURL("https://github.com/foo/bar.git"))

and validate it (around line 168):

            XCTAssertEqual(repo.forkedFrom, .parentURL("https://github.com/foo/bar.git"))

}
8 changes: 6 additions & 2 deletions Tests/AppTests/Mocks/GithubMetadata+mock.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ extension Github.Metadata {
issuesClosedAtDates: [],
license: .mit,
openIssues: 3,
parentUrl: nil,
openPullRequests: 0,
owner: "packageOwner",
pullRequestsClosedAtDates: [],
Expand All @@ -34,14 +35,15 @@ extension Github.Metadata {
stars: 2,
summary: "desc")

static func mock(owner: String, repository: String) -> Self {
static func mock(owner: String, repository: String, parentUrl: String? = nil) -> Self {
return .init(defaultBranch: "main",
forks: owner.count + repository.count,
homepageUrl: nil,
isInOrganization: false,
issuesClosedAtDates: [],
license: .mit,
openIssues: 3,
parentUrl: parentUrl,
openPullRequests: 0,
owner: owner,
pullRequestsClosedAtDates: [],
Expand All @@ -60,6 +62,7 @@ extension Github.Metadata {
issuesClosedAtDates: [Date],
license: License,
openIssues: Int,
parentUrl: String?,
openPullRequests: Int,
owner: String,
pullRequestsClosedAtDates: [Date],
Expand All @@ -84,7 +87,8 @@ extension Github.Metadata {
fundingLinks: fundingLinks,
homepageUrl: homepageUrl,
isArchived: false,
isFork: false,
isFork: false,
parent: .init(url: parentUrl),
isInOrganization: isInOrganization,
licenseInfo: .init(name: license.fullName, key: license.rawValue),
mergedPullRequests: .init(closedAtDates: []),
Expand Down
13 changes: 0 additions & 13 deletions Tests/AppTests/RepositoryTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,6 @@ final class RepositoryTests: AppTestCase {
}
}

func test_forkedFrom_relationship() async throws {
let p1 = Package(url: "p1")
try await p1.save(on: app.db)
let p2 = Package(url: "p2")
try await p2.save(on: app.db)

// test forked from link
let parent = try Repository(package: p1)
try await parent.save(on: app.db)
let child = try Repository(package: p2, forkedFrom: parent)
try await child.save(on: app.db)
}

func test_delete_cascade() async throws {
// delete package must delete repository
let pkg = Package(id: UUID(), url: "1")
Expand Down
Loading