Skip to content

Commit 0ea9434

Browse files
committed
wip
1 parent 69f4a1a commit 0ea9434

File tree

4 files changed

+121
-42
lines changed

4 files changed

+121
-42
lines changed

Sources/App/Commands/Ingest.swift

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,17 @@ import Vapor
1616
import Fluent
1717

1818

19+
enum Ingestion {
20+
enum Error: Swift.Error {
21+
case fetchMetadataFailed(owner: String, name: String, error: Swift.Error)
22+
case findOrCreateRepositoryFailed(url: String, error: Swift.Error)
23+
case invalidURL(String)
24+
case noRepositoryMetadata(owner: String?, name: String?)
25+
case repositorySaveFailed(owner: String?, name: String?, error: Swift.Error)
26+
}
27+
}
28+
29+
1930
struct IngestCommand: AsyncCommand {
2031
typealias Signature = SPICommand.Signature
2132

@@ -152,17 +163,82 @@ func ingestOriginal(client: Client, database: Database, package: Joined<Package,
152163
}
153164

154165

155-
func fetchMetadata(client: Client, package: Joined<Package, Repository>) async throws -> (Github.Metadata, Github.License?, Github.Readme?) {
166+
extension Ingestion {
167+
static func ingestNew(client: Client, database: Database, package: Joined<Package, Repository>) async {
168+
let result = await Result { () async throws(Ingestion.Error) -> Joined<Package, Repository> in
169+
Current.logger().info("Ingesting \(package.package.url)")
170+
let (metadata, license, readme) = try await fetchMetadata(client: client, package: package)
171+
let repo = try await Result {
172+
try await Repository.findOrCreate(on: database, for: package.model)
173+
}.mapError {
174+
Ingestion.Error.findOrCreateRepositoryFailed(url: package.package.url, error: $0)
175+
}.get()
176+
177+
let s3Readme: S3Readme?
178+
do throws(S3ReadmeError) {
179+
s3Readme = try await storeS3Readme(client: client, repository: repo, metadata: metadata, readme: readme)
180+
} catch {
181+
// We don't want to fail ingestion in case storing the readme fails - warn and continue.
182+
Current.logger().warning("storeS3Readme failed: \(error)")
183+
s3Readme = .error("\(error)")
184+
}
185+
186+
try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme)
187+
return package
188+
}
189+
190+
switch result {
191+
case .success:
192+
AppMetrics.ingestMetadataSuccessCount?.inc()
193+
case .failure:
194+
AppMetrics.ingestMetadataFailureCount?.inc()
195+
}
196+
197+
do {
198+
try await updatePackage(client: client, database: database, result: result, stage: .ingestion)
199+
} catch {
200+
Current.logger().report(error: error)
201+
}
202+
}
203+
204+
205+
static func storeS3Readme(client: Client, repository: Repository, metadata: Github.Metadata, readme: Github.Readme?) async throws(S3ReadmeError) -> S3Readme? {
206+
if let upstreamEtag = readme?.etag,
207+
repository.s3Readme?.needsUpdate(upstreamEtag: upstreamEtag) ?? true,
208+
let owner = metadata.repositoryOwner,
209+
let repository = metadata.repositoryName,
210+
let html = readme?.html {
211+
let objectUrl = try await Current.storeS3Readme(owner, repository, html)
212+
if let imagesToCache = readme?.imagesToCache, imagesToCache.isEmpty == false {
213+
try await Current.storeS3ReadmeImages(client, imagesToCache)
214+
}
215+
return .cached(s3ObjectUrl: objectUrl, githubEtag: upstreamEtag)
216+
} else {
217+
return repository.s3Readme
218+
}
219+
}
220+
}
221+
222+
func fetchMetadata(client: Client, package: Joined<Package, Repository>) async throws(Ingestion.Error) -> (Github.Metadata, Github.License?, Github.Readme?) {
156223
// Even though we get a `Joined<Package, Repository>` as a parameter,
157224
// we must not rely on `repository` as it will be nil when a package is first ingested.
158225
// The only way to get `owner` and `repository` here is by parsing them from the URL.
159-
let (owner, repository) = try Github.parseOwnerName(url: package.model.url)
226+
let (owner, repository) = try Result {
227+
try Github.parseOwnerName(url: package.model.url)
228+
}.mapError { _ in
229+
Ingestion.Error.invalidURL(package.model.url)
230+
}.get()
160231

161-
async let metadata = try await Current.fetchMetadata(client, owner, repository)
162232
async let license = await Current.fetchLicense(client, owner, repository)
163233
async let readme = await Current.fetchReadme(client, owner, repository)
164234

165-
return try await (metadata, license, readme)
235+
// First one should be an `async let` as well but it doesn't compile right now. Reported as
236+
// https://github.com/swiftlang/swift/issues/76169
237+
return (try await Result { try await Current.fetchMetadata(client, owner, repository) }
238+
.mapError { Ingestion.Error.fetchMetadataFailed(owner: owner, name: repository, error: $0) }
239+
.get(),
240+
await license,
241+
await readme)
166242
}
167243

168244

@@ -177,13 +253,9 @@ func updateRepository(on database: Database,
177253
metadata: Github.Metadata,
178254
licenseInfo: Github.License?,
179255
readmeInfo: Github.Readme?,
180-
s3Readme: S3Readme?) async throws {
256+
s3Readme: S3Readme?) async throws(Ingestion.Error) {
181257
guard let repoMetadata = metadata.repository else {
182-
if repository.$package.value == nil {
183-
try await repository.$package.load(on: database)
184-
}
185-
throw AppError.genericError(repository.package.id,
186-
"repository metadata is nil for package \(repository.name ?? "unknown")")
258+
throw .noRepositoryMetadata(owner: repository.owner, name: repository.name)
187259
}
188260

189261
repository.defaultBranch = repoMetadata.defaultBranch
@@ -209,7 +281,11 @@ func updateRepository(on database: Database,
209281
repository.stars = repoMetadata.stargazerCount
210282
repository.summary = repoMetadata.description
211283

212-
try await repository.save(on: database)
284+
try await Result {
285+
try await repository.save(on: database)
286+
}.mapError {
287+
Ingestion.Error.repositorySaveFailed(owner: repository.owner, name: repository.name, error: $0)
288+
}.get()
213289
}
214290

215291

Sources/App/Core/Github.swift

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,13 @@ import S3Store
1919

2020
enum Github {
2121

22-
enum Error: LocalizedError {
22+
enum Error: Swift.Error {
23+
case decodeContentFailed(URI, Swift.Error)
2324
case missingToken
2425
case noBody
25-
case invalidURI(Package.Id?, _ url: String)
26+
case invalidURL(String)
27+
case postRequestFailed(URI, Swift.Error)
2628
case requestFailed(HTTPStatus)
27-
28-
var errorDescription: String? {
29-
switch self {
30-
case .missingToken:
31-
return "missing Github API token"
32-
case .noBody:
33-
return "no body"
34-
case let .invalidURI(id, url):
35-
return "invalid URL: \(url) (id: \(id?.uuidString ?? "nil"))"
36-
case .requestFailed(let statusCode):
37-
return "request failed with status code: \(statusCode)"
38-
}
39-
}
4029
}
4130

4231
static var decoder: JSONDecoder {
@@ -60,13 +49,13 @@ enum Github {
6049
return response.status == .forbidden && limit == 0
6150
}
6251

63-
static func parseOwnerName(url: String) throws -> (owner: String, name: String) {
52+
static func parseOwnerName(url: String) throws(Github.Error) -> (owner: String, name: String) {
6453
let parts = url
6554
.droppingGithubComPrefix
6655
.droppingGitExtension
6756
.split(separator: "/")
6857
.map(String.init)
69-
guard parts.count == 2 else { throw Error.invalidURI(nil, url) }
58+
guard parts.count == 2 else { throw Error.invalidURL(url) }
7059
return (owner: parts[0], name: parts[1])
7160
}
7261

@@ -181,13 +170,18 @@ extension Github {
181170
var query: String
182171
}
183172

184-
static func fetchResource<T: Decodable>(_ type: T.Type, client: Client, query: GraphQLQuery) async throws -> T {
173+
static func fetchResource<T: Decodable>(_ type: T.Type, client: Client, query: GraphQLQuery) async throws(Github.Error) -> T {
185174
guard let token = Current.githubToken() else {
186175
throw Error.missingToken
187176
}
188177

189-
let response = try await client.post(Self.graphQLApiUri, headers: defaultHeaders(with: token)) {
190-
try $0.content.encode(query)
178+
let response: ClientResponse
179+
do {
180+
response = try await client.post(Self.graphQLApiUri, headers: defaultHeaders(with: token)) {
181+
try $0.content.encode(query)
182+
}
183+
} catch {
184+
throw .postRequestFailed(Self.graphQLApiUri, error)
191185
}
192186

193187
guard !isRateLimited(response) else {
@@ -200,10 +194,14 @@ extension Github {
200194
throw Error.requestFailed(response.status)
201195
}
202196

203-
return try response.content.decode(T.self, using: decoder)
197+
do {
198+
return try response.content.decode(T.self, using: decoder)
199+
} catch {
200+
throw .decodeContentFailed(Self.graphQLApiUri, error)
201+
}
204202
}
205203

206-
static func fetchMetadata(client: Client, owner: String, repository: String) async throws -> Metadata {
204+
static func fetchMetadata(client: Client, owner: String, repository: String) async throws(Github.Error) -> Metadata {
207205
struct Response<T: Decodable & Equatable>: Decodable, Equatable {
208206
var data: T
209207
}

Tests/AppTests/ErrorReportingTests.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@ class ErrorReportingTests: AppTestCase {
3434
func test_Ingestor_error_reporting() async throws {
3535
// setup
3636
try await Package(url: "1", processingStage: .reconciliation).save(on: app.db)
37-
Current.fetchMetadata = { _, _, _ in throw Github.Error.invalidURI(nil, "1") }
37+
Current.fetchMetadata = { _, _, _ in throw Github.Error.invalidURL("1") }
3838

3939
// MUT
4040
try await ingest(client: app.client, database: app.db, mode: .limit(10))
4141

4242
// validation
4343
logger.logs.withValue {
4444
XCTAssertEqual($0, [.init(level: .warning,
45-
message: #"App.Github.Error.invalidURI(nil, "1")"#)])
45+
message: #"App.Ingestion.Error.invalidURL("1")"#)])
4646
}
4747
}
4848

Tests/AppTests/GithubTests.swift

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,13 @@ class GithubTests: AppTestCase {
3333
XCTAssertEqual(res.owner, "foo")
3434
XCTAssertEqual(res.name, "bar")
3535
}
36-
XCTAssertThrowsError(
37-
try Github.parseOwnerName(url: "https://github.com/foo/bar/baz")
38-
) { error in
39-
XCTAssertEqual(error.localizedDescription,
40-
"invalid URL: https://github.com/foo/bar/baz (id: nil)")
36+
do {
37+
_ = try Github.parseOwnerName(url: "https://github.com/foo/bar/baz")
38+
XCTFail("Expected error")
39+
} catch let Github.Error.invalidURL(url) {
40+
XCTAssertEqual(url, "https://github.com/foo/bar/baz")
41+
} catch {
42+
XCTFail("Unexpected error: \(error)")
4143
}
4244
}
4345

@@ -201,7 +203,7 @@ class GithubTests: AppTestCase {
201203
_ = try await Github.fetchMetadata(client: client, packageUrl: pkg.url)
202204
XCTFail("expected error to be thrown")
203205
} catch {
204-
guard case Github.Error.invalidURI = error else {
206+
guard case Github.Error.invalidURL = error else {
205207
XCTFail("unexpected error: \(error.localizedDescription)")
206208
return
207209
}
@@ -221,12 +223,15 @@ class GithubTests: AppTestCase {
221223
do {
222224
_ = try await Github.fetchMetadata(client: client, packageUrl: pkg.url)
223225
XCTFail("expected error to be thrown")
224-
} catch {
226+
} catch let Github.Error.decodeContentFailed(uri, error) {
225227
// validation
228+
XCTAssertEqual(uri, "https://api.github.com/graphql")
226229
guard case DecodingError.dataCorrupted = error else {
227230
XCTFail("unexpected error: \(error.localizedDescription)")
228231
return
229232
}
233+
} catch {
234+
XCTFail("Unexpected error: \(error)")
230235
}
231236
}
232237

0 commit comments

Comments
 (0)