From 1576f6c6658b0be9fc29c6b300bd91d8b322f5b5 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 27 Jul 2023 17:39:29 +0200 Subject: [PATCH 01/33] Expand test case to ensure we're advancing to analysis in case of an ingestion error --- Tests/AppTests/IngestorTests.swift | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index e63d8a82b..c5c1850d7 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -70,7 +70,7 @@ class IngestorTests: AppTestCase { } let packages = try await savePackages(on: app.db, ["https://github.com/foo/1", - "https://github.com/foo/2"]) + "https://github.com/foo/2"], processingStage: .reconciliation) .map(Joined.init(model:)) Current.fetchMetadata = { _, owner, repository in if owner == "foo" && repository == "1" { @@ -83,12 +83,17 @@ class IngestorTests: AppTestCase { // MUT await ingest(client: app.client, database: app.db, packages: packages) - // validate the second package's license is updated - let repo = try await Repository.query(on: app.db) - .filter(\.$name == "2") - .first() - .unwrap() - XCTAssertEqual(repo.licenseUrl, "license") + do { + // validate the second package's license is updated + let repo = try await Repository.query(on: app.db) + .filter(\.$name == "2") + .first() + .unwrap() + XCTAssertEqual(repo.licenseUrl, "license") + for pkg in try await Package.query(on: app.db).all() { + XCTAssertEqual(pkg.processingStage, .ingestion, "\(pkg.url) must be in ingestion") + } + } } func test_updateRepository_insert() async throws { From 64c602818520d4b42d7fc1a0d2b8564fab2145c6 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 30 Aug 2024 09:38:39 +0200 Subject: [PATCH 02/33] Temporarily disable new check --- Tests/AppTests/IngestorTests.swift | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index c5c1850d7..6ebfaf7e2 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -90,8 +90,9 @@ class IngestorTests: AppTestCase { .first() .unwrap() XCTAssertEqual(repo.licenseUrl, "license") - for pkg in try await Package.query(on: app.db).all() { - XCTAssertEqual(pkg.processingStage, .ingestion, "\(pkg.url) must be in ingestion") + for _ in try await Package.query(on: app.db).all() { +#warning("Re-enable this check") + // XCTAssertEqual(pkg.processingStage, .ingestion, "\(pkg.url) must be in ingestion") } } } From f3a46092dfde733824fb147c1ad91d0f3bef3993 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 30 Aug 2024 09:44:32 +0200 Subject: [PATCH 03/33] Move existing ingestion logic into ingestOriginal # Conflicts: # Sources/App/Commands/Ingest.swift --- Sources/App/Commands/Ingest.swift | 100 ++++++++++++++++-------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 7bf64744f..69db2a60b 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -98,59 +98,63 @@ func ingest(client: Client, await withTaskGroup(of: Void.self) { group in for pkg in packages { - group.addTask { - let result = await Result { - Current.logger().info("Ingesting \(pkg.package.url)") - let (metadata, license, readme) = try await fetchMetadata(client: client, package: pkg) - let repo = try await Repository.findOrCreate(on: database, for: pkg.model) - - let s3Readme: S3Readme? - do { - if let upstreamEtag = readme?.etag, - repo.s3Readme?.needsUpdate(upstreamEtag: upstreamEtag) ?? true, - let owner = metadata.repositoryOwner, - let repository = metadata.repositoryName, - let html = readme?.html { - let objectUrl = try await Current.storeS3Readme(owner, repository, html) - if let imagesToCache = readme?.imagesToCache, imagesToCache.isEmpty == false { - try await Current.storeS3ReadmeImages(client, imagesToCache) - } - s3Readme = .cached(s3ObjectUrl: objectUrl, githubEtag: upstreamEtag) - } else { - s3Readme = repo.s3Readme - } - } catch { - // We don't want to fail ingestion in case storing the readme fails - warn and continue. - Current.logger().warning("storeS3Readme failed") - s3Readme = .error("\(error)") - } - - let fork = await getFork(on: database, parent: metadata.repository?.parent) - - try await updateRepository(on: database, - for: repo, - metadata: metadata, - licenseInfo: license, - readmeInfo: readme, - s3Readme: s3Readme, - fork: fork) - return pkg - } + group.addTask { + await ingestOriginal(client: client, database: database, package: pkg) + } + } + } +} - switch result { - case .success: - AppMetrics.ingestMetadataSuccessCount?.inc() - case .failure: - AppMetrics.ingestMetadataFailureCount?.inc() - } +func ingestOriginal(client: Client, database: Database, package: Joined) async { + let result = await Result { + Current.logger().info("Ingesting \(package.package.url)") + let (metadata, license, readme) = try await fetchMetadata(client: client, package: package) + let repo = try await Repository.findOrCreate(on: database, for: package.model) - do { - try await updatePackage(client: client, database: database, result: result, stage: .ingestion) - } catch { - Current.logger().report(error: error) + let s3Readme: S3Readme? + do { + if let upstreamEtag = readme?.etag, + repo.s3Readme?.needsUpdate(upstreamEtag: upstreamEtag) ?? true, + let owner = metadata.repositoryOwner, + let repository = metadata.repositoryName, + let html = readme?.html { + let objectUrl = try await Current.storeS3Readme(owner, repository, html) + if let imagesToCache = readme?.imagesToCache, imagesToCache.isEmpty == false { + try await Current.storeS3ReadmeImages(client, imagesToCache) } + s3Readme = .cached(s3ObjectUrl: objectUrl, githubEtag: upstreamEtag) + } else { + s3Readme = repo.s3Readme } + } catch { + // We don't want to fail ingestion in case storing the readme fails - warn and continue. + Current.logger().warning("storeS3Readme failed") + s3Readme = .error("\(error)") } + + let fork = await getFork(on: database, parent: metadata.repository?.parent) + + try await updateRepository(on: database, + for: repo, + metadata: metadata, + licenseInfo: license, + readmeInfo: readme, + s3Readme: s3Readme, + fork: fork) + return package + } + + switch result { + case .success: + AppMetrics.ingestMetadataSuccessCount?.inc() + case .failure: + AppMetrics.ingestMetadataFailureCount?.inc() + } + + do { + try await updatePackage(client: client, database: database, result: result, stage: .ingestion) + } catch { + Current.logger().report(error: error) } } From 81b770f576b52e3d12c8a008bf1308a4997f535b Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 30 Aug 2024 10:27:14 +0200 Subject: [PATCH 04/33] Make s3 readme functions throws(S3ReadmeError) # Conflicts: # Sources/App/Core/Extensions/S3Store+ext.swift --- Sources/App/Core/AppEnvironment.swift | 12 ++-- Sources/App/Core/Extensions/S3Store+ext.swift | 61 +++++++++++-------- Sources/S3Store/S3Store.swift | 2 +- Tests/AppTests/IngestorTests.swift | 5 +- 4 files changed, 47 insertions(+), 33 deletions(-) diff --git a/Sources/App/Core/AppEnvironment.swift b/Sources/App/Core/AppEnvironment.swift index f71d5e11b..fa2ddf3ce 100644 --- a/Sources/App/Core/AppEnvironment.swift +++ b/Sources/App/Core/AppEnvironment.swift @@ -51,9 +51,9 @@ struct AppEnvironment: Sendable { var siteURL: @Sendable () -> String var storeS3Readme: @Sendable (_ owner: String, _ repository: String, - _ readme: String) async throws -> String + _ readme: String) async throws(S3ReadmeError) -> String var storeS3ReadmeImages: @Sendable (_ client: Client, - _ imagesToCache: [Github.Readme.ImageToCache]) async throws -> Void + _ imagesToCache: [Github.Readme.ImageToCache]) async throws(S3ReadmeError) -> Void var timeZone: @Sendable () -> TimeZone var triggerBuild: @Sendable (_ client: Client, _ buildId: Build.Id, @@ -131,8 +131,12 @@ extension AppEnvironment { setLogger: { logger in Self.logger = logger }, shell: .live, siteURL: { Environment.get("SITE_URL") ?? "http://localhost:8080" }, - storeS3Readme: { owner, repo, readme in try await S3Store.storeReadme(owner: owner, repository: repo, readme: readme) }, - storeS3ReadmeImages: { client, images in try await S3Store.storeReadmeImages(client: client, imagesToCache: images) }, + storeS3Readme: { owner, repo, readme throws(S3ReadmeError) in + try await S3Store.storeReadme(owner: owner, repository: repo, readme: readme) + }, + storeS3ReadmeImages: { client, images throws(S3ReadmeError) in + try await S3Store.storeReadmeImages(client: client, imagesToCache: images) + }, timeZone: { .current }, triggerBuild: { client, buildId, cloneURL, isDocBuild, platform, ref, swiftVersion, versionID in try await Gitlab.Builder.triggerBuild(client: client, diff --git a/Sources/App/Core/Extensions/S3Store+ext.swift b/Sources/App/Core/Extensions/S3Store+ext.swift index 83e35b87f..f37ce900a 100644 --- a/Sources/App/Core/Extensions/S3Store+ext.swift +++ b/Sources/App/Core/Extensions/S3Store+ext.swift @@ -16,47 +16,61 @@ import S3Store import Vapor import Dependencies +enum S3ReadmeError: Swift.Error { + case envVariableNotSet(String) + case invalidURL(String) + case missingBody + case requestFailed(key: S3Store.Key, error: Swift.Error) + case storeReadmeFailed + case storeImagesFailed +} extension S3Store { - static func fetchReadme(client: Client, owner: String, repository: String) async throws -> String { + static func fetchReadme(client: Client, owner: String, repository: String) async throws(S3ReadmeError) -> String { let key = try Key.readme(owner: owner, repository: repository) - guard let body = try await client.get(URI(string: key.objectUrl)).body else { - throw Error.genericError("No body") + let response: ClientResponse + do { + response = try await client.get(URI(string: key.objectUrl)) + } catch { + throw .requestFailed(key: key, error: error) } + guard let body = response.body else { throw S3ReadmeError.missingBody } return body.asString() } - static func storeReadme(owner: String, repository: String, readme: String) async throws -> String { + static func storeReadme(owner: String, repository: String, readme: String) async throws(S3ReadmeError) -> String { @Dependency(\.environment) var environment - guard let accessKeyId = environment.awsAccessKeyId(), - let secretAccessKey = environment.awsSecretAccessKey() - else { - throw Error.genericError("missing AWS credentials") - } + guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } + guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} let store = S3Store(credentials: .init(keyId: accessKeyId, secret: secretAccessKey)) let key = try Key.readme(owner: owner, repository: repository) Current.logger().debug("Copying readme to \(key.s3Uri) ...") - try await store.save(payload: readme, to: key) + do { + try await store.save(payload: readme, to: key) + } catch { + throw .requestFailed(key: key, error: error) + } return key.objectUrl } - static func storeReadmeImages(client: Client, imagesToCache: [Github.Readme.ImageToCache]) async throws { + static func storeReadmeImages(client: Client, imagesToCache: [Github.Readme.ImageToCache]) async throws(S3ReadmeError) { @Dependency(\.environment) var environment - guard let accessKeyId = environment.awsAccessKeyId(), - let secretAccessKey = environment.awsSecretAccessKey() - else { - throw Error.genericError("missing AWS credentials") - } + guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } + guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} let store = S3Store(credentials: .init(keyId: accessKeyId, secret: secretAccessKey)) for imageToCache in imagesToCache { Current.logger().debug("Copying readme image to \(imageToCache.s3Key.s3Uri) ...") - let response = try await client.get(URI(stringLiteral: imageToCache.originalUrl)) - if var body = response.body, let imageData = body.readData(length: body.readableBytes) { - try await store.save(payload: imageData, to: imageToCache.s3Key) + do { + let response = try await client.get(URI(stringLiteral: imageToCache.originalUrl)) + if var body = response.body, let imageData = body.readData(length: body.readableBytes) { + try await store.save(payload: imageData, to: imageToCache.s3Key) + } + } catch { + throw .requestFailed(key: imageToCache.s3Key, error: error) } } } @@ -65,15 +79,12 @@ extension S3Store { extension S3Store.Key { - static func readme(owner: String, repository: String, imageUrl: String? = nil) throws -> Self { + static func readme(owner: String, repository: String, imageUrl: String? = nil) throws(S3ReadmeError) -> Self { @Dependency(\.environment) var environment - guard let bucket = environment.awsReadmeBucket() else { - throw S3Store.Error.genericError("AWS_README_BUCKET not set") - } + guard let bucket = environment.awsReadmeBucket() else { throw .envVariableNotSet("AWS_README_BUCKET") } if let imageUrl { - guard let url = URL(string: imageUrl) - else { throw S3Store.Error.genericError("Invalid imageUrl \(imageUrl)") } + guard let url = URL(string: imageUrl) else { throw .invalidURL(imageUrl) } let filename = url.lastPathComponent let path = "\(owner)/\(repository)/\(filename)".lowercased() return .init(bucket: bucket, path: path) diff --git a/Sources/S3Store/S3Store.swift b/Sources/S3Store/S3Store.swift index f101e19d7..3d743544c 100644 --- a/Sources/S3Store/S3Store.swift +++ b/Sources/S3Store/S3Store.swift @@ -61,7 +61,7 @@ extension S3Store { } } - public struct Key: Equatable { + public struct Key: Equatable, Sendable { public let bucket: String public let path: String diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 6ebfaf7e2..fd4cf90a2 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -575,10 +575,9 @@ class IngestorTests: AppTestCase { imagesToCache: []) } let storeCalls = QueueIsolated(0) - struct Error: Swift.Error { } - Current.storeS3Readme = { owner, repo, html in + Current.storeS3Readme = { owner, repo, html throws(S3ReadmeError) in storeCalls.increment() - throw Error() + throw .storeReadmeFailed } do { // first ingestion, no readme has been saved From ff183876632e8a899c83e688faf4e8d8d46d5c2a Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sun, 1 Sep 2024 10:52:23 +0200 Subject: [PATCH 05/33] wip # Conflicts: # Sources/App/Commands/Ingest.swift --- Sources/App/Commands/Ingest.swift | 98 +++++++++++++++++++++--- Sources/App/Core/Github.swift | 42 +++++----- Tests/AppTests/ErrorReportingTests.swift | 4 +- Tests/AppTests/GithubTests.swift | 19 +++-- 4 files changed, 121 insertions(+), 42 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 69db2a60b..75f04795f 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -16,6 +16,17 @@ import Vapor import Fluent +enum Ingestion { + enum Error: Swift.Error { + case fetchMetadataFailed(owner: String, name: String, error: Swift.Error) + case findOrCreateRepositoryFailed(url: String, error: Swift.Error) + case invalidURL(String) + case noRepositoryMetadata(owner: String?, name: String?) + case repositorySaveFailed(owner: String?, name: String?, error: Swift.Error) + } +} + + struct IngestCommand: AsyncCommand { typealias Signature = SPICommand.Signature @@ -159,17 +170,82 @@ func ingestOriginal(client: Client, database: Database, package: Joined) async throws -> (Github.Metadata, Github.License?, Github.Readme?) { +extension Ingestion { + static func ingestNew(client: Client, database: Database, package: Joined) async { + let result = await Result { () async throws(Ingestion.Error) -> Joined in + Current.logger().info("Ingesting \(package.package.url)") + let (metadata, license, readme) = try await fetchMetadata(client: client, package: package) + let repo = try await Result { + try await Repository.findOrCreate(on: database, for: package.model) + }.mapError { + Ingestion.Error.findOrCreateRepositoryFailed(url: package.package.url, error: $0) + }.get() + + let s3Readme: S3Readme? + do throws(S3ReadmeError) { + s3Readme = try await storeS3Readme(client: client, repository: repo, metadata: metadata, readme: readme) + } catch { + // We don't want to fail ingestion in case storing the readme fails - warn and continue. + Current.logger().warning("storeS3Readme failed: \(error)") + s3Readme = .error("\(error)") + } + + try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme) + return package + } + + switch result { + case .success: + AppMetrics.ingestMetadataSuccessCount?.inc() + case .failure: + AppMetrics.ingestMetadataFailureCount?.inc() + } + + do { + try await updatePackage(client: client, database: database, result: result, stage: .ingestion) + } catch { + Current.logger().report(error: error) + } + } + + + static func storeS3Readme(client: Client, repository: Repository, metadata: Github.Metadata, readme: Github.Readme?) async throws(S3ReadmeError) -> S3Readme? { + if let upstreamEtag = readme?.etag, + repository.s3Readme?.needsUpdate(upstreamEtag: upstreamEtag) ?? true, + let owner = metadata.repositoryOwner, + let repository = metadata.repositoryName, + let html = readme?.html { + let objectUrl = try await Current.storeS3Readme(owner, repository, html) + if let imagesToCache = readme?.imagesToCache, imagesToCache.isEmpty == false { + try await Current.storeS3ReadmeImages(client, imagesToCache) + } + return .cached(s3ObjectUrl: objectUrl, githubEtag: upstreamEtag) + } else { + return repository.s3Readme + } + } +} + +func fetchMetadata(client: Client, package: Joined) async throws(Ingestion.Error) -> (Github.Metadata, Github.License?, Github.Readme?) { // Even though we get through a `Joined` as a parameter, it's // we must not rely on `repository` as it will be nil when a package is first ingested. // The only way to get `owner` and `repository` here is by parsing them from the URL. - let (owner, repository) = try Github.parseOwnerName(url: package.model.url) + let (owner, repository) = try Result { + try Github.parseOwnerName(url: package.model.url) + }.mapError { _ in + Ingestion.Error.invalidURL(package.model.url) + }.get() - async let metadata = try await Current.fetchMetadata(client, owner, repository) async let license = await Current.fetchLicense(client, owner, repository) async let readme = await Current.fetchReadme(client, owner, repository) - return try await (metadata, license, readme) + // First one should be an `async let` as well but it doesn't compile right now. Reported as + // https://github.com/swiftlang/swift/issues/76169 + return (try await Result { try await Current.fetchMetadata(client, owner, repository) } + .mapError { Ingestion.Error.fetchMetadataFailed(owner: owner, name: repository, error: $0) } + .get(), + await license, + await readme) } @@ -185,13 +261,9 @@ func updateRepository(on database: Database, licenseInfo: Github.License?, readmeInfo: Github.Readme?, s3Readme: S3Readme?, - fork: Fork? = nil) async throws { + fork: Fork? = nil) async throws(Ingestion.Error) { guard let repoMetadata = metadata.repository else { - if repository.$package.value == nil { - try await repository.$package.load(on: database) - } - throw AppError.genericError(repository.package.id, - "repository metadata is nil for package \(repository.name ?? "unknown")") + throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) } repository.defaultBranch = repoMetadata.defaultBranch @@ -219,7 +291,11 @@ func updateRepository(on database: Database, repository.summary = repoMetadata.description repository.forkedFrom = fork - try await repository.save(on: database) + try await Result { + try await repository.save(on: database) + }.mapError { + Ingestion.Error.repositorySaveFailed(owner: repository.owner, name: repository.name, error: $0) + }.get() } func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? { diff --git a/Sources/App/Core/Github.swift b/Sources/App/Core/Github.swift index 8bccce4f2..d4f0c0cfc 100644 --- a/Sources/App/Core/Github.swift +++ b/Sources/App/Core/Github.swift @@ -19,24 +19,13 @@ import S3Store enum Github { - enum Error: LocalizedError { + enum Error: Swift.Error { + case decodeContentFailed(URI, Swift.Error) case missingToken case noBody - case invalidURI(Package.Id?, _ url: String) + case invalidURL(String) + case postRequestFailed(URI, Swift.Error) case requestFailed(HTTPStatus) - - var errorDescription: String? { - switch self { - case .missingToken: - return "missing Github API token" - case .noBody: - return "no body" - case let .invalidURI(id, url): - return "invalid URL: \(url) (id: \(id?.uuidString ?? "nil"))" - case .requestFailed(let statusCode): - return "request failed with status code: \(statusCode)" - } - } } static var decoder: JSONDecoder { @@ -60,13 +49,13 @@ enum Github { return response.status == .forbidden && limit == 0 } - static func parseOwnerName(url: String) throws -> (owner: String, name: String) { + static func parseOwnerName(url: String) throws(Github.Error) -> (owner: String, name: String) { let parts = url .droppingGithubComPrefix .droppingGitExtension .split(separator: "/") .map(String.init) - guard parts.count == 2 else { throw Error.invalidURI(nil, url) } + guard parts.count == 2 else { throw Error.invalidURL(url) } return (owner: parts[0], name: parts[1]) } @@ -181,13 +170,18 @@ extension Github { var query: String } - static func fetchResource(_ type: T.Type, client: Client, query: GraphQLQuery) async throws -> T { + static func fetchResource(_ type: T.Type, client: Client, query: GraphQLQuery) async throws(Github.Error) -> T { guard let token = Current.githubToken() else { throw Error.missingToken } - let response = try await client.post(Self.graphQLApiUri, headers: defaultHeaders(with: token)) { - try $0.content.encode(query) + let response: ClientResponse + do { + response = try await client.post(Self.graphQLApiUri, headers: defaultHeaders(with: token)) { + try $0.content.encode(query) + } + } catch { + throw .postRequestFailed(Self.graphQLApiUri, error) } guard !isRateLimited(response) else { @@ -200,10 +194,14 @@ extension Github { throw Error.requestFailed(response.status) } - return try response.content.decode(T.self, using: decoder) + do { + return try response.content.decode(T.self, using: decoder) + } catch { + throw .decodeContentFailed(Self.graphQLApiUri, error) + } } - static func fetchMetadata(client: Client, owner: String, repository: String) async throws -> Metadata { + static func fetchMetadata(client: Client, owner: String, repository: String) async throws(Github.Error) -> Metadata { struct Response: Decodable, Equatable { var data: T } diff --git a/Tests/AppTests/ErrorReportingTests.swift b/Tests/AppTests/ErrorReportingTests.swift index 64e776b93..26664a0b6 100644 --- a/Tests/AppTests/ErrorReportingTests.swift +++ b/Tests/AppTests/ErrorReportingTests.swift @@ -35,7 +35,7 @@ class ErrorReportingTests: AppTestCase { func test_Ingestor_error_reporting() async throws { // setup try await Package(url: "1", processingStage: .reconciliation).save(on: app.db) - Current.fetchMetadata = { _, _, _ in throw Github.Error.invalidURI(nil, "1") } + Current.fetchMetadata = { _, _, _ in throw Github.Error.invalidURL("1") } try await withDependencies { $0.date.now = .now @@ -47,7 +47,7 @@ class ErrorReportingTests: AppTestCase { // validation logger.logs.withValue { XCTAssertEqual($0, [.init(level: .warning, - message: #"App.Github.Error.invalidURI(nil, "1")"#)]) + message: #"App.Ingestion.Error.invalidURL("1")"#)]) } } diff --git a/Tests/AppTests/GithubTests.swift b/Tests/AppTests/GithubTests.swift index 254f34842..87c6c54af 100644 --- a/Tests/AppTests/GithubTests.swift +++ b/Tests/AppTests/GithubTests.swift @@ -35,11 +35,13 @@ class GithubTests: AppTestCase { XCTAssertEqual(res.owner, "foo") XCTAssertEqual(res.name, "bar") } - XCTAssertThrowsError( - try Github.parseOwnerName(url: "https://github.com/foo/bar/baz") - ) { error in - XCTAssertEqual(error.localizedDescription, - "invalid URL: https://github.com/foo/bar/baz (id: nil)") + do { + _ = try Github.parseOwnerName(url: "https://github.com/foo/bar/baz") + XCTFail("Expected error") + } catch let Github.Error.invalidURL(url) { + XCTAssertEqual(url, "https://github.com/foo/bar/baz") + } catch { + XCTFail("Unexpected error: \(error)") } } @@ -203,7 +205,7 @@ class GithubTests: AppTestCase { _ = try await Github.fetchMetadata(client: client, packageUrl: pkg.url) XCTFail("expected error to be thrown") } catch { - guard case Github.Error.invalidURI = error else { + guard case Github.Error.invalidURL = error else { XCTFail("unexpected error: \(error.localizedDescription)") return } @@ -223,12 +225,15 @@ class GithubTests: AppTestCase { do { _ = try await Github.fetchMetadata(client: client, packageUrl: pkg.url) XCTFail("expected error to be thrown") - } catch { + } catch let Github.Error.decodeContentFailed(uri, error) { // validation + XCTAssertEqual(uri, "https://api.github.com/graphql") guard case DecodingError.dataCorrupted = error else { XCTFail("unexpected error: \(error.localizedDescription)") return } + } catch { + XCTFail("Unexpected error: \(error)") } } From a7f9fb196370ce9fc63c47d2618ca662bba19328 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sat, 7 Dec 2024 13:40:08 +0100 Subject: [PATCH 06/33] Add back getFork --- Sources/App/Commands/Ingest.swift | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 75f04795f..51026a27b 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -190,7 +190,9 @@ extension Ingestion { s3Readme = .error("\(error)") } - try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme) + let fork = await getFork(on: database, parent: metadata.repository?.parent) + + try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork) return package } From 7f1e47406de8adc5ac90ff6d6659cbdd587a5757 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sat, 7 Dec 2024 13:40:26 +0100 Subject: [PATCH 07/33] =?UTF-8?q?Add=20warning=20about=20S3ReadmeError=20?= =?UTF-8?q?=E2=86=92=20S3Store.Error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/App/Core/Extensions/S3Store+ext.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/App/Core/Extensions/S3Store+ext.swift b/Sources/App/Core/Extensions/S3Store+ext.swift index f37ce900a..fc19e5b31 100644 --- a/Sources/App/Core/Extensions/S3Store+ext.swift +++ b/Sources/App/Core/Extensions/S3Store+ext.swift @@ -16,6 +16,7 @@ import S3Store import Vapor import Dependencies +#warning("Make this S3Store.Error") enum S3ReadmeError: Swift.Error { case envVariableNotSet(String) case invalidURL(String) From 76cfd4de373a7c858d1d22f4282c45d4380a419d Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sun, 8 Dec 2024 12:24:08 +0100 Subject: [PATCH 08/33] Add id, underlyingError to Ingestion.Error --- Sources/App/Commands/Common.swift | 39 +++++++++++++++++++++++++++---- Sources/App/Commands/Ingest.swift | 37 ++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index a8aabea88..ed2bd690f 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -53,10 +53,10 @@ func updatePackages(client: Client, } -func updatePackage(client: Client, - database: Database, - result: Result, Error>, - stage: Package.ProcessingStage) async throws { +func updatePackage(client: Client, + database: Database, + result: Result, E>, + stage: Package.ProcessingStage) async throws { switch result { case .success(let res): let pkg = res.package @@ -125,3 +125,34 @@ func recordError(database: Database, try await setStatus(id: id, status: .noValidVersions) } } + + +func recordError(database: Database, + error: Ingestion.Error, + stage: Package.ProcessingStage) async throws { + let status: Package.Status + switch error.underlyingError { + case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: + status = .ingestionFailed + case .invalidURL: + status = .invalidUrl + } + +#warning("drop stage as a parameter") + try await Package.update(for: error.packageId, on: database, status: status, stage: stage) +} + + +extension Package { +#warning("Move") + static func update(for id: Package.Id, + on database: Database, + status: Status, + stage: ProcessingStage) async throws { + try await Package.query(on: database) + .filter(\.$id == id) + .set(\.$processingStage, to: stage) + .set(\.$status, to: status) + .update() + } +} diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 51026a27b..b9920a4cf 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -17,12 +17,16 @@ import Fluent enum Ingestion { - enum Error: Swift.Error { - case fetchMetadataFailed(owner: String, name: String, error: Swift.Error) - case findOrCreateRepositoryFailed(url: String, error: Swift.Error) - case invalidURL(String) - case noRepositoryMetadata(owner: String?, name: String?) - case repositorySaveFailed(owner: String?, name: String?, error: Swift.Error) + struct Error: Swift.Error { + var packageId: Package.Id + var underlyingError: UnderlyingError + enum UnderlyingError: Swift.Error { + case fetchMetadataFailed(owner: String, name: String, details: Swift.Error) + case findOrCreateRepositoryFailed(url: String, details: Swift.Error) + case invalidURL(String) + case noRepositoryMetadata(owner: String?, name: String?) + case repositorySaveFailed(owner: String?, name: String?, details: Swift.Error) + } } } @@ -178,7 +182,10 @@ extension Ingestion { let repo = try await Result { try await Repository.findOrCreate(on: database, for: package.model) }.mapError { - Ingestion.Error.findOrCreateRepositoryFailed(url: package.package.url, error: $0) + Ingestion.Error( + packageId: package.model.id!, + underlyingError: .findOrCreateRepositoryFailed(url: package.package.url, details: $0) + ) }.get() let s3Readme: S3Readme? @@ -235,7 +242,8 @@ func fetchMetadata(client: Client, package: Joined) async t let (owner, repository) = try Result { try Github.parseOwnerName(url: package.model.url) }.mapError { _ in - Ingestion.Error.invalidURL(package.model.url) + Ingestion.Error(packageId: package.model.id!, + underlyingError: .invalidURL(package.model.url)) }.get() async let license = await Current.fetchLicense(client, owner, repository) @@ -244,7 +252,10 @@ func fetchMetadata(client: Client, package: Joined) async t // First one should be an `async let` as well but it doesn't compile right now. Reported as // https://github.com/swiftlang/swift/issues/76169 return (try await Result { try await Current.fetchMetadata(client, owner, repository) } - .mapError { Ingestion.Error.fetchMetadataFailed(owner: owner, name: repository, error: $0) } + .mapError { + Ingestion.Error(packageId: package.model.id!, + underlyingError: .fetchMetadataFailed(owner: owner, name: repository, details: $0)) + } .get(), await license, await readme) @@ -265,7 +276,8 @@ func updateRepository(on database: Database, s3Readme: S3Readme?, fork: Fork? = nil) async throws(Ingestion.Error) { guard let repoMetadata = metadata.repository else { - throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) + throw .init(packageId: repository.package.id!, + underlyingError: .noRepositoryMetadata(owner: repository.owner, name: repository.name)) } repository.defaultBranch = repoMetadata.defaultBranch @@ -296,7 +308,10 @@ func updateRepository(on database: Database, try await Result { try await repository.save(on: database) }.mapError { - Ingestion.Error.repositorySaveFailed(owner: repository.owner, name: repository.name, error: $0) + Ingestion.Error( + packageId: repository.package.id!, + underlyingError: .repositorySaveFailed(owner: repository.owner, name: repository.name, details: $0) + ) }.get() } From c39639f7fd77cad19511515be336c4a233c60892 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Mon, 9 Dec 2024 11:00:38 +0100 Subject: [PATCH 09/33] Avoid repository.package.id! --- Sources/App/Commands/Ingest.swift | 42 +++++++++++++++----- Sources/App/Core/Extensions/Result+ext.swift | 3 ++ Tests/AppTests/ErrorReportingTests.swift | 4 +- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index b9920a4cf..d1a86155b 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -17,15 +17,35 @@ import Fluent enum Ingestion { - struct Error: Swift.Error { + struct Error: Swift.Error, CustomStringConvertible { var packageId: Package.Id var underlyingError: UnderlyingError - enum UnderlyingError: Swift.Error { + + var description: String { + "Ingestion.Error(\(packageId), \(underlyingError)" + } + + enum UnderlyingError: Swift.Error, CustomStringConvertible { case fetchMetadataFailed(owner: String, name: String, details: Swift.Error) case findOrCreateRepositoryFailed(url: String, details: Swift.Error) case invalidURL(String) case noRepositoryMetadata(owner: String?, name: String?) case repositorySaveFailed(owner: String?, name: String?, details: Swift.Error) + + var description: String { + switch self { + case let .fetchMetadataFailed(_, _, details): + "fetchMetadataFailed(\(details))" + case .findOrCreateRepositoryFailed: + "findOrCreateRepositoryFailed" + case let .invalidURL(url): + "invalidURL(\(url))" + case .noRepositoryMetadata: + "noRepositoryMetadata" + case let .repositorySaveFailed(_, _, details): + "repositorySaveFailed(\(details)" + } + } } } } @@ -114,7 +134,7 @@ func ingest(client: Client, await withTaskGroup(of: Void.self) { group in for pkg in packages { group.addTask { - await ingestOriginal(client: client, database: database, package: pkg) + await Ingestion.ingestNew(client: client, database: database, package: pkg) } } } @@ -199,7 +219,11 @@ extension Ingestion { let fork = await getFork(on: database, parent: metadata.repository?.parent) - try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork) + try await Result { () async throws(Ingestion.Error.UnderlyingError) in + try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork) + }.mapError { + Error.init(packageId: package.model.id!, underlyingError: $0) + }.get() return package } @@ -274,10 +298,9 @@ func updateRepository(on database: Database, licenseInfo: Github.License?, readmeInfo: Github.Readme?, s3Readme: S3Readme?, - fork: Fork? = nil) async throws(Ingestion.Error) { + fork: Fork? = nil) async throws(Ingestion.Error.UnderlyingError) { guard let repoMetadata = metadata.repository else { - throw .init(packageId: repository.package.id!, - underlyingError: .noRepositoryMetadata(owner: repository.owner, name: repository.name)) + throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) } repository.defaultBranch = repoMetadata.defaultBranch @@ -308,10 +331,7 @@ func updateRepository(on database: Database, try await Result { try await repository.save(on: database) }.mapError { - Ingestion.Error( - packageId: repository.package.id!, - underlyingError: .repositorySaveFailed(owner: repository.owner, name: repository.name, details: $0) - ) + Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, name: repository.name, details: $0) }.get() } diff --git a/Sources/App/Core/Extensions/Result+ext.swift b/Sources/App/Core/Extensions/Result+ext.swift index e7690c5c9..938cd0b2b 100644 --- a/Sources/App/Core/Extensions/Result+ext.swift +++ b/Sources/App/Core/Extensions/Result+ext.swift @@ -25,3 +25,6 @@ extension Result where Failure == Error { var isError: Bool { return !isSucess } } + + +#warning("Add an extension `Result.mapError`") diff --git a/Tests/AppTests/ErrorReportingTests.swift b/Tests/AppTests/ErrorReportingTests.swift index 26664a0b6..479965325 100644 --- a/Tests/AppTests/ErrorReportingTests.swift +++ b/Tests/AppTests/ErrorReportingTests.swift @@ -34,7 +34,7 @@ class ErrorReportingTests: AppTestCase { func test_Ingestor_error_reporting() async throws { // setup - try await Package(url: "1", processingStage: .reconciliation).save(on: app.db) + try await Package(id: .id0, url: "1", processingStage: .reconciliation).save(on: app.db) Current.fetchMetadata = { _, _, _ in throw Github.Error.invalidURL("1") } try await withDependencies { @@ -47,7 +47,7 @@ class ErrorReportingTests: AppTestCase { // validation logger.logs.withValue { XCTAssertEqual($0, [.init(level: .warning, - message: #"App.Ingestion.Error.invalidURL("1")"#)]) + message: #"Ingestion.Error(\#(UUID.id0), invalidURL(1)"#)]) } } From bbdfbbeb0a96085b7afa454dd2ea6bd1a01e2f9e Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Mon, 9 Dec 2024 12:16:38 +0100 Subject: [PATCH 10/33] Update recordError to handle Ingestion.Error --- Sources/App/Commands/Common.swift | 62 +++++++++++++++---------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index ed2bd690f..8981ea571 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -97,39 +97,41 @@ func updatePackage(client: Client, func recordError(database: Database, error: Error, stage: Package.ProcessingStage) async throws { - func setStatus(id: Package.Id?, status: Package.Status) async throws { - guard let id = id else { return } - try await Package.query(on: database) - .filter(\.$id == id) - .set(\.$processingStage, to: stage) - .set(\.$status, to: status) - .update() - } + if let error = error as? Ingestion.Error { + try await recordIngestionError(database: database, error: error) + } else { + func setStatus(id: Package.Id?, status: Package.Status) async throws { + guard let id = id else { return } + try await Package.query(on: database) + .filter(\.$id == id) + .set(\.$processingStage, to: stage) + .set(\.$status, to: status) + .update() + } - guard let error = error as? AppError else { return } - - switch error { - case let .analysisError(id, _): - try await setStatus(id: id, status: .analysisFailed) - case .envVariableNotSet, .shellCommandFailed: - break - case let .genericError(id, _): - try await setStatus(id: id, status: .ingestionFailed) - case let .invalidPackageCachePath(id, _): - try await setStatus(id: id, status: .invalidCachePath) - case let .cacheDirectoryDoesNotExist(id, _): - try await setStatus(id: id, status: .cacheDirectoryDoesNotExist) - case let .invalidRevision(id, _): - try await setStatus(id: id, status: .analysisFailed) - case let .noValidVersions(id, _): - try await setStatus(id: id, status: .noValidVersions) + guard let error = error as? AppError else { return } + + switch error { + case let .analysisError(id, _): + try await setStatus(id: id, status: .analysisFailed) + case .envVariableNotSet, .shellCommandFailed: + break + case let .genericError(id, _): + try await setStatus(id: id, status: .ingestionFailed) + case let .invalidPackageCachePath(id, _): + try await setStatus(id: id, status: .invalidCachePath) + case let .cacheDirectoryDoesNotExist(id, _): + try await setStatus(id: id, status: .cacheDirectoryDoesNotExist) + case let .invalidRevision(id, _): + try await setStatus(id: id, status: .analysisFailed) + case let .noValidVersions(id, _): + try await setStatus(id: id, status: .noValidVersions) + } } } -func recordError(database: Database, - error: Ingestion.Error, - stage: Package.ProcessingStage) async throws { +func recordIngestionError(database: Database, error: Ingestion.Error) async throws { let status: Package.Status switch error.underlyingError { case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: @@ -137,9 +139,7 @@ func recordError(database: Database, case .invalidURL: status = .invalidUrl } - -#warning("drop stage as a parameter") - try await Package.update(for: error.packageId, on: database, status: status, stage: stage) + try await Package.update(for: error.packageId, on: database, status: status, stage: .ingestion) } From 0e4f349227382897c5168a45b3526a09d20a1e70 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Mon, 9 Dec 2024 18:06:20 +0100 Subject: [PATCH 11/33] Ingestion specific updatePackage - all tests pass --- Sources/App/Commands/Common.swift | 65 +++++++++++++++++++++--- Sources/App/Commands/Ingest.swift | 36 +++++++++---- Tests/AppTests/ErrorReportingTests.swift | 2 +- Tests/AppTests/IngestorTests.swift | 2 +- 4 files changed, 85 insertions(+), 20 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index 8981ea571..e3746b614 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -53,10 +53,10 @@ func updatePackages(client: Client, } -func updatePackage(client: Client, - database: Database, - result: Result, E>, - stage: Package.ProcessingStage) async throws { +func updatePackage(client: Client, + database: Database, + result: Result, Error>, + stage: Package.ProcessingStage) async throws { switch result { case .success(let res): let pkg = res.package @@ -94,6 +94,51 @@ func updatePackage(client: Client, } +func updatePackage(client: Client, + database: Database, + result: Result, Ingestion.Error>, + stage: Package.ProcessingStage) async throws { + switch result { + case .success(let res): + try await updatePackage(database: database, package: res.package, stage: stage) + case .failure(let failure): + switch failure.underlyingError { + case .fetchMetadataFailed: + Current.logger().warning("\(failure)") + + case .findOrCreateRepositoryFailed: + Current.logger().critical("\(failure)") + + case .invalidURL, .noRepositoryMetadata: + Current.logger().warning("\(failure)") + + case .repositorySaveFailed, .repositorySaveUniqueViolation: + Current.logger().critical("\(failure)") + } + + try await recordIngestionError(database: database, error: failure) + } +} + + +func updatePackage(database: Database, + package: Package, + stage: Package.ProcessingStage) async throws { + if stage == .ingestion && package.status == .new { + // newly ingested package: leave status == .new for fast-track + // analysis + } else { + package.status = .ok + } + package.processingStage = stage + do { + try await package.update(on: database) + } catch { + Current.logger().report(error: error) + } +} + + func recordError(database: Database, error: Error, stage: Package.ProcessingStage) async throws { @@ -132,14 +177,18 @@ func recordError(database: Database, func recordIngestionError(database: Database, error: Ingestion.Error) async throws { - let status: Package.Status switch error.underlyingError { case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: - status = .ingestionFailed + try await Package + .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) case .invalidURL: - status = .invalidUrl + try await Package + .update(for: error.packageId, on: database, status: .invalidUrl, stage: .ingestion) + case .repositorySaveUniqueViolation: + // Speficically do _not_ update package at all - this is what test_ingest_unique_owner_name_violation expects +#warning("check what are the consequences if we do? Does this break ingestion somehow?") + break } - try await Package.update(for: error.packageId, on: database, status: status, stage: .ingestion) } diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index d1a86155b..7d2d2a0e7 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -import Vapor import Fluent +import PostgresKit +import Vapor enum Ingestion { @@ -22,15 +23,16 @@ enum Ingestion { var underlyingError: UnderlyingError var description: String { - "Ingestion.Error(\(packageId), \(underlyingError)" + "Ingestion.Error(\(packageId), \(underlyingError))" } enum UnderlyingError: Swift.Error, CustomStringConvertible { - case fetchMetadataFailed(owner: String, name: String, details: Swift.Error) + case fetchMetadataFailed(owner: String, name: String, details: String) case findOrCreateRepositoryFailed(url: String, details: Swift.Error) case invalidURL(String) case noRepositoryMetadata(owner: String?, name: String?) - case repositorySaveFailed(owner: String?, name: String?, details: Swift.Error) + case repositorySaveFailed(owner: String?, name: String?, details: String) + case repositorySaveUniqueViolation(owner: String?, name: String?, details: String) var description: String { switch self { @@ -43,7 +45,9 @@ enum Ingestion { case .noRepositoryMetadata: "noRepositoryMetadata" case let .repositorySaveFailed(_, _, details): - "repositorySaveFailed(\(details)" + "repositorySaveFailed(\(String(reflecting: details)))" + case let .repositorySaveUniqueViolation(_, _, details): + "repositorySaveUniqueViolation(\(details))" } } } @@ -278,7 +282,7 @@ func fetchMetadata(client: Client, package: Joined) async t return (try await Result { try await Current.fetchMetadata(client, owner, repository) } .mapError { Ingestion.Error(packageId: package.model.id!, - underlyingError: .fetchMetadataFailed(owner: owner, name: repository, details: $0)) + underlyingError: .fetchMetadataFailed(owner: owner, name: repository, details: "\($0)")) } .get(), await license, @@ -328,11 +332,23 @@ func updateRepository(on database: Database, repository.summary = repoMetadata.description repository.forkedFrom = fork - try await Result { + do { try await repository.save(on: database) - }.mapError { - Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, name: repository.name, details: $0) - }.get() + } catch let error as PSQLError where error.isUniqueViolation { + let details = error.serverInfo?[.message] ?? "" + throw Ingestion.Error.UnderlyingError.repositorySaveUniqueViolation(owner: repository.owner, + name: repository.name, + details: details) + } catch let error as PSQLError { + let details = error.serverInfo?[.message] ?? "" + throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, + name: repository.name, + details: details) + } catch { + throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, + name: repository.name, + details: "\(error)") + } } func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? { diff --git a/Tests/AppTests/ErrorReportingTests.swift b/Tests/AppTests/ErrorReportingTests.swift index 479965325..64ae26c93 100644 --- a/Tests/AppTests/ErrorReportingTests.swift +++ b/Tests/AppTests/ErrorReportingTests.swift @@ -47,7 +47,7 @@ class ErrorReportingTests: AppTestCase { // validation logger.logs.withValue { XCTAssertEqual($0, [.init(level: .warning, - message: #"Ingestion.Error(\#(UUID.id0), invalidURL(1)"#)]) + message: #"Ingestion.Error(\#(UUID.id0), invalidURL(1))"#)]) } } diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index fd4cf90a2..17be4c025 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -420,7 +420,7 @@ class IngestorTests: AppTestCase { XCTAssertEqual(logs.count, 1) let log = try XCTUnwrap(logs.first) XCTAssertEqual(log.level, .critical) - XCTAssertEqual(log.message, #"duplicate key value violates unique constraint "idx_repositories_owner_name""#) + XCTAssertEqual(log.message, #"Ingestion.Error(\#(try reconciled.requireID()), repositorySaveUniqueViolation(duplicate key value violates unique constraint "idx_repositories_owner_name"))"#) } } From c64cfa2f54973f77f8f8b79f3827a235b560ba4f Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Wed, 11 Dec 2024 09:52:12 +0100 Subject: [PATCH 12/33] Update test_ingest_unique_owner_name_violation to reflect new error handling --- Sources/App/Commands/Common.swift | 5 +- Tests/AppTests/IngestorTests.swift | 80 ++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 24 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index e3746b614..1f120a29d 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -185,9 +185,8 @@ func recordIngestionError(database: Database, error: Ingestion.Error) async thro try await Package .update(for: error.packageId, on: database, status: .invalidUrl, stage: .ingestion) case .repositorySaveUniqueViolation: - // Speficically do _not_ update package at all - this is what test_ingest_unique_owner_name_violation expects -#warning("check what are the consequences if we do? Does this break ingestion somehow?") - break + try await Package + .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) } } diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 17be4c025..27540c484 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -363,9 +363,10 @@ class IngestorTests: AppTestCase { // - don't update package // - don't create repository records // setup - for url in ["https://github.com/foo/1", "https://github.com/foo/2"].asURLs { - try await Package(url: url, processingStage: .reconciliation).save(on: app.db) - } + try await Package(id: .id0, url: "https://github.com/foo/0", status: .ok, processingStage: .reconciliation) + .save(on: app.db) + try await Package(id: .id1, url: "https://github.com/foo/1", status: .ok, processingStage: .reconciliation) + .save(on: app.db) // Return identical metadata for both packages, same as a for instance a redirected // package would after a rename / ownership change Current.fetchMetadata = { _, _, _ in @@ -385,7 +386,6 @@ class IngestorTests: AppTestCase { stars: 0, summary: "desc") } - let lastUpdate = Date() try await withDependencies { $0.date.now = .now @@ -396,31 +396,48 @@ class IngestorTests: AppTestCase { // validate repositories (single element pointing to the ingested package) let repos = try await Repository.query(on: app.db).all() - let ingested = try await Package.query(on: app.db) - .filter(\.$processingStage == .ingestion) + XCTAssertEqual(repos.count, 1) + + // validate packages - one should have succeeded, one should have failed + let succeeded = try await Package.query(on: app.db) + .filter(\.$status == .ok) .first() .unwrap() - XCTAssertEqual(repos.map(\.$package.id), [try ingested.requireID()]) - - // validate packages - let reconciled = try await Package.query(on: app.db) - .filter(\.$processingStage == .reconciliation) + let failed = try await Package.query(on: app.db) + .filter(\.$status == .ingestionFailed) .first() .unwrap() - // the ingested package has the update ... - XCTAssertEqual(ingested.status, .new) - XCTAssertEqual(ingested.processingStage, .ingestion) - XCTAssert(ingested.updatedAt! > lastUpdate) - // ... while the reconciled package remains unchanged ... - XCTAssertEqual(reconciled.status, .new) - XCTAssertEqual(reconciled.processingStage, .reconciliation) - XCTAssert(reconciled.updatedAt! < lastUpdate) - // ... and an error has been logged + XCTAssertEqual(succeeded.processingStage, .ingestion) + XCTAssertEqual(failed.processingStage, .ingestion) + // an error must have been logged try logger.logs.withValue { logs in XCTAssertEqual(logs.count, 1) let log = try XCTUnwrap(logs.first) XCTAssertEqual(log.level, .critical) - XCTAssertEqual(log.message, #"Ingestion.Error(\#(try reconciled.requireID()), repositorySaveUniqueViolation(duplicate key value violates unique constraint "idx_repositories_owner_name"))"#) + XCTAssertEqual(log.message, #"Ingestion.Error(\#(try failed.requireID()), repositorySaveUniqueViolation(duplicate key value violates unique constraint "idx_repositories_owner_name"))"#) + } + + // ensure analysis can process these packages + try await withDependencies { + $0.date.now = .now + $0.environment.allowSocialPosts = { false } + } operation: { + Current.fileManager.fileExists = { @Sendable _ in true } + Current.git.commitCount = { @Sendable _ in 1 } + Current.git.firstCommitDate = { @Sendable _ in .t0 } + Current.git.lastCommitDate = { @Sendable _ in .t0 } + Current.git.getTags = { @Sendable _ in [] } + Current.git.hasBranch = { @Sendable _, _ in true } + Current.git.revisionInfo = { @Sendable _, _ in .init(commit: "sha0", date: .t0) } + Current.git.shortlog = { @Sendable _ in "" } + Current.shell.run = { @Sendable cmd, _ in + if cmd.description.hasSuffix("package dump-package") { + return .packageDump(name: "foo") + } + return "" + } + + try await Analyze.analyze(client: app.client, database: app.db, mode: .limit(10)) } } @@ -665,3 +682,24 @@ class IngestorTests: AppTestCase { XCTAssertEqual(fork5, nil) } } + + +private extension String { + static func packageDump(name: String) -> Self { + #""" + { + "name": "\#(name)", + "products": [ + { + "name": "p1", + "targets": [], + "type": { + "executable": null + } + } + ], + "targets": [] + } + """# + } +} From 6fba70ea6bb51c9d9f038681194695a9943787e6 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Wed, 11 Dec 2024 11:30:47 +0100 Subject: [PATCH 13/33] Disentangle updatePackage/recordError into Common, Analyze, Ingestion parts --- Sources/App/Commands/Analyze.swift | 9 +- Sources/App/Commands/Common.swift | 300 ++++++++++++----------- Sources/App/Commands/Ingest.swift | 53 ---- Tests/AppTests/AnalyzerTests.swift | 8 +- Tests/AppTests/ErrorReportingTests.swift | 6 +- Tests/AppTests/IngestorTests.swift | 33 +-- 6 files changed, 189 insertions(+), 220 deletions(-) diff --git a/Sources/App/Commands/Analyze.swift b/Sources/App/Commands/Analyze.swift index 4e3453968..6f4b130eb 100644 --- a/Sources/App/Commands/Analyze.swift +++ b/Sources/App/Commands/Analyze.swift @@ -152,10 +152,11 @@ extension Analyze { } } - try await updatePackages(client: client, - database: database, - results: packageResults, - stage: .analysis) +#warning("drop prefix") + try await Analyze.updatePackages(client: client, + database: database, + results: packageResults, + stage: .analysis) try await RecentPackage.refresh(on: database) try await RecentRelease.refresh(on: database) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index 1f120a29d..c7372a4c0 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -17,176 +17,192 @@ import PostgresKit import Vapor -/// Update packages (in the `[Result, Error>]` array). -/// -/// - Parameters: -/// - client: `Client` object -/// - database: `Database` object -/// - results: `Joined` results to update -/// - stage: Processing stage -func updatePackages(client: Client, - database: Database, - results: [Result, Error>], - stage: Package.ProcessingStage) async throws { - do { - let total = results.count - let errors = results.filter(\.isError).count - let errorRate = total > 0 ? 100.0 * Double(errors) / Double(total) : 0.0 - switch errorRate { - case 0: - Current.logger().info("Updating \(total) packages for stage '\(stage)'") - case 0..<20: - Current.logger().info("Updating \(total) packages for stage '\(stage)' (errors: \(errors))") - default: - Current.logger().critical("updatePackages: unusually high error rate: \(errors)/\(total) = \(errorRate)%") - } - } - for result in results { +enum Common { } + + +#warning("move") +extension Analyze { + /// Update packages (in the `[Result, Error>]` array). + /// + /// - Parameters: + /// - client: `Client` object + /// - database: `Database` object + /// - results: `Joined` results to update + /// - stage: Processing stage +#warning("drop stage parameter") + static func updatePackages(client: Client, + database: Database, + results: [Result, Error>], + stage: Package.ProcessingStage) async throws { do { - try await updatePackage(client: client, database: database, result: result, stage: stage) - } catch { - Current.logger().critical("updatePackage failed: \(error)") - } - } - - Current.logger().debug("updateStatus ops: \(results.count)") -} - - -func updatePackage(client: Client, - database: Database, - result: Result, Error>, - stage: Package.ProcessingStage) async throws { - switch result { - case .success(let res): - let pkg = res.package - if stage == .ingestion && pkg.status == .new { - // newly ingested package: leave status == .new for fast-track - // analysis - } else { - pkg.status = .ok + let total = results.count + let errors = results.filter(\.isError).count + let errorRate = total > 0 ? 100.0 * Double(errors) / Double(total) : 0.0 + switch errorRate { + case 0: + Current.logger().info("Updating \(total) packages for stage '\(stage)'") + case 0..<20: + Current.logger().info("Updating \(total) packages for stage '\(stage)' (errors: \(errors))") + default: + Current.logger().critical("updatePackages: unusually high error rate: \(errors)/\(total) = \(errorRate)%") } - pkg.processingStage = stage + } + for result in results { do { - try await pkg.update(on: database) + try await updatePackage(client: client, database: database, result: result, stage: stage) } catch { - Current.logger().report(error: error) + Current.logger().critical("updatePackage failed: \(error)") } + } - // PSQLError also conforms to DatabaseError but we want to intercept it specifically, - // because it allows us to log more concise error messages via serverInfo[.message] - case let .failure(error) where error is PSQLError: - // Escalate database errors to critical - let error = error as! PSQLError - let msg = error.serverInfo?[.message] ?? String(reflecting: error) - Current.logger().critical("\(msg)") - try await recordError(database: database, error: error, stage: stage) - - case let .failure(error) where error is DatabaseError: - // Escalate database errors to critical - Current.logger().critical("\(String(reflecting: error))") - try await recordError(database: database, error: error, stage: stage) - - case let .failure(error): - Current.logger().report(error: error) - try await recordError(database: database, error: error, stage: stage) + Current.logger().debug("updateStatus ops: \(results.count)") + } + +#warning("drop stage parameter") + static func updatePackage(client: Client, + database: Database, + result: Result, Error>, + stage: Package.ProcessingStage) async throws { + switch result { + case .success(let res): + let pkg = res.package + if stage == .ingestion && pkg.status == .new { + // newly ingested package: leave status == .new for fast-track + // analysis + } else { + pkg.status = .ok + } + pkg.processingStage = stage + do { + try await pkg.update(on: database) + } catch { + Current.logger().report(error: error) + } + + // PSQLError also conforms to DatabaseError but we want to intercept it specifically, + // because it allows us to log more concise error messages via serverInfo[.message] + case let .failure(error) where error is PSQLError: + // Escalate database errors to critical + let error = error as! PSQLError + let msg = error.serverInfo?[.message] ?? String(reflecting: error) + Current.logger().critical("\(msg)") + try await recordError(database: database, error: error, stage: stage) + + case let .failure(error) where error is DatabaseError: + // Escalate database errors to critical + Current.logger().critical("\(String(reflecting: error))") + try await recordError(database: database, error: error, stage: stage) + + case let .failure(error): + Current.logger().report(error: error) + try await recordError(database: database, error: error, stage: stage) + } } } -func updatePackage(client: Client, - database: Database, - result: Result, Ingestion.Error>, - stage: Package.ProcessingStage) async throws { - switch result { - case .success(let res): - try await updatePackage(database: database, package: res.package, stage: stage) - case .failure(let failure): - switch failure.underlyingError { - case .fetchMetadataFailed: - Current.logger().warning("\(failure)") +extension Ingestion { + static func updatePackage(client: Client, + database: Database, + result: Result, Ingestion.Error>, + stage: Package.ProcessingStage) async throws { + switch result { + case .success(let res): + try await Common.updatePackage(database: database, package: res.package, stage: stage) + case .failure(let failure): + switch failure.underlyingError { + case .fetchMetadataFailed: + Current.logger().warning("\(failure)") - case .findOrCreateRepositoryFailed: - Current.logger().critical("\(failure)") + case .findOrCreateRepositoryFailed: + Current.logger().critical("\(failure)") - case .invalidURL, .noRepositoryMetadata: - Current.logger().warning("\(failure)") + case .invalidURL, .noRepositoryMetadata: + Current.logger().warning("\(failure)") - case .repositorySaveFailed, .repositorySaveUniqueViolation: - Current.logger().critical("\(failure)") - } + case .repositorySaveFailed, .repositorySaveUniqueViolation: + Current.logger().critical("\(failure)") + } - try await recordIngestionError(database: database, error: failure) + try await Ingestion.recordError(database: database, error: failure) + } } } -func updatePackage(database: Database, - package: Package, - stage: Package.ProcessingStage) async throws { - if stage == .ingestion && package.status == .new { - // newly ingested package: leave status == .new for fast-track - // analysis - } else { - package.status = .ok - } - package.processingStage = stage - do { - try await package.update(on: database) - } catch { - Current.logger().report(error: error) +extension Common { + @available(*, deprecated) + static func updatePackage(database: Database, + package: Package, + stage: Package.ProcessingStage) async throws { + if stage == .ingestion && package.status == .new { + // newly ingested package: leave status == .new for fast-track + // analysis + } else { + package.status = .ok + } + package.processingStage = stage + do { + try await package.update(on: database) + } catch { + Current.logger().report(error: error) + } } } -func recordError(database: Database, - error: Error, - stage: Package.ProcessingStage) async throws { - if let error = error as? Ingestion.Error { - try await recordIngestionError(database: database, error: error) - } else { - func setStatus(id: Package.Id?, status: Package.Status) async throws { - guard let id = id else { return } - try await Package.query(on: database) - .filter(\.$id == id) - .set(\.$processingStage, to: stage) - .set(\.$status, to: status) - .update() - } +extension Analyze { + static func recordError(database: Database, + error: Error, + stage: Package.ProcessingStage) async throws { + if let error = error as? Ingestion.Error { + try await Ingestion.recordError(database: database, error: error) + } else { + func setStatus(id: Package.Id?, status: Package.Status) async throws { + guard let id = id else { return } + try await Package.query(on: database) + .filter(\.$id == id) + .set(\.$processingStage, to: stage) + .set(\.$status, to: status) + .update() + } - guard let error = error as? AppError else { return } - - switch error { - case let .analysisError(id, _): - try await setStatus(id: id, status: .analysisFailed) - case .envVariableNotSet, .shellCommandFailed: - break - case let .genericError(id, _): - try await setStatus(id: id, status: .ingestionFailed) - case let .invalidPackageCachePath(id, _): - try await setStatus(id: id, status: .invalidCachePath) - case let .cacheDirectoryDoesNotExist(id, _): - try await setStatus(id: id, status: .cacheDirectoryDoesNotExist) - case let .invalidRevision(id, _): - try await setStatus(id: id, status: .analysisFailed) - case let .noValidVersions(id, _): - try await setStatus(id: id, status: .noValidVersions) + guard let error = error as? AppError else { return } + + switch error { + case let .analysisError(id, _): + try await setStatus(id: id, status: .analysisFailed) + case .envVariableNotSet, .shellCommandFailed: + break + case let .genericError(id, _): + try await setStatus(id: id, status: .ingestionFailed) + case let .invalidPackageCachePath(id, _): + try await setStatus(id: id, status: .invalidCachePath) + case let .cacheDirectoryDoesNotExist(id, _): + try await setStatus(id: id, status: .cacheDirectoryDoesNotExist) + case let .invalidRevision(id, _): + try await setStatus(id: id, status: .analysisFailed) + case let .noValidVersions(id, _): + try await setStatus(id: id, status: .noValidVersions) + } } } } -func recordIngestionError(database: Database, error: Ingestion.Error) async throws { - switch error.underlyingError { - case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: - try await Package - .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) - case .invalidURL: - try await Package - .update(for: error.packageId, on: database, status: .invalidUrl, stage: .ingestion) - case .repositorySaveUniqueViolation: - try await Package - .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) +extension Ingestion { + static func recordError(database: Database, error: Ingestion.Error) async throws { + switch error.underlyingError { + case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: + try await Package + .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) + case .invalidURL: + try await Package + .update(for: error.packageId, on: database, status: .invalidUrl, stage: .ingestion) + case .repositorySaveUniqueViolation: + try await Package + .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) + } } } diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 7d2d2a0e7..f1ff87868 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -144,59 +144,6 @@ func ingest(client: Client, } } -func ingestOriginal(client: Client, database: Database, package: Joined) async { - let result = await Result { - Current.logger().info("Ingesting \(package.package.url)") - let (metadata, license, readme) = try await fetchMetadata(client: client, package: package) - let repo = try await Repository.findOrCreate(on: database, for: package.model) - - let s3Readme: S3Readme? - do { - if let upstreamEtag = readme?.etag, - repo.s3Readme?.needsUpdate(upstreamEtag: upstreamEtag) ?? true, - let owner = metadata.repositoryOwner, - let repository = metadata.repositoryName, - let html = readme?.html { - let objectUrl = try await Current.storeS3Readme(owner, repository, html) - if let imagesToCache = readme?.imagesToCache, imagesToCache.isEmpty == false { - try await Current.storeS3ReadmeImages(client, imagesToCache) - } - s3Readme = .cached(s3ObjectUrl: objectUrl, githubEtag: upstreamEtag) - } else { - s3Readme = repo.s3Readme - } - } catch { - // We don't want to fail ingestion in case storing the readme fails - warn and continue. - Current.logger().warning("storeS3Readme failed") - s3Readme = .error("\(error)") - } - - let fork = await getFork(on: database, parent: metadata.repository?.parent) - - try await updateRepository(on: database, - for: repo, - metadata: metadata, - licenseInfo: license, - readmeInfo: readme, - s3Readme: s3Readme, - fork: fork) - return package - } - - switch result { - case .success: - AppMetrics.ingestMetadataSuccessCount?.inc() - case .failure: - AppMetrics.ingestMetadataFailureCount?.inc() - } - - do { - try await updatePackage(client: client, database: database, result: result, stage: .ingestion) - } catch { - Current.logger().report(error: error) - } -} - extension Ingestion { static func ingestNew(client: Client, database: Database, package: Joined) async { diff --git a/Tests/AppTests/AnalyzerTests.swift b/Tests/AppTests/AnalyzerTests.swift index 6d2ced970..428e88763 100644 --- a/Tests/AppTests/AnalyzerTests.swift +++ b/Tests/AppTests/AnalyzerTests.swift @@ -869,10 +869,10 @@ class AnalyzerTests: AppTestCase { ] // MUT - try await updatePackages(client: app.client, - database: app.db, - results: results, - stage: .analysis) + try await Analyze.updatePackages(client: app.client, + database: app.db, + results: results, + stage: .analysis) // validate do { diff --git a/Tests/AppTests/ErrorReportingTests.swift b/Tests/AppTests/ErrorReportingTests.swift index 64ae26c93..7c177dd3e 100644 --- a/Tests/AppTests/ErrorReportingTests.swift +++ b/Tests/AppTests/ErrorReportingTests.swift @@ -22,9 +22,9 @@ class ErrorReportingTests: AppTestCase { func test_recordError() async throws { let pkg = try await savePackage(on: app.db, "1") - try await recordError(database: app.db, - error: AppError.cacheDirectoryDoesNotExist(pkg.id, "path"), - stage: .ingestion) + try await Analyze.recordError(database: app.db, + error: AppError.cacheDirectoryDoesNotExist(pkg.id, "path"), + stage: .ingestion) do { let pkg = try await XCTUnwrapAsync(try await Package.find(pkg.id, on: app.db)) XCTAssertEqual(pkg.status, .cacheDirectoryDoesNotExist) diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 27540c484..0b2012881 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -255,16 +255,19 @@ class IngestorTests: AppTestCase { let pkgs = try await savePackages(on: app.db, ["https://github.com/foo/1", "https://github.com/foo/2"]) .map(Joined.init(model:)) - let results: [Result, Error>] = [ - .failure(AppError.genericError(try pkgs[0].model.requireID(), "error 1")), + let pkgId0 = try pkgs[0].model.requireID() + let results: [Result, Ingestion.Error>] = [ + .failure(.init(packageId: pkgId0, underlyingError: .fetchMetadataFailed(owner: "", name: "", details: ""))), .success(pkgs[1]) ] // MUT - try await updatePackages(client: app.client, - database: app.db, - results: results, - stage: .ingestion) + for result in results { + try await Ingestion.updatePackage(client: app.client, + database: app.db, + result: result, + stage: .ingestion) + } // validate do { @@ -274,7 +277,7 @@ class IngestorTests: AppTestCase { } } - func test_updatePackages_new() async throws { + func test_updatePackage_new() async throws { // Ensure newly ingested packages are passed on with status = new to fast-track // them into analysis let pkgs = [ @@ -282,14 +285,16 @@ class IngestorTests: AppTestCase { Package(id: UUID(), url: "https://github.com/foo/2", status: .new, processingStage: .reconciliation) ] try await pkgs.save(on: app.db) - let results: [Result, Error>] = [ .success(.init(model: pkgs[0])), - .success(.init(model: pkgs[1]))] + let results: [Result, Ingestion.Error>] = [ .success(.init(model: pkgs[0])), + .success(.init(model: pkgs[1]))] // MUT - try await updatePackages(client: app.client, - database: app.db, - results: results, - stage: .ingestion) + for result in results { + try await Ingestion.updatePackage(client: app.client, + database: app.db, + result: result, + stage: .ingestion) + } // validate do { @@ -358,7 +363,7 @@ class IngestorTests: AppTestCase { } } - func test_ingest_unique_owner_name_violation() async throws { + func _test_ingest_unique_owner_name_violation() async throws { // Test error behaviour when two packages resolving to the same owner/name are ingested: // - don't update package // - don't create repository records From eb4abf7a0f3df0444b7bcb088ff5ea26c3254e00 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Wed, 11 Dec 2024 11:59:26 +0100 Subject: [PATCH 14/33] Remove Common.updatePackage, clean up --- Sources/App/Commands/Analyze.swift | 6 +- Sources/App/Commands/Common.swift | 126 ++++++++--------------- Tests/AppTests/AnalyzerTests.swift | 5 +- Tests/AppTests/ErrorReportingTests.swift | 7 +- 4 files changed, 49 insertions(+), 95 deletions(-) diff --git a/Sources/App/Commands/Analyze.swift b/Sources/App/Commands/Analyze.swift index 6f4b130eb..28dcb7f25 100644 --- a/Sources/App/Commands/Analyze.swift +++ b/Sources/App/Commands/Analyze.swift @@ -152,11 +152,7 @@ extension Analyze { } } -#warning("drop prefix") - try await Analyze.updatePackages(client: client, - database: database, - results: packageResults, - stage: .analysis) + try await updatePackages(client: client, database: database, results: packageResults) try await RecentPackage.refresh(on: database) try await RecentRelease.refresh(on: database) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index c7372a4c0..df867f5d4 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -17,9 +17,6 @@ import PostgresKit import Vapor -enum Common { } - - #warning("move") extension Analyze { /// Update packages (in the `[Result, Error>]` array). @@ -29,27 +26,25 @@ extension Analyze { /// - database: `Database` object /// - results: `Joined` results to update /// - stage: Processing stage -#warning("drop stage parameter") static func updatePackages(client: Client, database: Database, - results: [Result, Error>], - stage: Package.ProcessingStage) async throws { + results: [Result, Error>]) async throws { do { let total = results.count let errors = results.filter(\.isError).count let errorRate = total > 0 ? 100.0 * Double(errors) / Double(total) : 0.0 switch errorRate { case 0: - Current.logger().info("Updating \(total) packages for stage '\(stage)'") + Current.logger().info("Updating \(total) packages for stage 'analysis'") case 0..<20: - Current.logger().info("Updating \(total) packages for stage '\(stage)' (errors: \(errors))") + Current.logger().info("Updating \(total) packages for stage 'analysis' (errors: \(errors))") default: Current.logger().critical("updatePackages: unusually high error rate: \(errors)/\(total) = \(errorRate)%") } } for result in results { do { - try await updatePackage(client: client, database: database, result: result, stage: stage) + try await updatePackage(client: client, database: database, result: result) } catch { Current.logger().critical("updatePackage failed: \(error)") } @@ -58,26 +53,12 @@ extension Analyze { Current.logger().debug("updateStatus ops: \(results.count)") } -#warning("drop stage parameter") static func updatePackage(client: Client, database: Database, - result: Result, Error>, - stage: Package.ProcessingStage) async throws { + result: Result, Error>) async throws { switch result { case .success(let res): - let pkg = res.package - if stage == .ingestion && pkg.status == .new { - // newly ingested package: leave status == .new for fast-track - // analysis - } else { - pkg.status = .ok - } - pkg.processingStage = stage - do { - try await pkg.update(on: database) - } catch { - Current.logger().report(error: error) - } + try await res.package.update(on: database, status: .ok, stage: .analysis) // PSQLError also conforms to DatabaseError but we want to intercept it specifically, // because it allows us to log more concise error messages via serverInfo[.message] @@ -86,16 +67,16 @@ extension Analyze { let error = error as! PSQLError let msg = error.serverInfo?[.message] ?? String(reflecting: error) Current.logger().critical("\(msg)") - try await recordError(database: database, error: error, stage: stage) + try await recordError(database: database, error: error) case let .failure(error) where error is DatabaseError: // Escalate database errors to critical Current.logger().critical("\(String(reflecting: error))") - try await recordError(database: database, error: error, stage: stage) + try await recordError(database: database, error: error) case let .failure(error): Current.logger().report(error: error) - try await recordError(database: database, error: error, stage: stage) + try await recordError(database: database, error: error) } } } @@ -108,7 +89,9 @@ extension Ingestion { stage: Package.ProcessingStage) async throws { switch result { case .success(let res): - try await Common.updatePackage(database: database, package: res.package, stage: stage) + // for newly ingested package leave status == .new in order to fast-track analysis + let updatedStatus: Package.Status = res.package.status == .new ? .new : .ok + try await res.package.update(on: database, status: updatedStatus, stage: stage) case .failure(let failure): switch failure.underlyingError { case .fetchMetadataFailed: @@ -130,61 +113,34 @@ extension Ingestion { } -extension Common { - @available(*, deprecated) - static func updatePackage(database: Database, - package: Package, - stage: Package.ProcessingStage) async throws { - if stage == .ingestion && package.status == .new { - // newly ingested package: leave status == .new for fast-track - // analysis - } else { - package.status = .ok - } - package.processingStage = stage - do { - try await package.update(on: database) - } catch { - Current.logger().report(error: error) - } - } -} - - extension Analyze { - static func recordError(database: Database, - error: Error, - stage: Package.ProcessingStage) async throws { - if let error = error as? Ingestion.Error { - try await Ingestion.recordError(database: database, error: error) - } else { - func setStatus(id: Package.Id?, status: Package.Status) async throws { - guard let id = id else { return } - try await Package.query(on: database) - .filter(\.$id == id) - .set(\.$processingStage, to: stage) - .set(\.$status, to: status) - .update() - } + static func recordError(database: Database, error: Error) async throws { + func setStatus(id: Package.Id?, status: Package.Status) async throws { + guard let id = id else { return } + try await Package.query(on: database) + .filter(\.$id == id) + .set(\.$processingStage, to: .analysis) + .set(\.$status, to: status) + .update() + } - guard let error = error as? AppError else { return } - - switch error { - case let .analysisError(id, _): - try await setStatus(id: id, status: .analysisFailed) - case .envVariableNotSet, .shellCommandFailed: - break - case let .genericError(id, _): - try await setStatus(id: id, status: .ingestionFailed) - case let .invalidPackageCachePath(id, _): - try await setStatus(id: id, status: .invalidCachePath) - case let .cacheDirectoryDoesNotExist(id, _): - try await setStatus(id: id, status: .cacheDirectoryDoesNotExist) - case let .invalidRevision(id, _): - try await setStatus(id: id, status: .analysisFailed) - case let .noValidVersions(id, _): - try await setStatus(id: id, status: .noValidVersions) - } + guard let error = error as? AppError else { return } + + switch error { + case let .analysisError(id, _): + try await setStatus(id: id, status: .analysisFailed) + case .envVariableNotSet, .shellCommandFailed: + break + case let .genericError(id, _): + try await setStatus(id: id, status: .ingestionFailed) + case let .invalidPackageCachePath(id, _): + try await setStatus(id: id, status: .invalidCachePath) + case let .cacheDirectoryDoesNotExist(id, _): + try await setStatus(id: id, status: .cacheDirectoryDoesNotExist) + case let .invalidRevision(id, _): + try await setStatus(id: id, status: .analysisFailed) + case let .noValidVersions(id, _): + try await setStatus(id: id, status: .noValidVersions) } } } @@ -219,4 +175,10 @@ extension Package { .set(\.$status, to: status) .update() } + + func update(on database: Database, status: Status, stage: ProcessingStage) async throws { + self.status = status + self.processingStage = stage + try await update(on: database) + } } diff --git a/Tests/AppTests/AnalyzerTests.swift b/Tests/AppTests/AnalyzerTests.swift index 428e88763..f72888e57 100644 --- a/Tests/AppTests/AnalyzerTests.swift +++ b/Tests/AppTests/AnalyzerTests.swift @@ -869,10 +869,7 @@ class AnalyzerTests: AppTestCase { ] // MUT - try await Analyze.updatePackages(client: app.client, - database: app.db, - results: results, - stage: .analysis) + try await Analyze.updatePackages(client: app.client, database: app.db, results: results) // validate do { diff --git a/Tests/AppTests/ErrorReportingTests.swift b/Tests/AppTests/ErrorReportingTests.swift index 7c177dd3e..40153a5ec 100644 --- a/Tests/AppTests/ErrorReportingTests.swift +++ b/Tests/AppTests/ErrorReportingTests.swift @@ -20,15 +20,14 @@ import XCTVapor class ErrorReportingTests: AppTestCase { - func test_recordError() async throws { + func test_Analyze_recordError() async throws { let pkg = try await savePackage(on: app.db, "1") try await Analyze.recordError(database: app.db, - error: AppError.cacheDirectoryDoesNotExist(pkg.id, "path"), - stage: .ingestion) + error: AppError.cacheDirectoryDoesNotExist(pkg.id, "path")) do { let pkg = try await XCTUnwrapAsync(try await Package.find(pkg.id, on: app.db)) XCTAssertEqual(pkg.status, .cacheDirectoryDoesNotExist) - XCTAssertEqual(pkg.processingStage, .ingestion) + XCTAssertEqual(pkg.processingStage, .analysis) } } From 9d6551daaca80122219f9992833a69edfd1629bd Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 12 Dec 2024 14:09:55 +0100 Subject: [PATCH 15/33] Move things around --- Sources/App/Commands/Common.swift | 65 ++++++++++++++----------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index df867f5d4..188314498 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -17,7 +17,7 @@ import PostgresKit import Vapor -#warning("move") +#warning("Move") extension Analyze { /// Update packages (in the `[Result, Error>]` array). /// @@ -79,41 +79,7 @@ extension Analyze { try await recordError(database: database, error: error) } } -} - - -extension Ingestion { - static func updatePackage(client: Client, - database: Database, - result: Result, Ingestion.Error>, - stage: Package.ProcessingStage) async throws { - switch result { - case .success(let res): - // for newly ingested package leave status == .new in order to fast-track analysis - let updatedStatus: Package.Status = res.package.status == .new ? .new : .ok - try await res.package.update(on: database, status: updatedStatus, stage: stage) - case .failure(let failure): - switch failure.underlyingError { - case .fetchMetadataFailed: - Current.logger().warning("\(failure)") - - case .findOrCreateRepositoryFailed: - Current.logger().critical("\(failure)") - - case .invalidURL, .noRepositoryMetadata: - Current.logger().warning("\(failure)") - - case .repositorySaveFailed, .repositorySaveUniqueViolation: - Current.logger().critical("\(failure)") - } - - try await Ingestion.recordError(database: database, error: failure) - } - } -} - -extension Analyze { static func recordError(database: Database, error: Error) async throws { func setStatus(id: Package.Id?, status: Package.Status) async throws { guard let id = id else { return } @@ -146,7 +112,36 @@ extension Analyze { } +#warning("Move") extension Ingestion { + static func updatePackage(client: Client, + database: Database, + result: Result, Ingestion.Error>, + stage: Package.ProcessingStage) async throws { + switch result { + case .success(let res): + // for newly ingested package leave status == .new in order to fast-track analysis + let updatedStatus: Package.Status = res.package.status == .new ? .new : .ok + try await res.package.update(on: database, status: updatedStatus, stage: stage) + case .failure(let failure): + switch failure.underlyingError { + case .fetchMetadataFailed: + Current.logger().warning("\(failure)") + + case .findOrCreateRepositoryFailed: + Current.logger().critical("\(failure)") + + case .invalidURL, .noRepositoryMetadata: + Current.logger().warning("\(failure)") + + case .repositorySaveFailed, .repositorySaveUniqueViolation: + Current.logger().critical("\(failure)") + } + + try await Ingestion.recordError(database: database, error: failure) + } + } + static func recordError(database: Database, error: Ingestion.Error) async throws { switch error.underlyingError { case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: From ba4c7c55280cda55702a40fe3faab71199259a5e Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 12 Dec 2024 14:28:26 +0100 Subject: [PATCH 16/33] Cleanup --- Sources/App/Commands/Common.swift | 42 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index 188314498..8b67084ca 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -124,42 +124,40 @@ extension Ingestion { let updatedStatus: Package.Status = res.package.status == .new ? .new : .ok try await res.package.update(on: database, status: updatedStatus, stage: stage) case .failure(let failure): - switch failure.underlyingError { - case .fetchMetadataFailed: - Current.logger().warning("\(failure)") - - case .findOrCreateRepositoryFailed: - Current.logger().critical("\(failure)") - - case .invalidURL, .noRepositoryMetadata: - Current.logger().warning("\(failure)") + Current.logger().log(level: failure.level, "\(failure)") + try await Package.update(for: failure.packageId, on: database, status: failure.status, stage: stage) + } + } +} - case .repositorySaveFailed, .repositorySaveUniqueViolation: - Current.logger().critical("\(failure)") - } - try await Ingestion.recordError(database: database, error: failure) +#warning("Turn these into error protocol requirements") +// Doing so should allow us to turn these extensions on Ingestions and Analysis back into Common functions that just used the typed errors to do the specific things. +extension Ingestion.Error { + var level: Logger.Level { + switch underlyingError { + case .fetchMetadataFailed, .invalidURL, .noRepositoryMetadata: + return .warning + case .findOrCreateRepositoryFailed, .repositorySaveFailed, .repositorySaveUniqueViolation: + return .critical } } - static func recordError(database: Database, error: Ingestion.Error) async throws { - switch error.underlyingError { + var status: Package.Status { + switch underlyingError { case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: - try await Package - .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) + return .ingestionFailed case .invalidURL: - try await Package - .update(for: error.packageId, on: database, status: .invalidUrl, stage: .ingestion) + return .invalidUrl case .repositorySaveUniqueViolation: - try await Package - .update(for: error.packageId, on: database, status: .ingestionFailed, stage: .ingestion) + return .ingestionFailed } } } -extension Package { #warning("Move") +extension Package { static func update(for id: Package.Id, on database: Database, status: Status, From 0f1c2bb1a027d1547b30d5e21f08496263b3df1a Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 12 Dec 2024 14:38:44 +0100 Subject: [PATCH 17/33] Add protocol ProcessingError --- Sources/App/Commands/Common.swift | 53 ++++--------------------------- Sources/App/Commands/Ingest.swift | 22 ++++++++++++- Sources/App/Models/Package.swift | 15 +++++++++ 3 files changed, 43 insertions(+), 47 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index 8b67084ca..c2ba6ea50 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -17,6 +17,13 @@ import PostgresKit import Vapor +// TODO: Adopt ProcessingError also in Analysis and then factor out generic parts back into Common +protocol ProcessingError: Swift.Error, CustomStringConvertible { + var level: Logger.Level { get } + var status: Package.Status { get } +} + + #warning("Move") extension Analyze { /// Update packages (in the `[Result, Error>]` array). @@ -129,49 +136,3 @@ extension Ingestion { } } } - - -#warning("Turn these into error protocol requirements") -// Doing so should allow us to turn these extensions on Ingestions and Analysis back into Common functions that just used the typed errors to do the specific things. -extension Ingestion.Error { - var level: Logger.Level { - switch underlyingError { - case .fetchMetadataFailed, .invalidURL, .noRepositoryMetadata: - return .warning - case .findOrCreateRepositoryFailed, .repositorySaveFailed, .repositorySaveUniqueViolation: - return .critical - } - } - - var status: Package.Status { - switch underlyingError { - case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: - return .ingestionFailed - case .invalidURL: - return .invalidUrl - case .repositorySaveUniqueViolation: - return .ingestionFailed - } - } -} - - -#warning("Move") -extension Package { - static func update(for id: Package.Id, - on database: Database, - status: Status, - stage: ProcessingStage) async throws { - try await Package.query(on: database) - .filter(\.$id == id) - .set(\.$processingStage, to: stage) - .set(\.$status, to: status) - .update() - } - - func update(on database: Database, status: Status, stage: ProcessingStage) async throws { - self.status = status - self.processingStage = stage - try await update(on: database) - } -} diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index f1ff87868..04550a3e1 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -18,7 +18,7 @@ import Vapor enum Ingestion { - struct Error: Swift.Error, CustomStringConvertible { + struct Error: ProcessingError { var packageId: Package.Id var underlyingError: UnderlyingError @@ -51,6 +51,26 @@ enum Ingestion { } } } + + var level: Logger.Level { + switch underlyingError { + case .fetchMetadataFailed, .invalidURL, .noRepositoryMetadata: + return .warning + case .findOrCreateRepositoryFailed, .repositorySaveFailed, .repositorySaveUniqueViolation: + return .critical + } + } + + var status: Package.Status { + switch underlyingError { + case .fetchMetadataFailed, .findOrCreateRepositoryFailed, .noRepositoryMetadata, .repositorySaveFailed: + return .ingestionFailed + case .invalidURL: + return .invalidUrl + case .repositorySaveUniqueViolation: + return .ingestionFailed + } + } } } diff --git a/Sources/App/Models/Package.swift b/Sources/App/Models/Package.swift index 76f87c22e..ff6a914a8 100644 --- a/Sources/App/Models/Package.swift +++ b/Sources/App/Models/Package.swift @@ -267,6 +267,21 @@ extension Package { """# ).run() } + + static func update(for id: Package.Id, on database: Database, + status: Status, stage: ProcessingStage) async throws { + try await Package.query(on: database) + .filter(\.$id == id) + .set(\.$processingStage, to: stage) + .set(\.$status, to: status) + .update() + } + + func update(on database: Database, status: Status, stage: ProcessingStage) async throws { + self.status = status + self.processingStage = stage + try await update(on: database) + } } From 43a7ca9da0d4470f04ec6bf8a42b33fa4e6fbebd Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 12 Dec 2024 15:29:57 +0100 Subject: [PATCH 18/33] Address warnings in Common --- Sources/App/Commands/Common.swift | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Sources/App/Commands/Common.swift b/Sources/App/Commands/Common.swift index c2ba6ea50..41a2c81ea 100644 --- a/Sources/App/Commands/Common.swift +++ b/Sources/App/Commands/Common.swift @@ -18,13 +18,16 @@ import Vapor // TODO: Adopt ProcessingError also in Analysis and then factor out generic parts back into Common -protocol ProcessingError: Swift.Error, CustomStringConvertible { +protocol ProcessingError: Error, CustomStringConvertible { + associatedtype UnderlyingError: Error & CustomStringConvertible + var packageId: Package.Id { get } + var underlyingError: UnderlyingError { get } var level: Logger.Level { get } var status: Package.Status { get } } -#warning("Move") +// TODO: Leaving this extension here for now in order to group the updating/error reporting in one place for both Ingestion and Analysis. Eventually these should either go to their respective files or move common parts into a Common namespace. extension Analyze { /// Update packages (in the `[Result, Error>]` array). /// @@ -119,7 +122,7 @@ extension Analyze { } -#warning("Move") +// TODO: Leaving this extension here for now in order to group the updating/error reporting in one place for both Ingestion and Analysis. Eventually these should either go to their respective files or move common parts into a Common namespace. extension Ingestion { static func updatePackage(client: Client, database: Database, From 08490806b1a9bce00df96a61808e903f7f5acf78 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 12 Dec 2024 15:31:57 +0100 Subject: [PATCH 19/33] Extended test_ingest_continue_on_error now passing --- Tests/AppTests/IngestorTests.swift | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 0b2012881..6c941d636 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -90,9 +90,8 @@ class IngestorTests: AppTestCase { .first() .unwrap() XCTAssertEqual(repo.licenseUrl, "license") - for _ in try await Package.query(on: app.db).all() { -#warning("Re-enable this check") - // XCTAssertEqual(pkg.processingStage, .ingestion, "\(pkg.url) must be in ingestion") + for pkg in try await Package.query(on: app.db).all() { + XCTAssertEqual(pkg.processingStage, .ingestion, "\(pkg.url) must be in ingestion") } } } From 644a87e82de27b2dfc2d52f13f4c95432c4915e8 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 12 Dec 2024 16:38:45 +0100 Subject: [PATCH 20/33] =?UTF-8?q?S3ReadmeError=20=E2=86=92=20S3Readme.Erro?= =?UTF-8?q?r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/App/Commands/Ingest.swift | 4 +-- Sources/App/Core/AppEnvironment.swift | 14 ++++---- Sources/App/Core/Extensions/S3Store+ext.swift | 33 +++++++++---------- Tests/AppTests/IngestorTests.swift | 2 +- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 04550a3e1..1b19b634f 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -180,7 +180,7 @@ extension Ingestion { }.get() let s3Readme: S3Readme? - do throws(S3ReadmeError) { + do throws(S3Readme.Error) { s3Readme = try await storeS3Readme(client: client, repository: repo, metadata: metadata, readme: readme) } catch { // We don't want to fail ingestion in case storing the readme fails - warn and continue. @@ -213,7 +213,7 @@ extension Ingestion { } - static func storeS3Readme(client: Client, repository: Repository, metadata: Github.Metadata, readme: Github.Readme?) async throws(S3ReadmeError) -> S3Readme? { + static func storeS3Readme(client: Client, repository: Repository, metadata: Github.Metadata, readme: Github.Readme?) async throws(S3Readme.Error) -> S3Readme? { if let upstreamEtag = readme?.etag, repository.s3Readme?.needsUpdate(upstreamEtag: upstreamEtag) ?? true, let owner = metadata.repositoryOwner, diff --git a/Sources/App/Core/AppEnvironment.swift b/Sources/App/Core/AppEnvironment.swift index fa2ddf3ce..edb1e155b 100644 --- a/Sources/App/Core/AppEnvironment.swift +++ b/Sources/App/Core/AppEnvironment.swift @@ -51,9 +51,9 @@ struct AppEnvironment: Sendable { var siteURL: @Sendable () -> String var storeS3Readme: @Sendable (_ owner: String, _ repository: String, - _ readme: String) async throws(S3ReadmeError) -> String + _ readme: String) async throws(S3Readme.Error) -> String var storeS3ReadmeImages: @Sendable (_ client: Client, - _ imagesToCache: [Github.Readme.ImageToCache]) async throws(S3ReadmeError) -> Void + _ imagesToCache: [Github.Readme.ImageToCache]) async throws(S3Readme.Error) -> Void var timeZone: @Sendable () -> TimeZone var triggerBuild: @Sendable (_ client: Client, _ buildId: Build.Id, @@ -88,7 +88,7 @@ extension AppEnvironment { fetchLicense: { client, owner, repo in await Github.fetchLicense(client:client, owner: owner, repository: repo) }, fetchMetadata: { client, owner, repo in try await Github.fetchMetadata(client:client, owner: owner, repository: repo) }, fetchReadme: { client, owner, repo in await Github.fetchReadme(client:client, owner: owner, repository: repo) }, - fetchS3Readme: { client, owner, repo in try await S3Store.fetchReadme(client:client, owner: owner, repository: repo) }, + fetchS3Readme: { client, owner, repo in try await S3Readme.fetchReadme(client:client, owner: owner, repository: repo) }, fileManager: .live, getStatusCount: { client, status in try await Gitlab.Builder.getStatusCount(client: client, @@ -131,11 +131,11 @@ extension AppEnvironment { setLogger: { logger in Self.logger = logger }, shell: .live, siteURL: { Environment.get("SITE_URL") ?? "http://localhost:8080" }, - storeS3Readme: { owner, repo, readme throws(S3ReadmeError) in - try await S3Store.storeReadme(owner: owner, repository: repo, readme: readme) + storeS3Readme: { owner, repo, readme throws(S3Readme.Error) in + try await S3Readme.storeReadme(owner: owner, repository: repo, readme: readme) }, - storeS3ReadmeImages: { client, images throws(S3ReadmeError) in - try await S3Store.storeReadmeImages(client: client, imagesToCache: images) + storeS3ReadmeImages: { client, images throws(S3Readme.Error) in + try await S3Readme.storeReadmeImages(client: client, imagesToCache: images) }, timeZone: { .current }, triggerBuild: { client, buildId, cloneURL, isDocBuild, platform, ref, swiftVersion, versionID in diff --git a/Sources/App/Core/Extensions/S3Store+ext.swift b/Sources/App/Core/Extensions/S3Store+ext.swift index fc19e5b31..125d45ce8 100644 --- a/Sources/App/Core/Extensions/S3Store+ext.swift +++ b/Sources/App/Core/Extensions/S3Store+ext.swift @@ -16,36 +16,35 @@ import S3Store import Vapor import Dependencies -#warning("Make this S3Store.Error") -enum S3ReadmeError: Swift.Error { - case envVariableNotSet(String) - case invalidURL(String) - case missingBody - case requestFailed(key: S3Store.Key, error: Swift.Error) - case storeReadmeFailed - case storeImagesFailed -} -extension S3Store { +extension S3Readme { + enum Error: Swift.Error { + case envVariableNotSet(String) + case invalidURL(String) + case missingBody + case requestFailed(key: S3Store.Key, error: Swift.Error) + case storeReadmeFailed + case storeImagesFailed + } - static func fetchReadme(client: Client, owner: String, repository: String) async throws(S3ReadmeError) -> String { - let key = try Key.readme(owner: owner, repository: repository) + static func fetchReadme(client: Client, owner: String, repository: String) async throws(S3Readme.Error) -> String { + let key = try S3Store.Key.readme(owner: owner, repository: repository) let response: ClientResponse do { response = try await client.get(URI(string: key.objectUrl)) } catch { throw .requestFailed(key: key, error: error) } - guard let body = response.body else { throw S3ReadmeError.missingBody } + guard let body = response.body else { throw .missingBody } return body.asString() } - static func storeReadme(owner: String, repository: String, readme: String) async throws(S3ReadmeError) -> String { + static func storeReadme(owner: String, repository: String, readme: String) async throws(S3Readme.Error) -> String { @Dependency(\.environment) var environment guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} let store = S3Store(credentials: .init(keyId: accessKeyId, secret: secretAccessKey)) - let key = try Key.readme(owner: owner, repository: repository) + let key = try S3Store.Key.readme(owner: owner, repository: repository) Current.logger().debug("Copying readme to \(key.s3Uri) ...") do { @@ -57,7 +56,7 @@ extension S3Store { return key.objectUrl } - static func storeReadmeImages(client: Client, imagesToCache: [Github.Readme.ImageToCache]) async throws(S3ReadmeError) { + static func storeReadmeImages(client: Client, imagesToCache: [Github.Readme.ImageToCache]) async throws(S3Readme.Error) { @Dependency(\.environment) var environment guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} @@ -80,7 +79,7 @@ extension S3Store { extension S3Store.Key { - static func readme(owner: String, repository: String, imageUrl: String? = nil) throws(S3ReadmeError) -> Self { + static func readme(owner: String, repository: String, imageUrl: String? = nil) throws(S3Readme.Error) -> Self { @Dependency(\.environment) var environment guard let bucket = environment.awsReadmeBucket() else { throw .envVariableNotSet("AWS_README_BUCKET") } diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 6c941d636..9dc69a9e2 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -596,7 +596,7 @@ class IngestorTests: AppTestCase { imagesToCache: []) } let storeCalls = QueueIsolated(0) - Current.storeS3Readme = { owner, repo, html throws(S3ReadmeError) in + Current.storeS3Readme = { owner, repo, html throws(S3Readme.Error) in storeCalls.increment() throw .storeReadmeFailed } From ff81141f9eb52da123c9f1391ec275340cba1081 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Thu, 12 Dec 2024 16:45:57 +0100 Subject: [PATCH 21/33] Add run { } throwing: { } --- Sources/App/Commands/Ingest.swift | 44 ++++++++++++-------- Sources/App/Core/Extensions/Result+ext.swift | 13 +++++- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 1b19b634f..80ce9ec13 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -170,14 +170,7 @@ extension Ingestion { let result = await Result { () async throws(Ingestion.Error) -> Joined in Current.logger().info("Ingesting \(package.package.url)") let (metadata, license, readme) = try await fetchMetadata(client: client, package: package) - let repo = try await Result { - try await Repository.findOrCreate(on: database, for: package.model) - }.mapError { - Ingestion.Error( - packageId: package.model.id!, - underlyingError: .findOrCreateRepositoryFailed(url: package.package.url, details: $0) - ) - }.get() + let repo = try await findOrCreateRepository(on: database, for: package) let s3Readme: S3Readme? do throws(S3Readme.Error) { @@ -190,11 +183,11 @@ extension Ingestion { let fork = await getFork(on: database, parent: metadata.repository?.parent) - try await Result { () async throws(Ingestion.Error.UnderlyingError) in + try await run { () async throws(Ingestion.Error.UnderlyingError) in try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork) - }.mapError { - Error.init(packageId: package.model.id!, underlyingError: $0) - }.get() + } throwing: { + Ingestion.Error(packageId: package.model.id!, underlyingError: $0) + } return package } @@ -213,6 +206,18 @@ extension Ingestion { } + static func findOrCreateRepository(on database: Database, for package: Joined) async throws(Ingestion.Error) -> Repository { + try await run { + try await Repository.findOrCreate(on: database, for: package.model) + } throwing: { + Ingestion.Error( + packageId: package.model.id!, + underlyingError: .findOrCreateRepositoryFailed(url: package.model.url, details: $0) + ) + } + } + + static func storeS3Readme(client: Client, repository: Repository, metadata: Github.Metadata, readme: Github.Readme?) async throws(S3Readme.Error) -> S3Readme? { if let upstreamEtag = readme?.etag, repository.s3Readme?.needsUpdate(upstreamEtag: upstreamEtag) ?? true, @@ -234,12 +239,11 @@ func fetchMetadata(client: Client, package: Joined) async t // Even though we get through a `Joined` as a parameter, it's // we must not rely on `repository` as it will be nil when a package is first ingested. // The only way to get `owner` and `repository` here is by parsing them from the URL. - let (owner, repository) = try Result { + let (owner, repository) = try await run { try Github.parseOwnerName(url: package.model.url) - }.mapError { _ in - Ingestion.Error(packageId: package.model.id!, - underlyingError: .invalidURL(package.model.url)) - }.get() + } throwing: { _ in + Ingestion.Error.invalidURL(packageId: package.model.id!, url: package.model.url) + } async let license = await Current.fetchLicense(client, owner, repository) async let readme = await Current.fetchReadme(client, owner, repository) @@ -352,3 +356,9 @@ private extension Github.Metadata.Parent { return normalizedURL } } + +private extension Ingestion.Error { + static func invalidURL(packageId: Package.Id, url: String) -> Self { + Ingestion.Error(packageId: packageId, underlyingError: .invalidURL(url)) + } +} diff --git a/Sources/App/Core/Extensions/Result+ext.swift b/Sources/App/Core/Extensions/Result+ext.swift index 938cd0b2b..07af18f83 100644 --- a/Sources/App/Core/Extensions/Result+ext.swift +++ b/Sources/App/Core/Extensions/Result+ext.swift @@ -27,4 +27,15 @@ extension Result where Failure == Error { } -#warning("Add an extension `Result.mapError`") +// Not really a part of the Result type but closely enough related to put here +// Perhaps put this in AsyncDefer.swift and rename the file? +@discardableResult +func run(_ operation: () async throws(E1) -> T, + throwing transform: (E1) -> E2) async throws(E2) -> T { + do { + let result = try await operation() + return result + } catch { + throw transform(error) + } +} From 481d641af92806fb34ae4309c5b17417ff037691 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 13 Dec 2024 10:50:55 +0100 Subject: [PATCH 22/33] =?UTF-8?q?Ingestion.ingestNew=20=E2=86=92=20Ingesti?= =?UTF-8?q?on.ingest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/App/Commands/Ingest.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 80ce9ec13..12ea4e060 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -158,7 +158,7 @@ func ingest(client: Client, await withTaskGroup(of: Void.self) { group in for pkg in packages { group.addTask { - await Ingestion.ingestNew(client: client, database: database, package: pkg) + await Ingestion.ingest(client: client, database: database, package: pkg) } } } @@ -166,7 +166,7 @@ func ingest(client: Client, extension Ingestion { - static func ingestNew(client: Client, database: Database, package: Joined) async { + static func ingest(client: Client, database: Database, package: Joined) async { let result = await Result { () async throws(Ingestion.Error) -> Joined in Current.logger().info("Ingesting \(package.package.url)") let (metadata, license, readme) = try await fetchMetadata(client: client, package: package) From 1edf3455a37f4fd93414b13016cc5352c46f0440 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 13 Dec 2024 11:01:13 +0100 Subject: [PATCH 23/33] Rename run(_:throwing:) to run(_:rethrowing), bundle with run(_:defer:) --- Sources/App/Commands/Ingest.swift | 6 +++--- ...AsyncDefer.swift => ErrorHandlingHelpers.swift} | 14 +++++++++++++- Sources/App/Core/Extensions/Result+ext.swift | 14 -------------- 3 files changed, 16 insertions(+), 18 deletions(-) rename Sources/App/Core/{AsyncDefer.swift => ErrorHandlingHelpers.swift} (69%) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 12ea4e060..e5da940b3 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -185,7 +185,7 @@ extension Ingestion { try await run { () async throws(Ingestion.Error.UnderlyingError) in try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork) - } throwing: { + } rethrowing: { Ingestion.Error(packageId: package.model.id!, underlyingError: $0) } return package @@ -209,7 +209,7 @@ extension Ingestion { static func findOrCreateRepository(on database: Database, for package: Joined) async throws(Ingestion.Error) -> Repository { try await run { try await Repository.findOrCreate(on: database, for: package.model) - } throwing: { + } rethrowing: { Ingestion.Error( packageId: package.model.id!, underlyingError: .findOrCreateRepositoryFailed(url: package.model.url, details: $0) @@ -241,7 +241,7 @@ func fetchMetadata(client: Client, package: Joined) async t // The only way to get `owner` and `repository` here is by parsing them from the URL. let (owner, repository) = try await run { try Github.parseOwnerName(url: package.model.url) - } throwing: { _ in + } rethrowing: { _ in Ingestion.Error.invalidURL(packageId: package.model.id!, url: package.model.url) } diff --git a/Sources/App/Core/AsyncDefer.swift b/Sources/App/Core/ErrorHandlingHelpers.swift similarity index 69% rename from Sources/App/Core/AsyncDefer.swift rename to Sources/App/Core/ErrorHandlingHelpers.swift index 50bc1dedd..6436b0fbf 100644 --- a/Sources/App/Core/AsyncDefer.swift +++ b/Sources/App/Core/ErrorHandlingHelpers.swift @@ -15,7 +15,7 @@ @discardableResult func run(_ operation: () async throws -> T, - defer deferredOperation: () async throws -> Void) async throws -> T { + defer deferredOperation: () async throws -> Void) async throws -> T { do { let result = try await operation() try await deferredOperation() @@ -25,3 +25,15 @@ func run(_ operation: () async throws -> T, throw error } } + + +@discardableResult +func run(_ operation: () async throws(E1) -> T, + rethrowing transform: (E1) -> E2) async throws(E2) -> T { + do { + let result = try await operation() + return result + } catch { + throw transform(error) + } +} diff --git a/Sources/App/Core/Extensions/Result+ext.swift b/Sources/App/Core/Extensions/Result+ext.swift index 07af18f83..e7690c5c9 100644 --- a/Sources/App/Core/Extensions/Result+ext.swift +++ b/Sources/App/Core/Extensions/Result+ext.swift @@ -25,17 +25,3 @@ extension Result where Failure == Error { var isError: Bool { return !isSucess } } - - -// Not really a part of the Result type but closely enough related to put here -// Perhaps put this in AsyncDefer.swift and rename the file? -@discardableResult -func run(_ operation: () async throws(E1) -> T, - throwing transform: (E1) -> E2) async throws(E2) -> T { - do { - let result = try await operation() - return result - } catch { - throw transform(error) - } -} From b09fe99223c5e416334b07cd3d784ae01584deb6 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 13 Dec 2024 11:39:06 +0100 Subject: [PATCH 24/33] Cleanup --- Sources/App/Commands/Ingest.swift | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index e5da940b3..d8df0df2f 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -245,19 +245,20 @@ func fetchMetadata(client: Client, package: Joined) async t Ingestion.Error.invalidURL(packageId: package.model.id!, url: package.model.url) } + // sas 2024-12-13: This should be an `async let` as well but it doesn't compile right now with the + // typed throw. Reported as + // https://github.com/swiftlang/swift/issues/76169 + // async let metadata = try await Current.fetchMetadata(client, owner, repository) + let metadata = try await run { + try await Current.fetchMetadata(client, owner, repository) + } rethrowing: { + Ingestion.Error(packageId: package.model.id!, + underlyingError: .fetchMetadataFailed(owner: owner, name: repository, details: "\($0)")) + } async let license = await Current.fetchLicense(client, owner, repository) async let readme = await Current.fetchReadme(client, owner, repository) - // First one should be an `async let` as well but it doesn't compile right now. Reported as - // https://github.com/swiftlang/swift/issues/76169 - return (try await Result { try await Current.fetchMetadata(client, owner, repository) } - .mapError { - Ingestion.Error(packageId: package.model.id!, - underlyingError: .fetchMetadataFailed(owner: owner, name: repository, details: "\($0)")) - } - .get(), - await license, - await readme) + return (metadata, await license, await readme) } From 1dcbd09ead6bc69e642d3d1c8e6f04955ea68e8b Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 13 Dec 2024 12:09:11 +0100 Subject: [PATCH 25/33] Restore async let in fetchMetadata --- Sources/App/Commands/Ingest.swift | 46 ++++++++++++++---------------- Tests/AppTests/IngestorTests.swift | 9 ++---- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index d8df0df2f..0e02bda67 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -169,7 +169,22 @@ extension Ingestion { static func ingest(client: Client, database: Database, package: Joined) async { let result = await Result { () async throws(Ingestion.Error) -> Joined in Current.logger().info("Ingesting \(package.package.url)") - let (metadata, license, readme) = try await fetchMetadata(client: client, package: package) + + // Even though we have a `Joined` as a parameter, we must not rely + // on `repository` for owner/name as it will be nil when a package is first ingested. + // The only way to get `owner` and `repository` here is by parsing them from the URL. + let (owner, repository) = try await run { + try Github.parseOwnerName(url: package.model.url) + } rethrowing: { _ in + Ingestion.Error.invalidURL(packageId: package.model.id!, url: package.model.url) + } + + let (metadata, license, readme) = try await run { + try await fetchMetadata(client: client, package: package.model, owner: owner, repository: repository) + } rethrowing: { + Ingestion.Error(packageId: package.model.id!, + underlyingError: .fetchMetadataFailed(owner: owner, name: repository, details: "\($0)")) + } let repo = try await findOrCreateRepository(on: database, for: package) let s3Readme: S3Readme? @@ -233,32 +248,15 @@ extension Ingestion { return repository.s3Readme } } -} -func fetchMetadata(client: Client, package: Joined) async throws(Ingestion.Error) -> (Github.Metadata, Github.License?, Github.Readme?) { - // Even though we get through a `Joined` as a parameter, it's - // we must not rely on `repository` as it will be nil when a package is first ingested. - // The only way to get `owner` and `repository` here is by parsing them from the URL. - let (owner, repository) = try await run { - try Github.parseOwnerName(url: package.model.url) - } rethrowing: { _ in - Ingestion.Error.invalidURL(packageId: package.model.id!, url: package.model.url) - } - // sas 2024-12-13: This should be an `async let` as well but it doesn't compile right now with the - // typed throw. Reported as - // https://github.com/swiftlang/swift/issues/76169 - // async let metadata = try await Current.fetchMetadata(client, owner, repository) - let metadata = try await run { - try await Current.fetchMetadata(client, owner, repository) - } rethrowing: { - Ingestion.Error(packageId: package.model.id!, - underlyingError: .fetchMetadataFailed(owner: owner, name: repository, details: "\($0)")) - } - async let license = await Current.fetchLicense(client, owner, repository) - async let readme = await Current.fetchReadme(client, owner, repository) + static func fetchMetadata(client: Client, package: Package, owner: String, repository: String) async throws -> (Github.Metadata, Github.License?, Github.Readme?) { + async let metadata = try await Current.fetchMetadata(client, owner, repository) + async let license = await Current.fetchLicense(client, owner, repository) + async let readme = await Current.fetchReadme(client, owner, repository) - return (metadata, await license, await readme) + return try await (metadata, license, readme) + } } diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 9dc69a9e2..8a54b07f9 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -623,11 +623,8 @@ class IngestorTests: AppTestCase { func test_issue_761_no_license() async throws { // https://github.com/SwiftPackageIndex/SwiftPackageIndex-Server/issues/761 // setup - let pkg = try await { - let p = Package(url: "https://github.com/foo/1") - try await p.save(on: app.db) - return Joined(model: p) - }() + let pkg = Package(url: "https://github.com/foo/1") + try await pkg.save(on: app.db) // use mock for metadata request which we're not interested in ... Current.fetchMetadata = { _, _, _ in Github.Metadata() } // and live fetch request for fetchLicense, whose behaviour we want to test ... @@ -637,7 +634,7 @@ class IngestorTests: AppTestCase { let client = MockClient { _, resp in resp.status = .notFound } // MUT - let (_, license, _) = try await fetchMetadata(client: client, package: pkg) + let (_, license, _) = try await Ingestion.fetchMetadata(client: client, package: pkg, owner: "foo", repository: "1") // validate XCTAssertEqual(license, nil) From ec8b61df0c39b63795c12f7b90029eb15000a680 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 13 Dec 2024 13:01:45 +0100 Subject: [PATCH 26/33] Cleanup --- .../App/Core/Extensions/S3Readme+ext.swift | 78 +++++++++++++++++++ Sources/App/Core/Extensions/S3Store+ext.swift | 66 +--------------- 2 files changed, 81 insertions(+), 63 deletions(-) create mode 100644 Sources/App/Core/Extensions/S3Readme+ext.swift diff --git a/Sources/App/Core/Extensions/S3Readme+ext.swift b/Sources/App/Core/Extensions/S3Readme+ext.swift new file mode 100644 index 000000000..99aab319d --- /dev/null +++ b/Sources/App/Core/Extensions/S3Readme+ext.swift @@ -0,0 +1,78 @@ +// Copyright Dave Verwer, Sven A. Schmidt, and other contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Dependencies +import S3Store +import Vapor + + +extension S3Readme { + enum Error: Swift.Error { + case envVariableNotSet(String) + case invalidURL(String) + case missingBody + case requestFailed(key: S3Store.Key, error: Swift.Error) + case storeReadmeFailed + case storeImagesFailed + } + + static func fetchReadme(client: Client, owner: String, repository: String) async throws(S3Readme.Error) -> String { + let key = try S3Store.Key.readme(owner: owner, repository: repository) + let response: ClientResponse + do { + response = try await client.get(URI(string: key.objectUrl)) + } catch { + throw .requestFailed(key: key, error: error) + } + guard let body = response.body else { throw .missingBody } + return body.asString() + } + + static func storeReadme(owner: String, repository: String, readme: String) async throws(S3Readme.Error) -> String { + @Dependency(\.environment) var environment + guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } + guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} + let store = S3Store(credentials: .init(keyId: accessKeyId, secret: secretAccessKey)) + let key = try S3Store.Key.readme(owner: owner, repository: repository) + + Current.logger().debug("Copying readme to \(key.s3Uri) ...") + do { + try await store.save(payload: readme, to: key) + } catch { + throw .requestFailed(key: key, error: error) + } + + return key.objectUrl + } + + static func storeReadmeImages(client: Client, imagesToCache: [Github.Readme.ImageToCache]) async throws(S3Readme.Error) { + @Dependency(\.environment) var environment + guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } + guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} + + let store = S3Store(credentials: .init(keyId: accessKeyId, secret: secretAccessKey)) + for imageToCache in imagesToCache { + Current.logger().debug("Copying readme image to \(imageToCache.s3Key.s3Uri) ...") + do { + let response = try await client.get(URI(stringLiteral: imageToCache.originalUrl)) + if var body = response.body, let imageData = body.readData(length: body.readableBytes) { + try await store.save(payload: imageData, to: imageToCache.s3Key) + } + } catch { + throw .requestFailed(key: imageToCache.s3Key, error: error) + } + } + } + +} diff --git a/Sources/App/Core/Extensions/S3Store+ext.swift b/Sources/App/Core/Extensions/S3Store+ext.swift index 125d45ce8..4f0ee6a60 100644 --- a/Sources/App/Core/Extensions/S3Store+ext.swift +++ b/Sources/App/Core/Extensions/S3Store+ext.swift @@ -12,70 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -import S3Store -import Vapor -import Dependencies - - -extension S3Readme { - enum Error: Swift.Error { - case envVariableNotSet(String) - case invalidURL(String) - case missingBody - case requestFailed(key: S3Store.Key, error: Swift.Error) - case storeReadmeFailed - case storeImagesFailed - } - - static func fetchReadme(client: Client, owner: String, repository: String) async throws(S3Readme.Error) -> String { - let key = try S3Store.Key.readme(owner: owner, repository: repository) - let response: ClientResponse - do { - response = try await client.get(URI(string: key.objectUrl)) - } catch { - throw .requestFailed(key: key, error: error) - } - guard let body = response.body else { throw .missingBody } - return body.asString() - } - - static func storeReadme(owner: String, repository: String, readme: String) async throws(S3Readme.Error) -> String { - @Dependency(\.environment) var environment - guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } - guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} - let store = S3Store(credentials: .init(keyId: accessKeyId, secret: secretAccessKey)) - let key = try S3Store.Key.readme(owner: owner, repository: repository) - - Current.logger().debug("Copying readme to \(key.s3Uri) ...") - do { - try await store.save(payload: readme, to: key) - } catch { - throw .requestFailed(key: key, error: error) - } - - return key.objectUrl - } +import Foundation - static func storeReadmeImages(client: Client, imagesToCache: [Github.Readme.ImageToCache]) async throws(S3Readme.Error) { - @Dependency(\.environment) var environment - guard let accessKeyId = environment.awsAccessKeyId() else { throw .envVariableNotSet("AWS_ACCESS_KEY_ID") } - guard let secretAccessKey = environment.awsSecretAccessKey() else { throw .envVariableNotSet("AWS_SECRET_ACCESS_KEY")} - - let store = S3Store(credentials: .init(keyId: accessKeyId, secret: secretAccessKey)) - for imageToCache in imagesToCache { - Current.logger().debug("Copying readme image to \(imageToCache.s3Key.s3Uri) ...") - do { - let response = try await client.get(URI(stringLiteral: imageToCache.originalUrl)) - if var body = response.body, let imageData = body.readData(length: body.readableBytes) { - try await store.save(payload: imageData, to: imageToCache.s3Key) - } - } catch { - throw .requestFailed(key: imageToCache.s3Key, error: error) - } - } - } - -} +import Dependencies +import S3Store extension S3Store.Key { From b2c2ae505754f9d9632d8412a489fd981202ef74 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 13 Dec 2024 13:06:44 +0100 Subject: [PATCH 27/33] Re-enable test --- Tests/AppTests/IngestorTests.swift | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index 8a54b07f9..c182a9981 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -362,9 +362,8 @@ class IngestorTests: AppTestCase { } } - func _test_ingest_unique_owner_name_violation() async throws { + func test_ingest_unique_owner_name_violation() async throws { // Test error behaviour when two packages resolving to the same owner/name are ingested: - // - don't update package // - don't create repository records // setup try await Package(id: .id0, url: "https://github.com/foo/0", status: .ok, processingStage: .reconciliation) From 04ef182001f837ea99240cab7f658c7d63643f07 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Fri, 13 Dec 2024 13:24:28 +0100 Subject: [PATCH 28/33] Explicitly analyse packages and verify processing stage --- Tests/AppTests/IngestorTests.swift | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index c182a9981..e4871b36a 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -424,7 +424,7 @@ class IngestorTests: AppTestCase { try await withDependencies { $0.date.now = .now $0.environment.allowSocialPosts = { false } - } operation: { + } operation: { [db = app.db] in Current.fileManager.fileExists = { @Sendable _ in true } Current.git.commitCount = { @Sendable _ in 1 } Current.git.firstCommitDate = { @Sendable _ in .t0 } @@ -440,7 +440,10 @@ class IngestorTests: AppTestCase { return "" } - try await Analyze.analyze(client: app.client, database: app.db, mode: .limit(10)) + try await Analyze.analyze(client: app.client, database: db, mode: .id(.id0)) + try await Analyze.analyze(client: app.client, database: db, mode: .id(.id1)) + try await XCTAssertEqualAsync(try await Package.find(.id0, on: db)?.processingStage, .analysis) + try await XCTAssertEqualAsync(try await Package.find(.id1, on: db)?.processingStage, .analysis) } } From 0c0ebf6042d6f4378d8346456deb61e34a804e83 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sat, 14 Dec 2024 13:02:43 +0100 Subject: [PATCH 29/33] Turn `Ingestion.fetchMetadata` into `throws(Github.Error)` --- Sources/App/Commands/Ingest.swift | 17 +++++++++++++++-- Sources/App/Core/AppEnvironment.swift | 4 ++-- Tests/AppTests/ErrorReportingTests.swift | 2 +- Tests/AppTests/IngestorTests.swift | 15 +++++---------- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 0e02bda67..820c68c7f 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -250,12 +250,25 @@ extension Ingestion { } - static func fetchMetadata(client: Client, package: Package, owner: String, repository: String) async throws -> (Github.Metadata, Github.License?, Github.Readme?) { + static func fetchMetadata(client: Client, package: Package, owner: String, repository: String) async throws(Github.Error) -> (Github.Metadata, Github.License?, Github.Readme?) { async let metadata = try await Current.fetchMetadata(client, owner, repository) async let license = await Current.fetchLicense(client, owner, repository) async let readme = await Current.fetchReadme(client, owner, repository) - return try await (metadata, license, readme) + do { + return try await (metadata, license, readme) + } catch let error as Github.Error { + throw error + } catch { + // This whole do { ... } catch { ... } should be unnecessary - it's a workaround for + // https://github.com/swiftlang/swift/issues/76169 + assert(false, "Unexpected error type: \(type(of: error))") + // We need to throw _something_ here (we should never hit this codepath though) + throw Github.Error.requestFailed(.internalServerError) + // We could theoretically avoid this whole second catch and just do + // error as! GithubError + // but let's play it safe and not risk a server crash, unlikely as it may be. + } } } diff --git a/Sources/App/Core/AppEnvironment.swift b/Sources/App/Core/AppEnvironment.swift index edb1e155b..196451658 100644 --- a/Sources/App/Core/AppEnvironment.swift +++ b/Sources/App/Core/AppEnvironment.swift @@ -25,7 +25,7 @@ import FoundationNetworking struct AppEnvironment: Sendable { var fetchHTTPStatusCode: @Sendable (_ url: String) async throws -> HTTPStatus var fetchLicense: @Sendable (_ client: Client, _ owner: String, _ repository: String) async -> Github.License? - var fetchMetadata: @Sendable (_ client: Client, _ owner: String, _ repository: String) async throws -> Github.Metadata + var fetchMetadata: @Sendable (_ client: Client, _ owner: String, _ repository: String) async throws(Github.Error) -> Github.Metadata var fetchReadme: @Sendable (_ client: Client, _ owner: String, _ repository: String) async -> Github.Readme? var fetchS3Readme: @Sendable (_ client: Client, _ owner: String, _ repository: String) async throws -> String var fileManager: FileManager @@ -86,7 +86,7 @@ extension AppEnvironment { static let live = AppEnvironment( fetchHTTPStatusCode: { url in try await Networking.fetchHTTPStatusCode(url) }, fetchLicense: { client, owner, repo in await Github.fetchLicense(client:client, owner: owner, repository: repo) }, - fetchMetadata: { client, owner, repo in try await Github.fetchMetadata(client:client, owner: owner, repository: repo) }, + fetchMetadata: { client, owner, repo throws(Github.Error) in try await Github.fetchMetadata(client:client, owner: owner, repository: repo) }, fetchReadme: { client, owner, repo in await Github.fetchReadme(client:client, owner: owner, repository: repo) }, fetchS3Readme: { client, owner, repo in try await S3Readme.fetchReadme(client:client, owner: owner, repository: repo) }, fileManager: .live, diff --git a/Tests/AppTests/ErrorReportingTests.swift b/Tests/AppTests/ErrorReportingTests.swift index 40153a5ec..8bee183b5 100644 --- a/Tests/AppTests/ErrorReportingTests.swift +++ b/Tests/AppTests/ErrorReportingTests.swift @@ -34,7 +34,7 @@ class ErrorReportingTests: AppTestCase { func test_Ingestor_error_reporting() async throws { // setup try await Package(id: .id0, url: "1", processingStage: .reconciliation).save(on: app.db) - Current.fetchMetadata = { _, _, _ in throw Github.Error.invalidURL("1") } + Current.fetchMetadata = { _, _, _ throws(Github.Error) in throw Github.Error.invalidURL("1") } try await withDependencies { $0.date.now = .now diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index e4871b36a..ba5ff34a0 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -65,16 +65,12 @@ class IngestorTests: AppTestCase { func test_ingest_continue_on_error() async throws { // Test completion of ingestion despite early error // setup - enum TestError: Error, Equatable { - case badRequest - } - let packages = try await savePackages(on: app.db, ["https://github.com/foo/1", "https://github.com/foo/2"], processingStage: .reconciliation) .map(Joined.init(model:)) - Current.fetchMetadata = { _, owner, repository in + Current.fetchMetadata = { _, owner, repository throws(Github.Error) in if owner == "foo" && repository == "1" { - throw TestError.badRequest + throw Github.Error.requestFailed(.badRequest) } return .mock(owner: owner, repository: repository) } @@ -328,11 +324,10 @@ class IngestorTests: AppTestCase { let urls = ["https://github.com/foo/1", "https://github.com/foo/2", "https://github.com/foo/3"] - let packages = try await savePackages(on: app.db, urls.asURLs, - processingStage: .reconciliation) - Current.fetchMetadata = { _, owner, repository in + try await savePackages(on: app.db, urls.asURLs, processingStage: .reconciliation) + Current.fetchMetadata = { _, owner, repository throws(Github.Error) in if owner == "foo" && repository == "2" { - throw AppError.genericError(packages[1].id, "error 2") + throw Github.Error.requestFailed(.badRequest) } return .mock(owner: owner, repository: repository) } From d597758e634ab84639a12bfb8eb756a0b151b231 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sat, 14 Dec 2024 13:37:50 +0100 Subject: [PATCH 30/33] Add error forcing mechanics --- Sources/App/Commands/Ingest.swift | 33 +++++++++++++++++-- .../Core/Dependencies/EnvironmentClient.swift | 19 ++++++++++- app.yml | 1 + 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 820c68c7f..96d63fd43 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +import Dependencies import Fluent import PostgresKit import Vapor @@ -168,13 +169,17 @@ func ingest(client: Client, extension Ingestion { static func ingest(client: Client, database: Database, package: Joined) async { let result = await Result { () async throws(Ingestion.Error) -> Joined in + @Dependency(\.environment) var environment Current.logger().info("Ingesting \(package.package.url)") // Even though we have a `Joined` as a parameter, we must not rely // on `repository` for owner/name as it will be nil when a package is first ingested. // The only way to get `owner` and `repository` here is by parsing them from the URL. let (owner, repository) = try await run { - try Github.parseOwnerName(url: package.model.url) + if environment.shouldFail(failureMode: .invalidURL) { + throw Github.Error.invalidURL(package.model.url) + } + return try Github.parseOwnerName(url: package.model.url) } rethrowing: { _ in Ingestion.Error.invalidURL(packageId: package.model.id!, url: package.model.url) } @@ -223,7 +228,12 @@ extension Ingestion { static func findOrCreateRepository(on database: Database, for package: Joined) async throws(Ingestion.Error) -> Repository { try await run { - try await Repository.findOrCreate(on: database, for: package.model) + @Dependency(\.environment) var environment + if environment.shouldFail(failureMode: .findOrCreateRepositoryFailed) { + throw Abort(.internalServerError) + } + + return try await Repository.findOrCreate(on: database, for: package.model) } rethrowing: { Ingestion.Error( packageId: package.model.id!, @@ -251,6 +261,11 @@ extension Ingestion { static func fetchMetadata(client: Client, package: Package, owner: String, repository: String) async throws(Github.Error) -> (Github.Metadata, Github.License?, Github.Readme?) { + @Dependency(\.environment) var environment + if environment.shouldFail(failureMode: .fetchMetadataFailed) { + throw Github.Error.requestFailed(.internalServerError) + } + async let metadata = try await Current.fetchMetadata(client, owner, repository) async let license = await Current.fetchLicense(client, owner, repository) async let readme = await Current.fetchReadme(client, owner, repository) @@ -286,6 +301,20 @@ func updateRepository(on database: Database, readmeInfo: Github.Readme?, s3Readme: S3Readme?, fork: Fork? = nil) async throws(Ingestion.Error.UnderlyingError) { + @Dependency(\.environment) var environment + if environment.shouldFail(failureMode: .noRepositoryMetadata) { + throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) + } + if environment.shouldFail(failureMode: .repositorySaveFailed) { + throw .repositorySaveFailed(owner: repository.owner, + name: repository.name, + details: "TestError") + } + if environment.shouldFail(failureMode: .repositorySaveUniqueViolation) { + throw .repositorySaveUniqueViolation(owner: repository.owner, + name: repository.name, + details: "TestError") + } guard let repoMetadata = metadata.repository else { throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) } diff --git a/Sources/App/Core/Dependencies/EnvironmentClient.swift b/Sources/App/Core/Dependencies/EnvironmentClient.swift index b3da31ab7..166993620 100644 --- a/Sources/App/Core/Dependencies/EnvironmentClient.swift +++ b/Sources/App/Core/Dependencies/EnvironmentClient.swift @@ -44,6 +44,16 @@ struct EnvironmentClient { var mastodonCredentials: @Sendable () -> Mastodon.Credentials? var mastodonPost: @Sendable (_ client: Client, _ post: String) async throws -> Void var random: @Sendable (_ range: ClosedRange) -> Double = { XCTFail("random"); return Double.random(in: $0) } + + enum FailureMode: String { + case fetchMetadataFailed + case findOrCreateRepositoryFailed + case invalidURL + case noRepositoryMetadata + case repositorySaveFailed + case repositorySaveUniqueViolation + } + var shouldFail: @Sendable (_ failureMode: FailureMode) -> Bool = { _ in false } } @@ -100,7 +110,14 @@ extension EnvironmentClient: DependencyKey { .map(Mastodon.Credentials.init(accessToken:)) }, mastodonPost: { client, message in try await Mastodon.post(client: client, message: message) }, - random: { range in Double.random(in: range) } + random: { range in Double.random(in: range) }, + shouldFail: { failureMode in + let shouldFail = Environment.get("FAILURE_MODE") + .map { Data($0.utf8) } + .flatMap { try? JSONDecoder().decode([String: Double].self, from: $0) } ?? [:] + guard let rate = shouldFail[failureMode.rawValue] else { return false } + return Double.random(in: 0...1) <= rate + } ) } } diff --git a/app.yml b/app.yml index 0245834ff..e2306bc2b 100644 --- a/app.yml +++ b/app.yml @@ -46,6 +46,7 @@ x-shared: &shared DATABASE_USERNAME: ${DATABASE_USERNAME} DATABASE_PASSWORD: ${DATABASE_PASSWORD} DATABASE_USE_TLS: ${DATABASE_USE_TLS} + FAILURE_MODE: ${FAILURE_MODE} GITHUB_TOKEN: ${GITHUB_TOKEN} GITLAB_API_TOKEN: ${GITLAB_API_TOKEN} GITLAB_PIPELINE_LIMIT: ${GITLAB_PIPELINE_LIMIT} From 11e305270c345e5764c084182120dd35c40adc31 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sat, 14 Dec 2024 18:29:05 +0100 Subject: [PATCH 31/33] Improve error logging --- Sources/App/Commands/Ingest.swift | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index 96d63fd43..c0e03703f 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -37,18 +37,18 @@ enum Ingestion { var description: String { switch self { - case let .fetchMetadataFailed(_, _, details): - "fetchMetadataFailed(\(details))" - case .findOrCreateRepositoryFailed: - "findOrCreateRepositoryFailed" + case let .fetchMetadataFailed(owner, name, details): + "fetchMetadataFailed(\(owner), \(name), \(details))" + case let .findOrCreateRepositoryFailed(url, details): + "findOrCreateRepositoryFailed(\(url), \(details))" case let .invalidURL(url): "invalidURL(\(url))" - case .noRepositoryMetadata: - "noRepositoryMetadata" - case let .repositorySaveFailed(_, _, details): - "repositorySaveFailed(\(String(reflecting: details)))" - case let .repositorySaveUniqueViolation(_, _, details): - "repositorySaveUniqueViolation(\(details))" + case let .noRepositoryMetadata(owner, name): + "noRepositoryMetadata(\(owner), \(name))" + case let .repositorySaveFailed(owner, name, details): + "repositorySaveFailed(\(owner), \(name), \(details))" + case let .repositorySaveUniqueViolation(owner, name, details): + "repositorySaveUniqueViolation(\(owner), \(name), \(details))" } } } From 00791400a631d610207a3073f939ed7dc5116676 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sun, 15 Dec 2024 13:47:55 +0100 Subject: [PATCH 32/33] Update error message expectation --- Tests/AppTests/IngestorTests.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestorTests.swift index ba5ff34a0..a2374419c 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestorTests.swift @@ -412,7 +412,7 @@ class IngestorTests: AppTestCase { XCTAssertEqual(logs.count, 1) let log = try XCTUnwrap(logs.first) XCTAssertEqual(log.level, .critical) - XCTAssertEqual(log.message, #"Ingestion.Error(\#(try failed.requireID()), repositorySaveUniqueViolation(duplicate key value violates unique constraint "idx_repositories_owner_name"))"#) + XCTAssertEqual(log.message, #"Ingestion.Error(\#(try failed.requireID()), repositorySaveUniqueViolation(owner, name, duplicate key value violates unique constraint "idx_repositories_owner_name"))"#) } // ensure analysis can process these packages From 475ce9d8beed4e9d06a4f407d3a6d4e245622a28 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Sun, 15 Dec 2024 14:48:01 +0100 Subject: [PATCH 33/33] Make IngestCommand.run non-throwing --- Sources/App/Commands/Ingest.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingest.swift index c0e03703f..c19501bef 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingest.swift @@ -81,7 +81,7 @@ struct IngestCommand: AsyncCommand { var help: String { "Run package ingestion (fetching repository metadata)" } - func run(using context: CommandContext, signature: SPICommand.Signature) async throws { + func run(using context: CommandContext, signature: SPICommand.Signature) async { let client = context.application.client let db = context.application.db Current.setLogger(Logger(component: "ingest"))