diff --git a/.changelog/3518.md b/.changelog/3518.md new file mode 100644 index 0000000000..c845424cf2 --- /dev/null +++ b/.changelog/3518.md @@ -0,0 +1,4 @@ +### Added +- **GameHacking.org Cheat Proxy** — Cloudflare Workers middleware (`Scripts/cheat-proxy/`) that scrapes GameHacking.org and returns normalised JSON cheat entries with 24h server-side KV caching; reduces scraper fragility and load on the source site +- **Cheat proxy settings** — `useCheatProxy` (default `true`) and `cheatProxyURL` settings allow the proxy endpoint to be enabled/disabled and configured without an app update +- **Automatic proxy fallback** — When the proxy is unreachable or returns no results, `GameHackingOrgLookup` transparently falls back to the existing direct HTML scraper diff --git a/PVLibrary/Package.swift b/PVLibrary/Package.swift index 815a2a4181..65823e5868 100644 --- a/PVLibrary/Package.swift +++ b/PVLibrary/Package.swift @@ -243,6 +243,7 @@ let package = Package( "PVLookup", "PVPrimitives", "PVFileSystem", + "PVSettings", .product(name: "RealmSwift", package: "realm-swift"), .product(name: "ZipArchive", package: "ZipArchive"), ] diff --git a/PVLibrary/Sources/PVLibrary/Cheat/GameHackingOrgLookup.swift b/PVLibrary/Sources/PVLibrary/Cheat/GameHackingOrgLookup.swift index c56b99fd92..d33ec9239d 100644 --- a/PVLibrary/Sources/PVLibrary/Cheat/GameHackingOrgLookup.swift +++ b/PVLibrary/Sources/PVLibrary/Cheat/GameHackingOrgLookup.swift @@ -21,6 +21,7 @@ import Foundation import PVLogging +import PVSettings // MARK: - GameHackingOrgLookup @@ -49,6 +50,10 @@ public actor GameHackingOrgLookup { private static let maxMemoryCacheEntries = 50 /// Minimum seconds between requests to be polite to the server. private static let minRequestInterval: TimeInterval = 1.0 + /// Compile-time default proxy URL. Can be overridden via PVSettings `cheatProxyURL`. + /// Set to a non-empty string after deploying the Cloudflare Worker + /// (see `Scripts/cheat-proxy/README.md`). 
/// Resolves the proxy base URL that `searchCheats` should use.
///
/// The user-configured `cheatProxyURL` setting wins when it contains anything other than
/// whitespace; otherwise the compile-time `defaultProxyURL` is returned (which may itself
/// be empty, in which case the caller skips the proxy path).
private func resolvedProxyURL() -> String {
    let userConfigured = Defaults[.cheatProxyURL].trimmingCharacters(in: .whitespacesAndNewlines)
    return userConfigured.isEmpty ? Self.defaultProxyURL : userConfigured
}
/// Fetch cheat entries from the Provenance cheat proxy worker.
///
/// The proxy endpoint is `GET /cheats?title=<title>&system=<slug>`.
///
/// - Parameters:
///   - title: Game title to search for; also stored as `romTitle` on every returned entry.
///   - systemSlug: Optional GameHacking.org system slug, sent as the `system` query item.
///   - proxyBaseURL: Base URL of the deployed proxy worker (with or without a trailing slash).
/// - Returns:
///   - `nil` when the proxy URL is invalid, the proxy is unreachable, returns a non-2xx
///     status, or a network/decode error occurs — the caller should fall back to direct scraping.
///   - `[]` when the proxy responded with an empty array AND `X-Proxy-Status: ok`
///     (confirmed "no cheats found") — no fallback needed.
///   - the mapped entries when the proxy returned results.
private func fetchFromProxy(title: String, systemSlug: String?, proxyBaseURL: String) async -> [CheatDatabaseEntry]? {
    guard let url = Self.proxyRequestURL(base: proxyBaseURL, title: title, systemSlug: systemSlug) else {
        WLOG("GameHackingOrgLookup: invalid proxy URL '\(proxyBaseURL)'")
        return nil
    }

    do {
        var request = URLRequest(url: url)
        request.setValue("Provenance-Emu/1.0", forHTTPHeaderField: "User-Agent")
        request.timeoutInterval = 10

        let (data, response) = try await URLSession.shared.data(for: request)
        guard let http = response as? HTTPURLResponse,
              (200..<300).contains(http.statusCode) else {
            WLOG("GameHackingOrgLookup: proxy non-200 for '\(title)'")
            return nil
        }

        let raw = try JSONDecoder().decode([ProxyCheatEntry].self, from: data)

        guard !raw.isEmpty else {
            // Only trust an empty result as "no cheats found" when the proxy confirms it
            // successfully contacted upstream via X-Proxy-Status: ok. Without this header
            // the empty array may be a transient error — fall back to direct scraping.
            guard http.value(forHTTPHeaderField: "X-Proxy-Status") == "ok" else {
                DLOG("GameHackingOrgLookup: proxy returned empty without ok status for '\(title)' — falling back")
                return nil
            }
            return []
        }

        return raw.enumerated().map { index, entry in
            CheatDatabaseEntry(
                id: Self.idOffset + index,
                cheatName: entry.name,
                cheatCode: entry.code,
                cheatDescription: nil,
                deviceName: "GameHacking.org",
                deviceFormat: nil,
                category: entry.category ?? "General",
                romTitle: title,
                systemName: systemSlug,
                isOnlineResult: true
            )
        }
    } catch {
        WLOG("GameHackingOrgLookup: proxy fetch error for '\(title)': \(error)")
        return nil
    }
}

/// Builds the `/cheats` request URL for the proxy worker.
///
/// The `/cheats` segment is appended to the base URL's *path* (not to the raw string), so a
/// base that already carries a query or fragment still yields a well-formed URL.
/// Returns `nil` when `base` cannot be parsed as a URL.
private static func proxyRequestURL(base: String, title: String, systemSlug: String?) -> URL? {
    guard var components = URLComponents(string: base) else { return nil }

    let basePath = components.percentEncodedPath
    components.percentEncodedPath = basePath + (basePath.hasSuffix("/") ? "cheats" : "/cheats")

    var queryItems = [URLQueryItem(name: "title", value: title)]
    if let systemSlug {
        queryItems.append(URLQueryItem(name: "system", value: systemSlug))
    }
    components.queryItems = queryItems

    // URLComponents leaves "+" unescaped because RFC 3986 allows it in a query, but the
    // worker parses the query form-style, where "+" means a space. Escape it explicitly so
    // titles that contain a literal plus sign reach the proxy intact.
    components.percentEncodedQuery = components.percentEncodedQuery?
        .replacingOccurrences(of: "+", with: "%2B")

    return components.url
}

/// JSON model returned by the cheat proxy worker.
private struct ProxyCheatEntry: Decodable {
    /// Human-readable cheat name.
    let name: String
    /// Cheat code string as delivered by the proxy.
    let code: String
    /// Optional category; callers substitute "General" when absent.
    let category: String?
}
@testable import PVLibrary +import Defaults +import PVSettings import XCTest final class GameHackingOrgLookupTests: XCTestCase { @@ -114,4 +116,170 @@ final class GameHackingOrgLookupTests: XCTestCase { let result = await lookup.looksLikeCode("Infinite Lives") XCTAssertFalse(result) } + + // MARK: - Proxy Path (URLProtocol stubs) + + func testSearchCheats_proxyReturnsResults() async { + XCTAssertTrue(URLProtocol.registerClass(ProxyCannedProtocol.self), "URLProtocol registration failed — test may hit real network") + defer { URLProtocol.unregisterClass(ProxyCannedProtocol.self) } + + let json = #"[{"name":"Infinite Lives","code":"DEADBEEF00000001","category":"General"}]"# + ProxyCannedProtocol.cannedJSON = Data(json.utf8) + ProxyCannedProtocol.statusCode = 200 + ProxyCannedProtocol.cannedHeaders = ["X-Proxy-Status": "ok"] + ProxyCannedProtocol.lastRequest = nil + defer { ProxyCannedProtocol.cannedHeaders = [:] } + + Defaults[.useCheatProxy] = true + Defaults[.cheatProxyURL] = "https://test.proxy.pvemu.invalid" + defer { + Defaults.reset(.useCheatProxy) + Defaults.reset(.cheatProxyURL) + } + + let title = "ProxyHappyPath_\(UUID().uuidString)" + let entries = await GameHackingOrgLookup.shared.searchCheats(title: title, systemSlug: "n64") + + // The proxy URL should have been contacted with the correct path/query + let intercepted = ProxyCannedProtocol.lastRequest?.url?.absoluteString ?? "" + XCTAssertTrue(intercepted.contains("/cheats"), "Expected /cheats in proxy request URL, got: \(intercepted)") + XCTAssertTrue(intercepted.contains("title="), "Expected title= query param in proxy request URL") + + // Results should be decoded from the proxy JSON + XCTAssertEqual(entries.count, 1) + XCTAssertEqual(entries.first?.cheatName, "Infinite Lives") + XCTAssertEqual(entries.first?.cheatCode, "DEADBEEF00000001") + XCTAssertEqual(entries.first?.deviceName, "GameHacking.org") + XCTAssertTrue(entries.first?.isOnlineResult ?? 
false) + } + + func testSearchCheats_proxyReturnsEmpty_noFallback() async { + // Proxy returns [] with X-Proxy-Status: ok — meaning "upstream confirmed no cheats". + // searchCheats should trust this and NOT fall back to direct scraping. + // DirectScrapeBlockerProtocol is registered to ensure no gamehacking.org request is made. + XCTAssertTrue(URLProtocol.registerClass(ProxyCannedProtocol.self), "URLProtocol registration failed — test may hit real network") + XCTAssertTrue(URLProtocol.registerClass(DirectScrapeBlockerProtocol.self), "URLProtocol registration failed — test may hit real network") + defer { + URLProtocol.unregisterClass(ProxyCannedProtocol.self) + URLProtocol.unregisterClass(DirectScrapeBlockerProtocol.self) + } + + ProxyCannedProtocol.cannedJSON = Data("[]".utf8) + ProxyCannedProtocol.statusCode = 200 + // X-Proxy-Status: ok tells the client the proxy successfully ran and found nothing + ProxyCannedProtocol.cannedHeaders = ["X-Proxy-Status": "ok"] + ProxyCannedProtocol.lastRequest = nil + DirectScrapeBlockerProtocol.requestCount = 0 + defer { + ProxyCannedProtocol.cannedHeaders = [:] + DirectScrapeBlockerProtocol.requestCount = 0 + } + + Defaults[.useCheatProxy] = true + Defaults[.cheatProxyURL] = "https://test.proxy.pvemu.invalid" + defer { + Defaults.reset(.useCheatProxy) + Defaults.reset(.cheatProxyURL) + } + + let title = "ProxyEmptyNoFallback_\(UUID().uuidString)" + let entries = await GameHackingOrgLookup.shared.searchCheats(title: title, systemSlug: nil) + + // Proxy was contacted and returned empty with ok status — no direct scrape should happen + XCTAssertTrue(entries.isEmpty) + let intercepted = ProxyCannedProtocol.lastRequest?.url?.absoluteString ?? 
"" + XCTAssertTrue(intercepted.contains("/cheats"), "Proxy should have been contacted") + XCTAssertEqual(DirectScrapeBlockerProtocol.requestCount, 0, "Direct scrape should NOT occur when proxy confirms no cheats") + } + + func testSearchCheats_proxyDisabled_doesNotContactProxy() async { + // Proxy is disabled — only the direct scrape path runs. + // DirectScrapeBlockerProtocol intercepts gamehacking.org requests so no real + // network call is made; it returns empty HTML so the scrape yields no results. + XCTAssertTrue(URLProtocol.registerClass(ProxyCannedProtocol.self), "URLProtocol registration failed — test may hit real network") + XCTAssertTrue(URLProtocol.registerClass(DirectScrapeBlockerProtocol.self), "URLProtocol registration failed — test may hit real network") + defer { + URLProtocol.unregisterClass(ProxyCannedProtocol.self) + URLProtocol.unregisterClass(DirectScrapeBlockerProtocol.self) + } + + ProxyCannedProtocol.cannedJSON = Data() + ProxyCannedProtocol.lastRequest = nil + DirectScrapeBlockerProtocol.requestCount = 0 + defer { DirectScrapeBlockerProtocol.requestCount = 0 } + + Defaults[.useCheatProxy] = false + Defaults[.cheatProxyURL] = "https://test.proxy.pvemu.invalid" + defer { + Defaults.reset(.useCheatProxy) + Defaults.reset(.cheatProxyURL) + } + + let title = "ProxyDisabled_\(UUID().uuidString)" + _ = await GameHackingOrgLookup.shared.searchCheats(title: title, systemSlug: nil) + // The proxy should not have been contacted when useCheatProxy is false + XCTAssertNil(ProxyCannedProtocol.lastRequest, "Proxy should not be contacted when useCheatProxy is false") + } +} + +// MARK: - URLProtocol stub for proxy tests + +/// Intercepts requests to the test proxy host and returns canned JSON. +private final class ProxyCannedProtocol: URLProtocol { + static var cannedJSON: Data = Data() + static var statusCode: Int = 200 + static var cannedHeaders: [String: String] = [:] + static var lastRequest: URLRequest? 
/// Intercepts requests to the test proxy host and returns canned JSON.
///
/// Tests configure the static properties before issuing a request and read `lastRequest`
/// afterwards to verify what was sent.
private final class ProxyCannedProtocol: URLProtocol {
    /// Response body served for every intercepted request.
    static var cannedJSON: Data = Data()
    /// HTTP status code served for every intercepted request.
    static var statusCode: Int = 200
    /// Extra response headers; these override the default `Content-Type` on key collision.
    static var cannedHeaders: [String: String] = [:]
    /// The most recent request that reached `startLoading()`.
    static var lastRequest: URLRequest?

    override class func canInit(with request: URLRequest) -> Bool {
        request.url?.host?.contains("test.proxy.pvemu.invalid") ?? false
    }

    override class func canonicalRequest(for request: URLRequest) -> URLRequest { request }

    override func startLoading() {
        Self.lastRequest = request

        var headers = ["Content-Type": "application/json"]
        headers.merge(Self.cannedHeaders) { _, canned in canned }

        // Report a load failure instead of crashing the test process if the response
        // cannot be constructed.
        guard let url = request.url,
              let response = HTTPURLResponse(
                  url: url,
                  statusCode: Self.statusCode,
                  httpVersion: "HTTP/1.1",
                  headerFields: headers
              ) else {
            client?.urlProtocol(self, didFailWithError: URLError(.badURL))
            return
        }

        client?.urlProtocol(self, didReceive: response, cacheStoragePolicy: .notAllowed)
        client?.urlProtocol(self, didLoad: Self.cannedJSON)
        client?.urlProtocolDidFinishLoading(self)
    }

    override func stopLoading() {}
}

/// Intercepts requests to gamehacking.org and returns empty HTML, preventing real network calls
/// during tests that exercise the direct-scrape fallback path.
private final class DirectScrapeBlockerProtocol: URLProtocol {
    /// Number of intercepted requests since the counter was last reset by a test.
    static var requestCount: Int = 0

    override class func canInit(with request: URLRequest) -> Bool {
        request.url?.host?.contains("gamehacking.org") ?? false
    }

    override class func canonicalRequest(for request: URLRequest) -> URLRequest { request }

    override func startLoading() {
        Self.requestCount += 1

        // Same guarded construction as the proxy stub: fail the load rather than trap.
        guard let url = request.url,
              let response = HTTPURLResponse(
                  url: url,
                  statusCode: 200,
                  httpVersion: "HTTP/1.1",
                  headerFields: ["Content-Type": "text/html"]
              ) else {
            client?.urlProtocol(self, didFailWithError: URLError(.badURL))
            return
        }

        client?.urlProtocol(self, didReceive: response, cacheStoragePolicy: .notAllowed)
        client?.urlProtocol(self, didLoad: Data("<html><body></body></html>".utf8))
        client?.urlProtocolDidFinishLoading(self)
    }

    override func stopLoading() {}
}
+ static let cheatProxyURL = Key<String>("cheatProxyURL", default: "") +} + public enum ButtonPressEffect: String, Codable, Equatable, UserDefaultsRepresentable, Defaults.Serializable, CaseIterable { case bubble = "bubble" case ring = "ring" diff --git a/Scripts/cheat-proxy/README.md b/Scripts/cheat-proxy/README.md new file mode 100644 index 0000000000..6d4ce5e00b --- /dev/null +++ b/Scripts/cheat-proxy/README.md @@ -0,0 +1,93 @@ +# Provenance Cheat Proxy + +A thin Cloudflare Workers middleware that scrapes [GameHacking.org](https://gamehacking.org) +and returns normalized JSON cheat entries with server-side KV caching (24 h TTL). + +## Endpoint + +``` +GET /cheats?title=<game title>&system=<system slug> +``` + +**Parameters:** +- `title` (required) — The game title to search for. +- `system` (optional) — The GameHacking.org system slug (e.g. `n64`, `gba`, `gc`). + +**Response:** +```json +[ + { "name": "Infinite Lives", "code": "8107A5C02400", "category": "General" }, + { "name": "Max Health", "code": "8107A5C40064", "category": "General" } +] +``` + +Returns an empty array `[]` when no cheats are found or when the request is invalid +(e.g. missing `title`, unknown path). The response is always HTTP 200 with a JSON array +so clients can safely decode without checking the status code. Invalid requests also +include an `X-Validation-Error` header with a short description. + +Responses include an `X-Proxy-Status` header (`ok` or `error`) indicating whether the +proxy successfully contacted GameHacking.org. Callers should treat an empty array with +`X-Proxy-Status: ok` as "confirmed no cheats found" and skip the direct-scrape fallback; +fall back only when the proxy request fails or `X-Proxy-Status` is missing or `error`. 
+ +## Health check + +``` +GET /health → { "status": "ok" } +``` + +## Deployment (manual — requires a Cloudflare account) + +> **Note:** Deployment is manual and requires a free [Cloudflare](https://cloudflare.com) +> account and the [Wrangler CLI](https://developers.cloudflare.com/workers/wrangler/). + +1. **Install Wrangler:** + ```bash + npm install -g wrangler + wrangler login + ``` + +2. **Create the KV namespace:** + ```bash + wrangler kv:namespace create CHEAT_CACHE + ``` + Copy the `id` from the output and paste it into `wrangler.toml` replacing + `REPLACE_WITH_YOUR_KV_NAMESPACE_ID`. + +3. **Deploy:** + ```bash + cd Scripts/cheat-proxy + wrangler deploy + ``` + Wrangler will print the worker URL, e.g.: + `https://provenance-cheat-proxy.<your-subdomain>.workers.dev` + +4. **Configure the app:** + Set the proxy URL in Provenance settings (Settings → Cheats → Proxy URL) or + update the compile-time default in + `PVLibrary/Sources/PVLibrary/Cheat/GameHackingOrgLookup.swift`: + ```swift + static let defaultProxyURL = "https://provenance-cheat-proxy.<your-subdomain>.workers.dev" + ``` + +## Local development + +```bash +cd Scripts/cheat-proxy +wrangler dev +``` + +The worker is then available at `http://localhost:8787/cheats?title=Mario&system=n64`. + +## Rate limits (Cloudflare free tier) + +| Limit | Value | +|------------------|-------------------| +| Requests/day | 100,000 | +| KV reads/day | 100,000 | +| KV writes/day | 1,000 | +| CPU time/request | 10 ms (bundled) | + +The 24 h KV cache ensures that repeated lookups for the same title+system pair +use only one KV write per day and hit the fast read path for all subsequent requests. diff --git a/Scripts/cheat-proxy/worker.js b/Scripts/cheat-proxy/worker.js new file mode 100644 index 0000000000..7207584d6b --- /dev/null +++ b/Scripts/cheat-proxy/worker.js @@ -0,0 +1,417 @@ +/** + * Provenance Cheat Proxy — Cloudflare Workers + * + * Thin middleware proxy for GameHacking.org cheat lookup. 
// KV binding name — must match wrangler.toml [[kv_namespaces]] binding
const KV_NAMESPACE = "CHEAT_CACHE";
const CACHE_TTL_SECONDS = 24 * 60 * 60; // 24 hours

const GAMEHACKING_BASE = "https://gamehacking.org";
const GAMEHACKING_SEARCH = "https://gamehacking.org/search/";

const USER_AGENT = "Mozilla/5.0 (compatible; Provenance-Emu/1.0; +https://github.com/Provenance-Emu/Provenance)";

// Maximum lengths for KV key inputs — Cloudflare KV keys are limited to 512 bytes
const MAX_TITLE_LENGTH = 200;
const MAX_SYSTEM_LENGTH = 64;

export default {
  /**
   * Worker entry point.
   *
   * Routes:
   *   OPTIONS *        → 204 CORS preflight
   *   GET /health      → { "status": "ok" }
   *   GET /cheats      → JSON array of cheat entries (always HTTP 200)
   *   anything else    → [] with an X-Validation-Error header
   *
   * @param {Request} request Incoming request.
   * @param {object} env Worker bindings (optional KV namespace, optional ALLOWED_ORIGINS).
   * @param {object} ctx Execution context; `waitUntil` is used for the background KV write.
   * @returns {Promise<Response>}
   */
  async fetch(request, env, ctx) {
    const url = new URL(request.url);
    const origin = request.headers.get("Origin") || "";

    // Handle CORS preflight
    if (request.method === "OPTIONS") {
      return new Response(null, {
        status: 204,
        headers: corsHeaders(origin, env),
      });
    }

    if (url.pathname === "/health") {
      return new Response(JSON.stringify({ status: "ok" }), {
        headers: { "Content-Type": "application/json", ...corsHeaders(origin, env) },
      });
    }

    if (url.pathname !== "/cheats") {
      // Return empty array (200) rather than 404 so clients can always decode an array.
      // The path mismatch is surfaced via a diagnostic header. The value is kept ASCII-only:
      // non-Latin-1 characters are rejected by spec-compliant Headers implementations and
      // are mis-decoded by many HTTP clients.
      return jsonResponse([], { "X-Validation-Error": "Unknown path - use /cheats" }, origin, env);
    }

    // Normalise once so validation, the cache key and the upstream query all see the
    // same strings (previously only the validation and cache key used the trimmed title).
    const title = (url.searchParams.get("title") || "").trim();
    const system = (url.searchParams.get("system") || "").trim();

    if (title === "") {
      // Return empty array (200) so clients can always decode an array.
      return jsonResponse([], { "X-Validation-Error": "Missing required parameter: title" }, origin, env);
    }

    if (title.length > MAX_TITLE_LENGTH) {
      return jsonResponse([], { "X-Validation-Error": "Parameter too long: title" }, origin, env);
    }

    if (system.length > MAX_SYSTEM_LENGTH) {
      return jsonResponse([], { "X-Validation-Error": "Parameter too long: system" }, origin, env);
    }

    const cacheKey = makeCacheKey(title, system);
    const kv = env[KV_NAMESPACE];

    // Check KV cache
    if (kv) {
      try {
        const cached = await kv.get(cacheKey, { type: "json" });
        if (cached !== null) {
          // X-Proxy-Status: ok signals to clients that the proxy successfully
          // contacted upstream — an empty array here means "no cheats found",
          // not a transient error, so clients should skip the direct-scrape fallback.
          return jsonResponse(cached, { "X-Cache": "HIT", "X-Proxy-Status": "ok" }, origin, env);
        }
      } catch (err) {
        console.error("KV get error:", err);
      }
    }

    // Fetch from GameHacking.org
    let results = [];
    let fetchSucceeded = false;
    try {
      results = await fetchCheats(title, system || null);
      fetchSucceeded = true;
    } catch (err) {
      console.error("Cheat fetch error:", err);
      // Return empty array rather than error — caller falls back to direct scraping
      results = [];
    }

    // Store in KV with TTL only when the upstream fetch succeeded,
    // to avoid caching transient errors as "no cheats" for 24h.
    if (kv && fetchSucceeded) {
      ctx.waitUntil(
        kv.put(cacheKey, JSON.stringify(results), {
          expirationTtl: CACHE_TTL_SECONDS,
        }).catch((err) => console.error("KV put error:", err))
      );
    }

    // X-Proxy-Status: ok when the upstream fetch succeeded (even with 0 results).
    // X-Proxy-Status: error when the fetch itself threw (network/parse failure).
    // Clients use this to decide whether to fall back to direct scraping.
    return jsonResponse(
      results,
      { "X-Cache": "MISS", "X-Proxy-Status": fetchSucceeded ? "ok" : "error" },
      origin,
      env
    );
  },
};

// ─── Helpers ─────────────────────────────────────────────────────────────────

/**
 * Build CORS response headers.
 * If the `ALLOWED_ORIGINS` env var is set (comma-separated), only matching
 * origins receive `Access-Control-Allow-Origin`; otherwise the wildcard is used.
 * A non-matching origin receives the first configured origin.
 *
 * @param {string} origin Value of the request's `Origin` header ("" when absent).
 * @param {object} env Worker bindings; only `ALLOWED_ORIGINS` is read.
 * @returns {Object<string, string>} Header name → value map.
 */
function corsHeaders(origin, env) {
  const allowed = env.ALLOWED_ORIGINS
    ? env.ALLOWED_ORIGINS.split(",").map((o) => o.trim()).filter(Boolean)
    : [];
  const restricted = allowed.length > 0;

  let allowOrigin = "*";
  if (restricted) {
    allowOrigin = allowed.includes(origin) ? origin : allowed[0];
  }

  return {
    "Access-Control-Allow-Origin": allowOrigin,
    "Access-Control-Allow-Methods": "GET, OPTIONS",
    "Access-Control-Allow-Headers": "Content-Type",
    // The response varies by Origin only when an allow-list is in effect.
    ...(restricted ? { "Vary": "Origin" } : {}),
  };
}

/**
 * Serialise `data` as a JSON response with CORS and caching headers.
 *
 * Responses flagged `X-Proxy-Status: error` are sent with `Cache-Control: no-store`:
 * they represent a transient upstream failure and must not be replayed from an HTTP
 * cache (e.g. the client's URL cache) for 24 hours. All other responses keep the 24 h
 * public max-age. Callers can still override any header through `extraHeaders`.
 *
 * @param {*} data JSON-serialisable payload.
 * @param {Object<string, string>} [extraHeaders] Additional/overriding headers.
 * @param {string} [origin] Request `Origin` header value.
 * @param {object} [env] Worker bindings (forwarded to `corsHeaders`).
 * @returns {Response}
 */
function jsonResponse(data, extraHeaders = {}, origin = "", env = {}) {
  const upstreamFailed = extraHeaders["X-Proxy-Status"] === "error";
  return new Response(JSON.stringify(data), {
    headers: {
      "Content-Type": "application/json",
      "Cache-Control": upstreamFailed ? "no-store" : `public, max-age=${CACHE_TTL_SECONDS}`,
      ...corsHeaders(origin, env),
      ...extraHeaders,
    },
  });
}

/**
 * Truncate a string so that its UTF-8 byte representation fits within maxBytes.
 * Cloudflare KV keys are limited by byte length (512 bytes), not character count,
 * so character-level slicing is insufficient for non-ASCII titles (e.g. CJK, emoji).
 *
 * The cut is moved back to the nearest code-point boundary. Cutting inside a multi-byte
 * sequence would make the decoder emit U+FFFD (3 bytes in UTF-8), which could push the
 * result back over `maxBytes`.
 *
 * @param {string} str Input string.
 * @param {number} maxBytes Maximum UTF-8 byte length of the result.
 * @returns {string} `str` itself when it already fits, otherwise a prefix of it.
 */
function truncateToBytes(str, maxBytes) {
  if (str.length * 4 <= maxBytes) return str; // fast-path: all code-units use ≤4 bytes
  const encoded = new TextEncoder().encode(str);
  if (encoded.length <= maxBytes) return str;

  // encoded[end] is the first byte that would be dropped; while it is a UTF-8
  // continuation byte (10xxxxxx) the cut sits inside a sequence, so step back.
  let end = Math.max(0, maxBytes);
  while (end > 0 && (encoded[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  return new TextDecoder("utf-8", { fatal: false }).decode(encoded.subarray(0, end));
}
/**
 * Build a KV cache key from title and system.
 * Inputs are lower-cased, trimmed and truncated by UTF-8 byte length to avoid exceeding
 * Cloudflare KV's 512-byte key limit even for non-ASCII titles (CJK, emoji, etc.).
 * An empty system is keyed as "any".
 *
 * @param {string} title Game title.
 * @param {string} system System slug ("" when not supplied).
 * @returns {string} Key of the form `ghorg::<title>::<system>`.
 */
function makeCacheKey(title, system) {
  const titlePart = truncateToBytes(title.toLowerCase().trim(), MAX_TITLE_LENGTH);
  const systemPart = truncateToBytes((system || "any").toLowerCase().trim(), MAX_SYSTEM_LENGTH);
  return `ghorg::${titlePart}::${systemPart}`;
}

// ─── Scraping ─────────────────────────────────────────────────────────────────

/**
 * Fetch cheats for a given title + optional system slug from GameHacking.org.
 * Tries the system-filtered search first and, when that yields nothing, repeats the
 * search without the filter.
 *
 * @param {string} title Game title.
 * @param {string|null} systemSlug GameHacking.org system slug, or null.
 * @returns {Promise<Array<{name: string, code: string, category: string}>>}
 * @throws {Error} When an upstream request fails transiently (see `fetchHTML`).
 */
async function fetchCheats(title, systemSlug) {
  // Strategy 1: with system filter
  if (systemSlug) {
    const filtered = await fetchSearchResults(title, systemSlug);
    if (filtered.length > 0) return filtered;
  }

  // Strategy 2: without system filter
  return fetchSearchResults(title, null);
}

/**
 * Run one search → best game link → game page → parse pipeline.
 * Any step that finds nothing short-circuits to an empty array.
 *
 * @param {string} title Game title.
 * @param {string|null} systemSlug Optional system filter.
 * @returns {Promise<Array<{name: string, code: string, category: string}>>}
 */
async function fetchSearchResults(title, systemSlug) {
  const searchHTML = await fetchHTML(buildSearchURL(title, systemSlug));
  if (!searchHTML) return [];

  const gamePath = bestGameLink(searchHTML, title);
  if (!gamePath) return [];

  const gameHTML = await fetchHTML(GAMEHACKING_BASE + gamePath);
  if (!gameHTML) return [];

  return parseCheatPage(gameHTML, title);
}

/**
 * Build the GameHacking.org search URL for a title and optional system slug.
 *
 * @param {string} title Search query.
 * @param {string|null} systemSlug Optional system filter.
 * @returns {string}
 */
function buildSearchURL(title, systemSlug) {
  const params = new URLSearchParams({ q: title });
  if (systemSlug) params.set("system", systemSlug);
  return `${GAMEHACKING_SEARCH}?${params.toString()}`;
}

/**
 * GET a page from upstream and return its body text.
 *
 * @param {string} url Absolute URL to fetch.
 * @returns {Promise<string|null>} Page HTML, or null for a definitive "not found" (other 4xx).
 * @throws {Error} For transient upstream failures — any 5xx, 408 (request timeout) and
 *   429 (rate limited) — so the caller reports `X-Proxy-Status: error` and does not
 *   KV-cache the failure as "no cheats" for 24 hours.
 */
async function fetchHTML(url) {
  const resp = await fetch(url, {
    headers: {
      "User-Agent": USER_AGENT,
      "Accept": "text/html,application/xhtml+xml",
    },
  });

  const transient = resp.status >= 500 || resp.status === 408 || resp.status === 429;
  if (transient) {
    throw new Error(`Upstream HTTP ${resp.status} from ${url}`);
  }
  if (!resp.ok) return null; // remaining 4xx → legitimate "not found"
  return await resp.text();
}
// ─── Search result parsing ────────────────────────────────────────────────────

/**
 * Extract the best-matching game page path from search results HTML.
 * `/game/…` links are preferred; `/system/…` links are only considered when no game
 * link exists. A link must score above 0.4 Dice similarity against the title to win.
 *
 * @param {string} html Search results page.
 * @param {string} title Title being searched for.
 * @returns {string|null} e.g. "/game/12345", or null when nothing matches well enough.
 */
function bestGameLink(html, title) {
  const patterns = [
    /href="(\/game\/[^"]+)"[^>]*>([^<]{3,80})<\/a>/gi,
    /href="(\/system\/[^"]+)"[^>]*>([^<]{3,80})<\/a>/gi,
  ];

  let links = [];
  for (const pattern of patterns) {
    links = Array.from(html.matchAll(pattern), (m) => ({
      path: m[1],
      title: decodeHTMLEntities(m[2].trim()),
    }));
    if (links.length > 0) break;
  }
  if (links.length === 0) return null;

  const wanted = normalise(title);
  let bestPath = null;
  let bestScore = 0.4; // minimum similarity; candidates must score strictly higher

  for (const link of links) {
    const score = diceSimilarity(wanted, normalise(link.title));
    if (score > bestScore) {
      bestScore = score;
      bestPath = link.path;
    }
  }
  return bestPath;
}

// ─── Cheat page parsing ───────────────────────────────────────────────────────

/**
 * Parse a GameHacking.org game page and return normalised cheat entries.
 * Three layouts are tried in order; the first that yields entries wins.
 *
 * @param {string} html Game page HTML.
 * @param {string} romTitle Unused; kept so existing callers keep working.
 * @returns {Array<{name: string, code: string, category: string}>}
 */
function parseCheatPage(html, romTitle) {
  // Strategy 1: table rows
  const fromTable = parseTableCheats(html);
  if (fromTable.length > 0) return fromTable;

  // Strategy 2: definition lists
  const fromLists = parseDefinitionListCheats(html);
  if (fromLists.length > 0) return fromLists;

  // Strategy 3: inline code class spans
  return parseInlineCheats(html);
}

/**
 * Extract cheats from `<tr>` rows: the first code-like cell is the code and the first
 * non-code cell (1–199 characters) is the name.
 *
 * Code-like cells are never used as the name. Previously a second code-like cell
 * (e.g. the value half of a two-cell code) was picked up as the cheat name whenever it
 * preceded the real name cell.
 *
 * @param {string} html Page HTML.
 * @returns {Array<{name: string, code: string, category: string}>}
 */
function parseTableCheats(html) {
  const trPattern = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
  const tdPattern = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
  const codePattern = /^[0-9A-Fa-f]{4,16}[\s+\-:]*[0-9A-Fa-f]{0,16}$/;
  const headerLabels = new Set(["name", "code", "description"]);

  const entries = [];
  for (const row of html.matchAll(trPattern)) {
    const cells = Array.from(row[1].matchAll(tdPattern), (cell) => stripTags(cell[1]));
    if (cells.length < 2) continue;

    let codeCell = null;
    let nameCell = null;

    for (const cell of cells) {
      const compact = cell.trim().replace(/\s+/g, "");
      if (compact.length >= 4 && codePattern.test(compact)) {
        // Keep only the first code; later code-like cells must not become the name.
        if (!codeCell) codeCell = compact;
        continue;
      }
      const text = cell.trim();
      if (!nameCell && text.length > 0 && text.length < 200) {
        nameCell = text;
      }
    }

    if (!codeCell || !nameCell) continue;
    if (headerLabels.has(nameCell.toLowerCase())) continue; // skip header rows

    entries.push({ name: nameCell, code: codeCell, category: "General" });
  }
  return entries;
}

/**
 * Extract cheats from `<dt>…</dt><dd>…</dd>` pairs. Whichever side looks like a code is
 * taken as the code; pairs with no code-like side are ignored.
 *
 * @param {string} html Page HTML.
 * @returns {Array<{name: string, code: string, category: string}>}
 */
function parseDefinitionListCheats(html) {
  const pairPattern = /<dt[^>]*>([\s\S]*?)<\/dt>\s*<dd[^>]*>([\s\S]*?)<\/dd>/gi;
  const entries = [];

  for (const pair of html.matchAll(pairPattern)) {
    const term = stripTags(pair[1]).trim();
    const definition = stripTags(pair[2]).trim();
    if (!term || !definition) continue;

    const [name, code] = looksLikeCode(definition) ? [term, definition] : [definition, term];
    if (!looksLikeCode(code)) continue;

    entries.push({ name, code: code.replace(/\s+/g, ""), category: "General" });
  }
  return entries;
}

/**
 * Extract cheats from inline markup: an element with `class="code"` followed by an
 * element whose class contains "name".
 *
 * @param {string} html Page HTML.
 * @returns {Array<{name: string, code: string, category: string}>}
 */
function parseInlineCheats(html) {
  const pattern = /class="code"[^>]*>([0-9A-Fa-f\s]+)<\/[^>]+>[\s\S]*?class="[^"]*name[^"]*"[^>]*>([^<]{2,80})</gi;
  const entries = [];

  for (const found of html.matchAll(pattern)) {
    const code = found[1].trim().replace(/\s+/g, "");
    const name = decodeHTMLEntities(found[2].trim());
    if (!code || !name) continue;
    entries.push({ name, code, category: "General" });
  }
  return entries;
}

// ─── Utility functions ────────────────────────────────────────────────────────

/** Remove HTML tags, decode entities and trim the result. */
function stripTags(html) {
  const withoutTags = html.replace(/<[^>]+>/g, "");
  return decodeHTMLEntities(withoutTags).trim();
}

/**
 * Heuristic: true when the trimmed string is at least four characters long and consists
 * only of hex digits, whitespace and "+".
 */
function looksLikeCode(s) {
  const candidate = s.trim();
  return candidate.length >= 4 && /^[0-9A-Fa-f\s+]{4,}$/.test(candidate);
}

/** Lower-case a title and strip the (USA)/(Europe)/(Japan)/(World) region tags. */
function normalise(s) {
  return ["(usa)", "(europe)", "(japan)", "(world)"]
    .reduce((text, tag) => text.split(tag).join(""), s.toLowerCase())
    .trim();
}
/**
 * Sørensen–Dice similarity of two strings over character bigrams (with multiplicity).
 *
 * @param {string} a First string.
 * @param {string} b Second string.
 * @returns {number} 1.0 for identical strings, 0 when either string has no bigrams,
 *   otherwise 2·|A∩B| / (|A| + |B|).
 */
function diceSimilarity(a, b) {
  if (a === b) return 1.0;

  const aGrams = bigrams(a);
  const bGrams = bigrams(b);
  if (aGrams.size === 0 || bGrams.size === 0) return 0;

  let shared = 0;
  let aTotal = 0;
  for (const [gram, aCount] of aGrams) {
    aTotal += aCount;
    shared += Math.min(aCount, bGrams.get(gram) || 0);
  }

  let bTotal = 0;
  for (const bCount of bGrams.values()) {
    bTotal += bCount;
  }

  return (2.0 * shared) / (aTotal + bTotal);
}

/**
 * Count the overlapping two-character substrings of `s`.
 *
 * @param {string} s Input string.
 * @returns {Map<string, number>} Bigram → occurrence count (empty for strings shorter than 2).
 */
function bigrams(s) {
  const counts = new Map();
  for (let i = 0; i + 1 < s.length; i++) {
    const gram = s.slice(i, i + 2);
    counts.set(gram, (counts.get(gram) || 0) + 1);
  }
  return counts;
}

// Named entities understood by decodeHTMLEntities, keyed without the "&" and ";".
// A non-breaking space is deliberately mapped to a plain space.
const NAMED_HTML_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Decode the common named HTML entities plus decimal/hex numeric character references.
 *
 * Decoding is done in a single pass so text is never decoded twice: an escaped
 * ampersand followed by "lt;" becomes the literal text of the lt entity rather than "<".
 * Unknown names and invalid code points are left untouched.
 *
 * @param {string} s Text that may contain HTML entities.
 * @returns {string} Decoded text.
 */
function decodeHTMLEntities(s) {
  return s.replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, (whole, ref) => {
    if (ref[0] !== "#") {
      const named = NAMED_HTML_ENTITIES.get(ref);
      return named === undefined ? whole : named;
    }

    const isHex = ref[1] === "x" || ref[1] === "X";
    const codePoint = parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isValid =
      Number.isInteger(codePoint) &&
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff); // lone surrogates are not characters
    if (!isValid) return whole;

    // Keep numeric and named non-breaking spaces consistent: both become a plain space.
    return codePoint === 0xa0 ? " " : String.fromCodePoint(codePoint);
  });
}