diff --git a/packages/php-wasm/compile/php/php_wasm.c b/packages/php-wasm/compile/php/php_wasm.c index 5a977fa779..7d5d3313a1 100644 --- a/packages/php-wasm/compile/php/php_wasm.c +++ b/packages/php-wasm/compile/php/php_wasm.c @@ -1451,6 +1451,11 @@ static void wasm_sapi_register_server_variables(zval *track_vars_array TSRMLS_DC value = SG(request_info).request_uri; if (value != NULL) { + /** + * REQUEST_URI represents the requested path relative to the site root. + * This is **before** any URL rewriting rules (e.g. apache .htaccess) have been + * applied. + */ php_register_variable("REQUEST_URI", value, track_vars_array TSRMLS_CC); } @@ -1459,7 +1464,9 @@ static void wasm_sapi_register_server_variables(zval *track_vars_array TSRMLS_DC { // Confirm path translated starts with the document root /** - * PHP_SELF is the script path relative to the document root. + * PHP_SELF represents the requested script path resolved to a filesystem path relative to the document + * root. This is after any URL rewriting rules (e.g. apache .htaccess) + * have been applied. * * For example: * @@ -1478,6 +1485,10 @@ static void wasm_sapi_register_server_variables(zval *track_vars_array TSRMLS_DC char *script_name = wasm_server_context->path_translated + strlen(wasm_server_context->document_root); char *script_filename = wasm_server_context->path_translated; char *php_self = wasm_server_context->path_translated + strlen(wasm_server_context->document_root); + /** + * SCRIPT_NAME represents the path to the PHP script being executed after + * any URL rewriting rules (e.g. apache .htaccess) have been applied. 
+ */ php_register_variable("SCRIPT_NAME", estrdup(script_name), track_vars_array TSRMLS_CC); php_register_variable("SCRIPT_FILENAME", estrdup(script_filename), track_vars_array TSRMLS_CC); php_register_variable("PHP_SELF", estrdup(php_self), track_vars_array TSRMLS_CC); diff --git a/packages/php-wasm/node/src/test/php-request-handler.spec.ts b/packages/php-wasm/node/src/test/php-request-handler.spec.ts index 1cf3b8da38..cf3b10bda1 100644 --- a/packages/php-wasm/node/src/test/php-request-handler.spec.ts +++ b/packages/php-wasm/node/src/test/php-request-handler.spec.ts @@ -41,8 +41,8 @@ interface ConfigForRequestTests { absoluteUrl: string | undefined; } -const configsForRequestTests: ConfigForRequestTests[] = - SupportedPHPVersions.map((phpVersion) => { +let configsForRequestTests: ConfigForRequestTests[] = SupportedPHPVersions.map( + (phpVersion) => { const documentRoots = [ '/', // TODO: Re-enable when we can avoid GH workflow cancelation. @@ -64,7 +64,14 @@ const configsForRequestTests: ConfigForRequestTests[] = absoluteUrl, })); }); - }).flat(2); + } +).flat(2); + +if ('PHP' in process.env) { + configsForRequestTests = configsForRequestTests.filter( + (config) => config.phpVersion === process.env['PHP'] + ); +} describe.each(configsForRequestTests)( '[PHP $phpVersion, DocRoot $docRoot, AbsUrl $absoluteUrl] PHPRequestHandler – request', @@ -156,6 +163,7 @@ describe.each(configsForRequestTests)( }); it('should serve a static file with urlencoded entities in the path', async () => { + console.log({ absoluteUrl, docRoot }); php.writeFile( joinPaths(docRoot, 'Screenshot 2024-04-05 at 7.13.36 AM.html'), `Hello World` @@ -670,8 +678,12 @@ describe.each(configsForRequestTests)( } ); -describe.each(SupportedPHPVersions)( - '[PHP %s] PHPRequestHandler – PHP_SELF', +let phpVersions = SupportedPHPVersions; +if ('PHP' in process.env) { + phpVersions = [process.env['PHP']] as any; +} +describe.each(phpVersions)( + '[PHP %s] PHPRequestHandler – $_SERVER entries', (phpVersion) 
=> { let handler: PHPRequestHandler; beforeEach(async () => { @@ -732,6 +744,234 @@ describe.each(SupportedPHPVersions)( expect(response.text).toEqual('/var/www'); }); + + describe('PHP Dev Server scenario (with PATH_INFO)', () => { + it('should set $_SERVER variables correctly for script with PATH_INFO', async () => { + const php = await handler.getPrimaryPhp(); + php.mkdirTree('/var/www/subdir'); + php.writeFile( + '/var/www/subdir/script.php', + ` $_SERVER['REQUEST_URI'], + 'SCRIPT_NAME' => $_SERVER['SCRIPT_NAME'], + 'SCRIPT_FILENAME' => $_SERVER['SCRIPT_FILENAME'], + 'PATH_INFO' => $_SERVER['PATH_INFO'] ?? '(not set)', + 'PHP_SELF' => $_SERVER['PHP_SELF'], + ]); + ` + ); + + const response = await handler.request({ + url: '/subdir/script.php/b.php/c.php', + }); + + const result = response.json; + expect(result['REQUEST_URI']).toEqual( + '/subdir/script.php/b.php/c.php' + ); + expect(result['SCRIPT_NAME']).toEqual('/subdir/script.php'); + expect(result['SCRIPT_FILENAME']).toEqual( + '/var/www/subdir/script.php' + ); + expect(result['PATH_INFO']).toEqual('/b.php/c.php'); + expect(result['PHP_SELF']).toEqual( + '/subdir/script.php/b.php/c.php' + ); + }); + }); + + describe('Apache vanilla request scenario', () => { + it('should set $_SERVER variables correctly for vanilla request with query string', async () => { + const php = await handler.getPrimaryPhp(); + php.mkdirTree('/var/www/subdir'); + php.writeFile( + '/var/www/subdir/script.php', + ` $_SERVER['REQUEST_URI'], + 'SCRIPT_NAME' => $_SERVER['SCRIPT_NAME'], + 'SCRIPT_FILENAME' => $_SERVER['SCRIPT_FILENAME'], + 'PATH_INFO' => $_SERVER['PATH_INFO'] ?? '(not set)', + 'PHP_SELF' => $_SERVER['PHP_SELF'], + 'QUERY_STRING' => $_SERVER['QUERY_STRING'] ?? '', + 'REQUEST_METHOD' => $_SERVER['REQUEST_METHOD'], + 'DOCUMENT_ROOT' => $_SERVER['DOCUMENT_ROOT'], + 'GET_param' => $_GET['param'] ?? 
'(not set)', + ]); + ` + ); + + const response = await handler.request({ + url: '/subdir/script.php?param=value', + }); + + const result = response.json; + expect(result['REQUEST_URI']).toEqual( + '/subdir/script.php?param=value' + ); + expect(result['SCRIPT_NAME']).toEqual('/subdir/script.php'); + expect(result['SCRIPT_FILENAME']).toEqual( + '/var/www/subdir/script.php' + ); + expect(result['PATH_INFO']).toEqual(''); + // This should actually be a missing key, not an empty string. + // @TODO: Adjust this inconsistency. + // expect(result['PATH_INFO']).toEqual('(not set)'); + expect(result['PHP_SELF']).toEqual('/subdir/script.php'); + expect(result['QUERY_STRING']).toEqual('param=value'); + expect(result['REQUEST_METHOD']).toEqual('GET'); + expect(result['DOCUMENT_ROOT']).toEqual('/var/www'); + expect(result['GET_param']).toEqual('value'); + }); + }); + + describe('Apache rewriting rules scenario', () => { + it('should set $_SERVER variables correctly when rewrite rules are applied', async () => { + const handlerWithRewrite = new PHPRequestHandler({ + phpFactory: async () => + new PHP(await loadNodeRuntime(phpVersion)), + documentRoot: '/var/www', + maxPhpInstances: 1, + rewriteRules: [ + { + match: /^\/api\/v1\/user\/([0-9]+)$/, + replacement: + '/subdir/script.php?endpoint=user&id=$1', + }, + ], + }); + const php = await handlerWithRewrite.getPrimaryPhp(); + php.mkdirTree('/var/www/subdir'); + php.writeFile( + '/var/www/subdir/script.php', + ` $_SERVER['REQUEST_URI'], + 'SCRIPT_NAME' => $_SERVER['SCRIPT_NAME'], + 'SCRIPT_FILENAME' => $_SERVER['SCRIPT_FILENAME'], + 'PATH_INFO' => $_SERVER['PATH_INFO'] ?? '(not set)', + 'PHP_SELF' => $_SERVER['PHP_SELF'], + 'QUERY_STRING' => $_SERVER['QUERY_STRING'] ?? '', + 'GET_endpoint' => $_GET['endpoint'] ?? '(not set)', + 'GET_id' => $_GET['id'] ?? 
'(not set)', + ]); + ` + ); + + const response = await handlerWithRewrite.request({ + url: '/api/v1/user/123', + }); + + const result = response.json; + // REQUEST_URI should be the original URL (before rewriting) per Apache behavior + expect(result['REQUEST_URI']).toEqual('/api/v1/user/123'); + // SCRIPT_NAME is the path to the script relative to document root + expect(result['SCRIPT_NAME']).toEqual('/subdir/script.php'); + // SCRIPT_FILENAME is the absolute path to the script file + expect(result['SCRIPT_FILENAME']).toEqual( + '/var/www/subdir/script.php' + ); + // PATH_INFO is not set for this type of rewrite + expect(result['PATH_INFO']).toEqual('(not set)'); + // PHP_SELF should be the script path per Apache behavior + expect(result['PHP_SELF']).toEqual('/subdir/script.php'); + // QUERY_STRING should contain the rewritten query parameters + expect(result['QUERY_STRING']).toEqual('endpoint=user&id=123'); + // $_GET should have the parsed query parameters + expect(result['GET_endpoint']).toEqual('user'); + expect(result['GET_id']).toEqual('123'); + + php.exit(); + }); + + it('should preserve original REQUEST_URI while rewriting to a different script', async () => { + const handlerWithRewrite = new PHPRequestHandler({ + phpFactory: async () => + new PHP(await loadNodeRuntime(phpVersion)), + documentRoot: '/var/www', + maxPhpInstances: 1, + rewriteRules: [ + { + match: /^\/pretty\/url/, + replacement: '/index.php?page=pretty', + }, + ], + }); + const php = await handlerWithRewrite.getPrimaryPhp(); + php.writeFile( + '/var/www/index.php', + ` $_SERVER['REQUEST_URI'], + 'PHP_SELF' => $_SERVER['PHP_SELF'], + 'SCRIPT_NAME' => $_SERVER['SCRIPT_NAME'], + ]); + ` + ); + + const response = await handlerWithRewrite.request({ + url: '/pretty/url', + }); + + const result = response.json; + // REQUEST_URI should be the original URL per Apache behavior + expect(result['REQUEST_URI']).toEqual('/pretty/url'); + // PHP_SELF should be the script path per Apache behavior + 
expect(result['PHP_SELF']).toEqual('/index.php'); + // SCRIPT_NAME is the script path + expect(result['SCRIPT_NAME']).toEqual('/index.php'); + + php.exit(); + }); + + it('should preserve the original query params through URL rewriting', async () => { + const handlerWithRewrite = new PHPRequestHandler({ + phpFactory: async () => + new PHP(await loadNodeRuntime(phpVersion)), + documentRoot: '/var/www', + maxPhpInstances: 1, + rewriteRules: [ + { + match: /^\/pretty\/url/, + replacement: '/index.php?page=pretty', + }, + ], + }); + const php = await handlerWithRewrite.getPrimaryPhp(); + php.writeFile( + '/var/www/index.php', + ` $_SERVER['REQUEST_URI'], + 'PHP_SELF' => $_SERVER['PHP_SELF'], + 'SCRIPT_NAME' => $_SERVER['SCRIPT_NAME'], + 'QUERY_STRING' => $_SERVER['QUERY_STRING'], + ]); + ` + ); + + const response = await handlerWithRewrite.request({ + url: '/pretty/url?foo=bar&page=different-value', + }); + + const result = response.json; + // REQUEST_URI should be the original URL per Apache behavior + expect(result['REQUEST_URI']).toEqual( + '/pretty/url?foo=bar&page=different-value' + ); + // QUERY_STRING should contain all the query parameters: original + rewritten + expect(result['QUERY_STRING']).toEqual( + 'page=pretty&foo=bar&page=different-value' + ); + // PHP_SELF should be the script path per Apache behavior + expect(result['PHP_SELF']).toEqual('/index.php'); + // SCRIPT_NAME is the script path + expect(result['SCRIPT_NAME']).toEqual('/index.php'); + + php.exit(); + }); + }); } ); diff --git a/packages/php-wasm/universal/src/lib/php-request-handler.ts b/packages/php-wasm/universal/src/lib/php-request-handler.ts index e0e380e5fa..19f3e3201f 100644 --- a/packages/php-wasm/universal/src/lib/php-request-handler.ts +++ b/packages/php-wasm/universal/src/lib/php-request-handler.ts @@ -1,4 +1,4 @@ -import { joinPaths } from '@php-wasm/util'; +import { dirname, joinPaths } from '@php-wasm/util'; import { ensurePathPrefix, toRelativeUrl, @@ -361,24 +361,29 @@ export 
class PHPRequestHandler implements AsyncDisposable { */ async request(request: PHPRequest): Promise { const isAbsolute = URL.canParse(request.url); - const requestedUrl = new URL( + const originalRequestUrl = new URL( // Remove the hash part of the URL as it's not meant for the server. request.url.split('#')[0], isAbsolute ? undefined : DEFAULT_BASE_URL ); - const normalizedRequestedPath = applyRewriteRules( + const rewrittenRequestUrl = this.#applyRewriteRules(originalRequestUrl); + const primaryPhp = await this.getPrimaryPhp(); + let fsPath = joinPaths( + this.#DOCROOT, + /** + * Turn a URL such as `https://playground/scope:my-site/wp-admin/index.php` + * into a site-relative path, such as `/wp-admin/index.php`. + */ removePathPrefix( - decodeURIComponent(requestedUrl.pathname), + /** + * URL.pathname returns a URL-encoded path. We need to decode it + * before using it as a filesystem path. + */ + decodeURIComponent(rewrittenRequestUrl.pathname), this.#PATHNAME - ), - this.rewriteRules + ) ); - - const primaryPhp = await this.getPrimaryPhp(); - - let fsPath = joinPaths(this.#DOCROOT, normalizedRequestedPath); - if (primaryPhp.isDir(fsPath)) { // Ensure directory URIs have a trailing slash. Otherwise, // relative URIs in index.php or index.html files are relative @@ -404,7 +409,7 @@ export class PHPRequestHandler implements AsyncDisposable { if (!fsPath.endsWith('/')) { return new PHPResponse( 301, - { Location: [`${requestedUrl.pathname}/`] }, + { Location: [`${rewrittenRequestUrl.pathname}/`] }, new Uint8Array(0) ); } @@ -415,6 +420,38 @@ export class PHPRequestHandler implements AsyncDisposable { const possibleIndexPath = joinPaths(fsPath, possibleIndexFile); if (primaryPhp.isFile(possibleIndexPath)) { fsPath = possibleIndexPath; + + // Include the resolved index file in the final rewritten request URL. 
+ rewrittenRequestUrl.pathname = joinPaths( + rewrittenRequestUrl.pathname, + possibleIndexFile + ); + break; + } + } + } + + if (!primaryPhp.isFile(fsPath)) { + /** + * Try resolving a partial path. + * + * Example: + * + * – Request URL: /file.php/index.php + * – Document Root: /var/www + * + * If /var/www/file.php/index.php does not exist, but /var/www/file.php does, + * use /var/www/file.php. This is also what Apache and PHP Dev Server do. + */ + let pathToTry = rewrittenRequestUrl.pathname; + while (true) { + pathToTry = dirname(pathToTry); + if (pathToTry === '/' || !pathToTry.includes('/')) { + // We've tried all segments for a partial path. + break; + } + if (primaryPhp.isFile(joinPaths(this.#DOCROOT, pathToTry))) { + fsPath = joinPaths(this.#DOCROOT, pathToTry); break; } } @@ -422,7 +459,7 @@ export class PHPRequestHandler implements AsyncDisposable { if (!primaryPhp.isFile(fsPath)) { const fileNotFoundAction = this.getFileNotFoundAction( - normalizedRequestedPath + rewrittenRequestUrl.pathname ); switch (fileNotFoundAction.type) { case 'response': @@ -447,13 +484,10 @@ export class PHPRequestHandler implements AsyncDisposable { // file-not-found fallback actions may redirect to non-existent files. if (primaryPhp.isFile(fsPath)) { if (fsPath.endsWith('.php')) { - const effectiveRequest: PHPRequest = { - ...request, - // Pass along URL with the #fragment filtered out - url: requestedUrl.toString(), - }; const response = await this.#spawnPHPAndDispatchRequest( - effectiveRequest, + request, + originalRequestUrl, + rewrittenRequestUrl, fsPath ); @@ -480,6 +514,32 @@ export class PHPRequestHandler implements AsyncDisposable { } } + /** + * Apply the rewrite rules to the original request URL. + * + * @param originalRequestUrl - The original request URL. + * @returns The rewritten request URL. 
/**
 * Applies the configured rewrite rules to the original request URL.
 *
 * The rules are matched against the URL-decoded, site-relative path
 * (the pathname with `this.#PATHNAME` removed). A rule's replacement may
 * carry its own query string, e.g. `/index.php?page=pretty`.
 *
 * The rewritten target is split into path and query *before* it is
 * turned back into a URL:
 *
 * – The path is assigned through the `URL.pathname` setter, so a decoded
 *   `?` or `#` that is part of a file name is percent-encoded again
 *   instead of being parsed as a query/fragment delimiter.
 * – A literal `%` in the decoded path is re-encoded as `%25`, so that a
 *   later `decodeURIComponent(pathname)` round-trips.
 * – The rewrite query never goes through `joinPaths()`, so values such as
 *   `?next=http://example.com` are not path-normalized.
 *
 * The query parameters of the original request are appended after the
 * ones introduced by the rewrite rule (comparable to Apache's QSA flag).
 *
 * @param originalRequestUrl - The original request URL.
 * @returns A new URL object describing the rewritten request. The
 *          original URL object is not modified.
 */
#applyRewriteRules(originalRequestUrl: URL): URL {
	const siteRelativePath = removePathPrefix(
		decodeURIComponent(originalRequestUrl.pathname),
		this.#PATHNAME
	);
	const rewrittenTarget = applyRewriteRules(
		siteRelativePath,
		this.rewriteRules
	);

	let rewrittenPath = rewrittenTarget;
	let rewriteQuery = '';
	// Only treat `?` as a query separator when a rule actually changed the
	// path. Otherwise any `?` comes from a decoded `%3F` in the requested
	// file name and must stay in the path.
	if (rewrittenTarget !== siteRelativePath) {
		const queryStart = rewrittenTarget.indexOf('?');
		if (queryStart !== -1) {
			rewrittenPath = rewrittenTarget.substring(0, queryStart);
			rewriteQuery = rewrittenTarget.substring(queryStart + 1);
		}
	}

	const rewrittenRequestUrl = new URL(originalRequestUrl.toString());
	// The pathname setter percent-encodes `?`, `#` and spaces, but not `%`.
	rewrittenRequestUrl.pathname = joinPaths(
		this.#PATHNAME,
		rewrittenPath
	).replaceAll('%', '%25');
	rewrittenRequestUrl.search = rewriteQuery;

	// Merge the query string parameters from the original request URL.
	for (const [key, value] of originalRequestUrl.searchParams.entries()) {
		rewrittenRequestUrl.searchParams.append(key, value);
	}
	return rewrittenRequestUrl;
}
/**
 * Computes the essential $_SERVER entries for a request.
 *
 * php_wasm.c sets some defaults, assuming it runs as a CLI script.
 * This function overrides them with the values correct in the request
 * context.
 *
 * @TODO: Consolidate the $_SERVER setting logic into a single place instead
 *        of splitting it between the C SAPI and the TypeScript code.
 *
 * The path-related entries follow what real servers were observed to do
 * (https://www.php.net/manual/en/reserved.variables.server.php is not
 * precise enough on its own):
 *
 * ## PHP Dev Server – `php -S` in /home/adam,
 *    request /subdir/script.php/b.php/c.php
 *
 *   REQUEST_URI     | /subdir/script.php/b.php/c.php
 *   SCRIPT_NAME     | /subdir/script.php
 *   SCRIPT_FILENAME | /home/adam/subdir/script.php
 *   PATH_INFO       | /b.php/c.php
 *   PHP_SELF        | /subdir/script.php/b.php/c.php
 *
 * ## Apache, document root /var/www/html, request /api/v1/user/123 with
 *    `RewriteRule ^api/v1/user/([0-9]+)$ /subdir/script.php?endpoint=user&id=$1 [L,QSA]`
 *
 *   REQUEST_URI     | /api/v1/user/123
 *   SCRIPT_NAME     | /subdir/script.php
 *   SCRIPT_FILENAME | /var/www/html/subdir/script.php
 *   PATH_INFO       | (key not set)
 *   PHP_SELF        | /subdir/script.php
 *   QUERY_STRING    | endpoint=user&id=123
 *
 *   With the substitution changed to `/subdir/script.php/next.php`,
 *   PHP_SELF becomes `/subdir/script.php/next.php`. So PHP_SELF is sourced
 *   from the rewritten request URL, not from the filesystem path.
 *
 * ## Apache, same setup, request /subdir/script.php?param=value
 *
 *   REQUEST_URI     | /subdir/script.php?param=value
 *   SCRIPT_NAME     | /subdir/script.php
 *   PATH_INFO       | (key not set)
 *   PHP_SELF        | /subdir/script.php
 *   QUERY_STRING    | param=value
 *
 * Entries deliberately NOT set here:
 *
 * – SCRIPT_FILENAME: set by php_wasm.c.
 * – REDIRECT_STATUS, REDIRECT_URL, REDIRECT_QUERY_STRING: Apache sets
 *   them, but they are optional.
 *
 * @param originalRequestUrl  - The request URL before rewriting.
 * @param rewrittenRequestUrl - The request URL after the rewrite rules
 *                              (and index file resolution) were applied.
 * @param resolvedScriptPath  - Absolute filesystem path of the script
 *                              that will be executed.
 * @returns The $_SERVER entries to pass to `php.run()`.
 */
private prepare$_SERVER(
	originalRequestUrl: URL,
	rewrittenRequestUrl: URL,
	resolvedScriptPath: string
): Record<string, string> {
	const $_SERVER: Record<string, string> = {
		REMOTE_ADDR: '127.0.0.1',
		DOCUMENT_ROOT: this.#DOCROOT,
		HTTPS: this.#ABSOLUTE_URL.startsWith('https://') ? 'on' : '',
	};

	// REQUEST_URI: path + query string of the request **before** rewriting.
	$_SERVER['REQUEST_URI'] =
		originalRequestUrl.pathname + originalRequestUrl.search;

	if (resolvedScriptPath.startsWith(this.#DOCROOT)) {
		// SCRIPT_NAME: filesystem path of the script relative to the
		// document root. When the document root is `/` (or ends with a
		// slash), stripping it also strips the leading slash – restore it
		// so SCRIPT_NAME is always root-relative.
		const docrootRelativePath = resolvedScriptPath.substring(
			this.#DOCROOT.length
		);
		const scriptName = docrootRelativePath.startsWith('/')
			? docrootRelativePath
			: `/${docrootRelativePath}`;
		$_SERVER['SCRIPT_NAME'] = scriptName;

		// PHP_SELF: URL path of the final request, after rewriting.
		$_SERVER['PHP_SELF'] = rewrittenRequestUrl.pathname;

		// PATH_INFO: whatever trails the script name in the requested
		// path, without the query string. The match must end on a path
		// segment boundary so `/script.phpx` is not treated as
		// `/script.php` followed by `x`.
		//
		// @TODO: When nothing trails the script name, Apache leaves the
		//        key unset. This still sets an empty string, which the
		//        existing request handler tests rely on.
		const requestedPath = originalRequestUrl.pathname;
		if (
			requestedPath === scriptName ||
			requestedPath.startsWith(`${scriptName}/`)
		) {
			$_SERVER['PATH_INFO'] = requestedPath.substring(
				scriptName.length
			);
		}
	}

	// QUERY_STRING: the query of the rewritten URL without the leading `?`.
	// It contains the parameters added by the rewrite rule followed by the
	// parameters of the original request, e.g.
	// `page=pretty&foo=bar&page=different-value`.
	$_SERVER['QUERY_STRING'] = rewrittenRequestUrl.search.substring(1);

	return $_SERVER;
}
/**
 * Rewrites a path using the first rule whose pattern matches it.
 *
 * Rules are tried in order. As soon as one matches, its replacement is
 * applied and no further rules are considered. When no rule matches, the
 * path is returned unchanged.
 *
 * @param path  - The path to rewrite.
 * @param rules - The rewrite rules, in priority order.
 * @returns The rewritten path, or the input path when nothing matched.
 */
export function applyRewriteRules(path: string, rules: RewriteRule[]): string {
	// Test against a fresh RegExp copy so the rule's own pattern object
	// is not the one being tested.
	const firstMatchingRule = rules.find((candidate) =>
		new RegExp(candidate.match).test(path)
	);
	if (firstMatchingRule === undefined) {
		return path;
	}
	return path.replace(
		firstMatchingRule.match,
		firstMatchingRule.replacement
	);
}
/**
 * WordPress rewrite rules adapted for Playground.
 *
 * The rules are matched against the requested path **without** the site
 * path prefix. For example:
 *
 * * Site URL:       https://playground.wordpress.net/scope:ambitious-chic-country/
 * * Site prefix:    /scope:ambitious-chic-country/
 * * Requested URL:  https://playground.wordpress.net/scope:ambitious-chic-country/wp-admin/index.php
 * * Matched path:   /wp-admin/index.php
 *
 * This is similar to setting `RewriteBase` to `/scope:ambitious-chic-country`.
 *
 * ## Rationale
 *
 * WordPress has no single, static rule set. It generates an .htaccess file
 * for the current configuration via save_mod_rewrite_rules():
 *
 * https://developer.wordpress.org/reference/functions/save_mod_rewrite_rules/
 *
 * Examples of the generated rules:
 *
 * ### Vanilla single-site installation
 *
 * ```apache
 * RewriteBase /
 * RewriteRule ^index\.php$ - [L]
 * RewriteCond %{REQUEST_FILENAME} !-f
 * RewriteCond %{REQUEST_FILENAME} !-d
 * RewriteRule . /index.php [L]
 * ```
 *
 * ### Single-site installation living at a /subdirectory/
 *
 * ```apache
 * # https://developer.wordpress.org/advanced-administration/server/wordpress-in-directory/
 * RewriteCond %{REQUEST_URI} !^/subdirectory/
 * RewriteCond %{REQUEST_FILENAME} !-f
 * RewriteCond %{REQUEST_FILENAME} !-d
 * RewriteRule ^(.*)$ /subdirectory/$1
 * RewriteRule ^(/)?$ subdirectory/index.php [L]
 * ```
 *
 * Some sources also set the RewriteBase to `/subdirectory/`.
 *
 * ### Multisite installation using the subfolder network type
 *
 * ```apache
 * # https://wordpress.org/documentation/article/htaccess/#multisite
 * RewriteBase /
 * RewriteRule ^index\.php$ - [L]
 *
 * # Add a trailing slash to /wp-admin
 * RewriteRule ^([_0-9a-zA-Z-]+/)?wp-admin$ $1wp-admin/ [R=301,L]
 *
 * RewriteCond %{REQUEST_FILENAME} -f [OR]
 * RewriteCond %{REQUEST_FILENAME} -d
 * RewriteRule ^ - [L]
 * RewriteRule ^([_0-9a-zA-Z-]+/)?(wp-(content|admin|includes).*) $2 [L]
 * RewriteRule ^([_0-9a-zA-Z-]+/)?(.*\.php)$ $2 [L]
 * RewriteRule . index.php [L]
 * ```
 *
 * ### Multisite living at /scope:ambitious-chic-country/
 *
 * Same as above, except for `RewriteBase /scope:ambitious-chic-country/`
 * and a `wordpress/` prefix on the two substitutions:
 *
 * ```apache
 * # The `wordpress/` prefix matches the document root, but seeing
 * # it here is unexpected. @TODO: Why is it being added by WordPress?
 * RewriteRule ^([_0-9a-zA-Z-]+/)?(wp-(content|admin|includes).*) wordpress/$2 [L]
 * RewriteRule ^([_0-9a-zA-Z-]+/)?(.*\.php)$ wordpress/$2 [L]
 * ```
 *
 * ## .htaccess syntax
 *
 * Summary of the mod_rewrite documentation [1][2]:
 *
 * mod_rewrite is a rule-based rewriting engine built on PCRE regular
 * expressions. By default it maps a URL to a filesystem path, but it can
 * also redirect one URL to another or invoke an internal proxy fetch.
 *
 * - `RewriteBase URL-path` sets the URL prefix used by per-directory
 *   RewriteRule directives that substitute a relative path. Setting it to
 *   `/` makes it possible to use patterns that do not start with a slash.
 * - `RewriteRule Pattern Substitution [flags]` defines a rewriting rule.
 * - Flags: `L` (Last) stops processing the rule set; `NC` (No Case)
 *   ignores case when matching; `R` (Redirect) issues an HTTP redirect.
 *
 * ## Differences with .htaccess
 *
 * - Patterns are matched against a path that starts with `/`. There is no
 *   implied `RewriteBase`, so the leading slash is part of the pattern.
 * - Only the `wp-(content|admin|includes)` multisite rule is ported here.
 *
 * [1] https://httpd.apache.org/docs/current/rewrite/intro.html
 * [2] https://httpd.apache.org/docs/current/rewrite/flags.html
 */
export const wordPressRewriteRules: RewriteRule[] = [
	/**
	 * Substitutes the multisite WordPress rewrite rule:
	 *
	 *     RewriteBase /
	 *     RewriteRule ^([_0-9a-zA-Z-]+/)?(wp-(content|admin|includes).*) $2 [L]
	 *
	 * Group 1 is the optional subsite segment, including the leading slash
	 * that .htaccess leaves implied through `RewriteBase /`.
	 *
	 * Group 2 is the kept part. It starts with its own slash: there is no
	 * implied RewriteBase here, so the only way for the rewritten path to
	 * keep its leading `/` is to capture it rather than discard it.
	 */
	{
		match: /^(\/[_0-9a-zA-Z-]+)?(\/wp-(content|admin|includes)\/.*)/,
		replacement: '$2',
	},
];
it('Should only target the initial wp-admin|wp-content|wp-includes path (2)', async () => { + expect( + applyRewriteRules( + '/wp-content/themes/Newspaper/includes/wp-booster/wp-content/images/plugins/tagdiv-small.png', + wordPressRewriteRules + ) + ).toBe( + '/wp-content/themes/Newspaper/includes/wp-booster/wp-content/images/plugins/tagdiv-small.png' + ); + }); + + it('Should not strip wp-content prefix from a path', async () => { + expect( + applyRewriteRules( + '/wp-content/themes/twentytwentyfour/assets/images/windows.webp', wordPressRewriteRules ) ).toBe(