Skip to content

Commit a4f46d6

Browse files
committed
Parse PAX header and extract path
1 parent e44cdd5 commit a4f46d6

File tree

1 file changed

+174
-7
lines changed

1 file changed

+174
-7
lines changed

ext/phar/tar.c

Lines changed: 174 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,126 @@ static zend_result phar_tar_process_metadata(phar_entry_info *entry, php_stream
198198
}
199199
/* }}} */
200200

201+
/* Parses a PAX extended header and invokes `cb` once per well-formed record.
 *
 * Each record has the form "%d %s=%s\n", where the leading decimal number is
 * the length of the whole record, including the number itself, the blank and
 * the terminating newline.
 *
 * ptr:  start of the header data. The buffer is modified: the blank following
 *       each length prefix is overwritten with '\0' so the number can be parsed.
 * size: number of bytes available at `ptr`.
 * ctx:  opaque pointer handed through to `cb` unchanged.
 * cb:   called as cb(key, key_len, record_end, ctx). `record_end` points one
 *       past the record's '\n'; the value starts at key + key_len + 1.
 *
 * Returns the first non-NULL string returned by `cb`, or NULL otherwise.
 * Parsing stops silently (returning NULL) at the first malformed record. */
static const char *phar_parse_pax_header(char *ptr, uint32_t size, void *ctx, const char *(*cb)(const char *, uint32_t, const char *, void *))
{
	const char *pax_data_end = ptr + size;

	while (ptr < pax_data_end) {
		/* The length prefix must start with a digit. strtoull() would otherwise
		 * accept leading whitespace (including '\n') and a sign, which lets a
		 * crafted record claim a length shorter than its own prefix. */
		if (*ptr < '0' || *ptr > '9') {
			break;
		}

		char *blank = memchr(ptr, ' ', pax_data_end - ptr);
		if (!blank) {
			break;
		}
		/* Terminate the number so strtoull() cannot read past the blank. */
		*blank = '\0';

		char *endptr;
		/* Keep the full width until the bounds check so the value cannot be
		 * truncated on platforms with a 32-bit size_t. */
		unsigned long long record_len = strtoull(ptr, &endptr, 10);
		size_t remaining = pax_data_end - ptr;
		size_t prefix_len = (size_t) (blank - ptr) + 1;

		/* The record must fit in the buffer and hold, after the length prefix
		 * and blank, at least a one-byte keyword, '=' and the '\n' terminator. */
		if (endptr != blank || record_len > remaining || record_len < prefix_len + 3) {
			break;
		}

		/* Check terminator */
		char *record_end = ptr + (size_t) record_len;
		if (record_end[-1] != '\n') {
			break;
		}

		/* Validate keyword: must be non-empty and followed by '='.
		 * key < record_end is guaranteed by the length check above. */
		const char *key = blank + 1;
		const char *equals = memchr(key, '=', record_end - key);
		if (!equals || key == equals) {
			break;
		}

		const char *err = cb(key, equals - key, record_end, ctx);
		if (err) {
			return err;
		}

		ptr = record_end;
	}

	return NULL;
}
243+
244+
struct phar_pax_local {
245+
zend_string *filename_override;
246+
bool is_persistent;
247+
};
248+
249+
static const char *phar_pax_parse_path(const char *key, uint32_t key_len, const char *record_end, struct phar_pax_local *pax)
250+
{
251+
const char *filename_start = key + key_len + 1;
252+
size_t pax_local_filename_len = record_end - filename_start;
253+
if (pax_local_filename_len <= 1) {
254+
return "invalid path length";
255+
}
256+
257+
/* strip '\n' */
258+
pax_local_filename_len--;
259+
/* Ending '/' stripping */
260+
if (filename_start[pax_local_filename_len - 1] == '/') {
261+
pax_local_filename_len--;
262+
}
263+
264+
/* Last one takes precedence if multiple are provided */
265+
if (pax->filename_override) {
266+
pefree(pax->filename_override, pax->is_persistent);
267+
pax->filename_override = NULL;
268+
}
269+
270+
pax->filename_override = zend_string_init(filename_start, pax_local_filename_len, pax->is_persistent);
271+
if (pax->is_persistent) {
272+
GC_MAKE_PERSISTENT_LOCAL(pax_local_filename);
273+
}
274+
275+
return NULL;
276+
}
277+
278+
/* Checks whether the value of a PAX "hdrcharset" record is supported.
 *
 * key/key_len: the record keyword; the value starts at key + key_len + 1.
 * record_end:  one past the end of the record, whose last byte is the '\n'
 *              terminator.
 *
 * Returns true only for "BINARY"; every other value is rejected. */
static bool phar_pax_parse_hdrcharset(const char *key, uint32_t key_len, const char *record_end)
{
	static const char charset_binary[] = "BINARY";
	const char *value = key + key_len + 1;

	if (record_end <= value) {
		return false;
	}

	size_t value_len = record_end - value;
	/* The record terminator is not part of the value; without stripping it
	 * the length comparison below can never succeed. */
	if (value[value_len - 1] == '\n') {
		value_len--;
	}

	if (value_len == sizeof(charset_binary) - 1 && memcmp(value, charset_binary, sizeof(charset_binary) - 1) == 0) {
		return true;
	}

#if 0 /* TODO: support UTF-8 to local locale conversion? */
	if (value_len == strlen("ISO-IR 10646 2000 UTF-8") && strncmp(value, "ISO-IR 10646 2000 UTF-8", strlen("ISO-IR 10646 2000 UTF-8")) == 0) {
		return true;
	}
#endif

	return false;
}
293+
294+
/* Record callback for per-file PAX headers.
 * Validates "hdrcharset" and forwards "path" to phar_pax_parse_path() with
 * `ctx` as its state argument; all other keywords are ignored.
 * Returns NULL on success or a static error message. */
static const char *phar_pax_local_cb(const char *key, uint32_t key_len, const char *record_end, void *ctx)
{
	static const char kw_hdrcharset[] = "hdrcharset";
	static const char kw_path[] = "path";

	if (key_len == sizeof(kw_hdrcharset) - 1 && memcmp(key, kw_hdrcharset, sizeof(kw_hdrcharset) - 1) == 0) {
		return phar_pax_parse_hdrcharset(key, key_len, record_end)
			? NULL
			: "invalid header character set";
	}

	if (key_len == sizeof(kw_path) - 1 && memcmp(key, kw_path, sizeof(kw_path) - 1) == 0) {
		return phar_pax_parse_path(key, key_len, record_end, ctx);
	}

	/* Unknown keywords are not an error. */
	return NULL;
}
306+
307+
/* Record callback for global PAX headers.
 * Validates "hdrcharset" and rejects "path"; all other keywords are ignored.
 * `ctx` is not used. Returns NULL on success or a static error message. */
static const char *phar_pax_global_cb(const char *key, uint32_t key_len, const char *record_end, void *ctx)
{
	static const char kw_hdrcharset[] = "hdrcharset";
	static const char kw_path[] = "path";

	(void) ctx;

	if (key_len == sizeof(kw_hdrcharset) - 1 && memcmp(key, kw_hdrcharset, sizeof(kw_hdrcharset) - 1) == 0) {
		return phar_pax_parse_hdrcharset(key, key_len, record_end)
			? NULL
			: "invalid header character set";
	}

	if (key_len == sizeof(kw_path) - 1 && memcmp(key, kw_path, sizeof(kw_path) - 1) == 0) {
		/* Some applications support a global path override and others do not;
		 * supporting it here would only add needless complications. */
		return "unsupported global path override";
	}

	/* Unknown keywords are not an error. */
	return NULL;
}
320+
201321
zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, char *alias, size_t alias_len, phar_archive_data** pphar, uint32_t compression, char **error) /* {{{ */
202322
{
203323
char buf[512], *actual_alias = NULL, *p;
@@ -208,6 +328,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
208328
phar_archive_data *myphar, *actual;
209329
bool last_was_longlink = false;
210330
size_t linkname_len;
331+
zend_string *pax_local_filename = NULL;
211332

212333
if (error) {
213334
*error = NULL;
@@ -270,13 +391,51 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
270391
size = entry.uncompressed_filesize = entry.compressed_filesize =
271392
phar_tar_oct_number(hdr->size, sizeof(hdr->size));
272393

273-
/* skip global/file headers (pax) */
274-
if (!old && (hdr->typeflag == TAR_GLOBAL_HDR || hdr->typeflag == TAR_FILE_HDR)) {
275-
size = (size+511)&~511;
276-
goto next;
394+
/* Process global/file pax header: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/pax.html */
395+
if (!old && hdr->typeflag == TAR_GLOBAL_HDR) {
396+
size = (size + 511) & ~511;
397+
char *pax_data = emalloc(size);
398+
399+
if (UNEXPECTED(php_stream_read(fp, pax_data, size) != size)) {
400+
efree(pax_data);
401+
goto truncated;
402+
}
403+
404+
const char *err = phar_parse_pax_header(pax_data, size, NULL, phar_pax_global_cb);
405+
efree(pax_data);
406+
407+
if (err) {
408+
/* Previous versions of PHP just ignored the PAX headers, so let's not hard fail here. */
409+
php_error_docref(NULL, E_NOTICE, "Global PAX header not understood: %s", err);
410+
}
411+
412+
goto next_no_seek;
413+
} else if (!old && hdr->typeflag == TAR_FILE_HDR) {
414+
size = (size + 511) & ~511;
415+
char *pax_data = emalloc(size);
416+
417+
if (UNEXPECTED(php_stream_read(fp, pax_data, size) != size)) {
418+
efree(pax_data);
419+
goto truncated;
420+
}
421+
422+
struct phar_pax_local pax;
423+
pax.filename_override = pax_local_filename;
424+
pax.is_persistent = myphar->is_persistent;
425+
const char *err = phar_parse_pax_header(pax_data, size, &pax, phar_pax_local_cb);
426+
pax_local_filename = pax.filename_override;
427+
efree(pax_data);
428+
429+
if (err) {
430+
/* Previous versions of PHP just ignored the PAX headers, so let's not hard fail here. */
431+
php_error_docref(NULL, E_NOTICE, "Global PAX header not understood: %s", err);
432+
}
433+
434+
goto next_no_seek;
277435
}
278436

279-
if (((!old && hdr->prefix[0] == 0) || old) && zend_strnlen(hdr->name, 100) == sizeof(".phar/signature.bin")-1 && !strncmp(hdr->name, ".phar/signature.bin", sizeof(".phar/signature.bin")-1)) {
437+
if ((pax_local_filename && zend_string_equals_literal(pax_local_filename, ".phar/signature.bin"))
438+
|| (((!old && hdr->prefix[0] == 0) || old) && !strcmp(hdr->name, ".phar/signature.bin"))) {
280439
zend_off_t curloc;
281440
size_t sig_len;
282441

@@ -285,6 +444,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
285444
spprintf(error, 4096, "phar error: tar-based phar \"%s\" has signature that is larger than 511 bytes, cannot process", fname);
286445
}
287446
bail:
447+
pefree(pax_local_filename, myphar->is_persistent);
288448
php_stream_close(fp);
289449
phar_destroy_phar_data(myphar);
290450
return FAILURE;
@@ -352,7 +512,11 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
352512
goto bail;
353513
}
354514

355-
if (!last_was_longlink && hdr->typeflag == 'L') {
515+
if (pax_local_filename) {
516+
// TODO: can this conflict with longlink?
517+
entry.filename = pax_local_filename;
518+
pax_local_filename = NULL;
519+
} else if (!last_was_longlink && hdr->typeflag == 'L') {
356520
last_was_longlink = true;
357521
/* support the ././@LongLink system for storing long filenames */
358522

@@ -557,9 +721,9 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
557721
size = (size+511)&~511;
558722

559723
if (((hdr->typeflag == '\0') || (hdr->typeflag == TAR_FILE)) && size > 0) {
560-
next:
561724
/* this is not good enough - seek succeeds even on truncated tars */
562725
php_stream_seek(fp, size, SEEK_CUR);
726+
next_no_seek:
563727
if ((uint32_t)php_stream_tell(fp) > totalsize) {
564728
if (error) {
565729
spprintf(error, 4096, "phar error: \"%s\" is a corrupted tar file (truncated)", fname);
@@ -576,6 +740,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
576740
read = php_stream_read(fp, buf, sizeof(buf));
577741

578742
if (read != sizeof(buf)) {
743+
truncated:
579744
if (error) {
580745
spprintf(error, 4096, "phar error: \"%s\" is a corrupted tar file (truncated)", fname);
581746
}
@@ -626,6 +791,8 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
626791

627792
myphar = actual;
628793

794+
pefree(pax_local_filename, myphar->is_persistent);
795+
629796
if (actual_alias) {
630797
phar_archive_data *fd_ptr;
631798

0 commit comments

Comments
 (0)