Skip to content

Commit 9d9281c

Browse files
committed
Parse PAX header and extract path
1 parent c9a2f08 commit 9d9281c

File tree

1 file changed

+188
-15
lines changed

1 file changed

+188
-15
lines changed

ext/phar/tar.c

Lines changed: 188 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,128 @@ static zend_result phar_tar_process_metadata(phar_entry_info *entry, php_stream
198198
}
199199
/* }}} */
200200

201+
/* Parses a PAX extended header and calls `cb` for each well-formed key-value record,
 * passing along a `ctx` pointer to use for storing data when desired.
 *
 * Every record has the form "<length> <keyword>=<value>\n", where <length> is the
 * decimal size of the whole record (length digits, blank and newline included).
 *
 * ptr, size: the header data. The blank following each length field is overwritten
 *            with '\0' while parsing, so the buffer must be writable.
 * ctx:       opaque pointer handed to `cb` unchanged.
 * cb:        called with (keyword, keyword length, one-past-the-end of the record, ctx);
 *            a non-NULL return value aborts parsing.
 *
 * Returns the first non-NULL value returned by `cb`, or NULL otherwise. Parsing stops
 * silently at the first malformed record. */
static const char *phar_parse_pax_header(char *ptr, uint32_t size, void *ctx, const char *(*cb)(const char *, uint32_t, const char *, void *))
{
	const char *pax_data_end = ptr + size;
	while (ptr < pax_data_end) {
		/* Format: "%d %s=%s\n" */

		/* strtoull() skips leading whitespace and accepts a sign. Requiring a leading
		 * digit guarantees that everything up to the blank is part of the number, so
		 * the blank can never end up behind the end of the record. */
		if (*ptr < '0' || *ptr > '9') {
			break;
		}

		char *endptr;
		char *blank = memchr(ptr, ' ', pax_data_end - ptr);
		if (!blank) {
			break;
		}
		*blank = '\0';
		/* Keep the full width until the bounds check is done, size_t may be narrower. */
		unsigned long long kv_size = strtoull(ptr, &endptr, 10);
		/* blank, non-empty keyword, equals, newline terminator */
		if (endptr != blank || kv_size < 4 || kv_size > (unsigned long long) (pax_data_end - ptr)) {
			break;
		}

		/* Check terminator */
		char *record_end = ptr + (size_t) kv_size;
		if (record_end[-1] != '\n') {
			break;
		}

		/* Validate keyword */
		const char *key = blank + 1;
		if (key >= record_end) {
			/* The length field claims a record that ends before the keyword starts. */
			break;
		}
		const char *equals = memchr(key, '=', record_end - key);
		if (!equals || key == equals) {
			break;
		}

		const char *err = cb(key, equals - key, record_end, ctx);
		if (err) {
			return err;
		}

		ptr = record_end;
	}

	return NULL;
}
243+
244+
struct phar_pax_local {
245+
zend_string *filename_override;
246+
bool is_persistent;
247+
};
248+
249+
static const char *phar_pax_parse_path(const char *key, uint32_t key_len, const char *record_end, struct phar_pax_local *pax)
250+
{
251+
const char *filename_start = key + key_len + 1;
252+
size_t pax_local_filename_len = record_end - filename_start;
253+
if (pax_local_filename_len <= 1) {
254+
return "invalid path length";
255+
}
256+
257+
/* strip '\n' */
258+
pax_local_filename_len--;
259+
/* Ending '/' stripping */
260+
if (filename_start[pax_local_filename_len - 1] == '/') {
261+
pax_local_filename_len--;
262+
}
263+
264+
/* Last one takes precedence if multiple are provided */
265+
if (pax->filename_override) {
266+
pefree(pax->filename_override, pax->is_persistent);
267+
pax->filename_override = NULL;
268+
}
269+
270+
pax->filename_override = zend_string_init(filename_start, pax_local_filename_len, pax->is_persistent);
271+
if (pax->is_persistent) {
272+
GC_MAKE_PERSISTENT_LOCAL(pax_local_filename);
273+
}
274+
275+
return NULL;
276+
}
277+
278+
/* Checks whether the value of a "hdrcharset" PAX record names a supported character set.
 *
 * key, key_len: the keyword inside the record; the value starts right after the '='
 *               that follows it.
 * record_end:   one past the end of the record, i.e. just behind its terminating '\n'.
 *
 * Returns true when the value is exactly "BINARY", false for anything else. */
static bool phar_pax_parse_hdrcharset(const char *key, uint32_t key_len, const char *record_end)
{
	const char *value = key + key_len + 1;
	size_t value_len = record_end - value;

	/* The terminating newline belongs to the record, not to the value: without
	 * stripping it the length comparison below could never succeed. */
	if (value_len > 0 && value[value_len - 1] == '\n') {
		value_len--;
	}

	if ((value_len == strlen("BINARY") && strncmp(value, "BINARY", strlen("BINARY")) == 0)
#if 0 /* TODO: support UTF-8 to local locale conversion? */
		|| (value_len == strlen("ISO-IR 10646 2000 UTF-8") && strncmp(value, "ISO-IR 10646 2000 UTF-8", strlen("ISO-IR 10646 2000 UTF-8")) == 0)
#endif
	) {
		return true;
	}

	return false;
}
293+
294+
/* Record callback for per-file PAX headers.
 *
 * Validates "hdrcharset" records, forwards "path" records to phar_pax_parse_path()
 * with `ctx` as its context argument, and ignores every other keyword.
 *
 * Returns NULL when the record was accepted or ignored, an error message otherwise. */
static const char *phar_pax_local_cb(const char *key, uint32_t key_len, const char *record_end, void *ctx)
{
	static const char charset_keyword[] = "hdrcharset";
	static const char path_keyword[] = "path";

	if (key_len == sizeof(charset_keyword) - 1 && memcmp(key, charset_keyword, sizeof(charset_keyword) - 1) == 0) {
		return phar_pax_parse_hdrcharset(key, key_len, record_end) ? NULL : "invalid header character set";
	}

	if (key_len == sizeof(path_keyword) - 1 && memcmp(key, path_keyword, sizeof(path_keyword) - 1) == 0) {
		return phar_pax_parse_path(key, key_len, record_end, ctx);
	}

	/* Unknown keywords are skipped. */
	return NULL;
}
306+
307+
/* Record callback for global PAX headers.
 *
 * Validates "hdrcharset" records and refuses "path" records; every other keyword is
 * ignored. `ctx` points to a bool that is set to true when the error must be treated
 * as a hard failure by the caller.
 *
 * Returns NULL when the record was accepted or ignored, an error message otherwise. */
static const char *phar_pax_global_cb(const char *key, uint32_t key_len, const char *record_end, void *ctx)
{
	static const char charset_keyword[] = "hdrcharset";
	static const char path_keyword[] = "path";

	if (key_len == sizeof(charset_keyword) - 1 && memcmp(key, charset_keyword, sizeof(charset_keyword) - 1) == 0) {
		return phar_pax_parse_hdrcharset(key, key_len, record_end) ? NULL : "invalid header character set";
	}

	if (key_len == sizeof(path_keyword) - 1 && memcmp(key, path_keyword, sizeof(path_keyword) - 1) == 0) {
		/* Some applications support this (e.g. GNU tar), others don't (e.g. GNOME file roller).
		 * Supporting it would only add needless complications. */
		bool *hard_fail = ctx;
		*hard_fail = true;
		return "unsupported global path override";
	}

	/* Unknown keywords are skipped. */
	return NULL;
}
322+
201323
zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, char *alias, size_t alias_len, phar_archive_data** pphar, uint32_t compression, char **error) /* {{{ */
202324
{
203325
char buf[512], *actual_alias = NULL, *p;
@@ -206,7 +328,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
206328
tar_header *hdr;
207329
uint32_t sum1, sum2, size, old;
208330
phar_archive_data *myphar, *actual;
209-
bool last_was_longlink = false;
331+
bool last_was_name_override = false;
210332
size_t linkname_len;
211333

212334
if (error) {
@@ -270,13 +392,62 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
270392
size = entry.uncompressed_filesize = entry.compressed_filesize =
271393
phar_tar_oct_number(hdr->size, sizeof(hdr->size));
272394

273-
/* skip global/file headers (pax) */
274-
if (!old && (hdr->typeflag == TAR_GLOBAL_HDR || hdr->typeflag == TAR_FILE_HDR)) {
275-
size = (size+511)&~511;
276-
goto next;
395+
/* Process global/file pax header: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/pax.html */
396+
if (!old && hdr->typeflag == TAR_GLOBAL_HDR) {
397+
size = (size + 511) & ~511;
398+
char *pax_data = emalloc(size);
399+
400+
if (UNEXPECTED(php_stream_read(fp, pax_data, size) != size)) {
401+
efree(pax_data);
402+
goto truncated;
403+
}
404+
405+
bool hard_fail = false;
406+
const char *err = phar_parse_pax_header(pax_data, size, &hard_fail, phar_pax_global_cb);
407+
efree(pax_data);
408+
409+
if (err) {
410+
if (hard_fail) {
411+
/* Likely maliciously formed tar */
412+
spprintf(error, 4096, "phar error: \"%s\" is a corrupted tar file (invalid global pax header: %s)", fname, err);
413+
goto bail;
414+
} else {
415+
/* Previous versions of PHP just ignored the PAX headers, so let's not hard fail here. */
416+
php_error_docref(NULL, E_NOTICE, "Global PAX header component not understood: %s", err);
417+
}
418+
}
419+
420+
goto next_no_seek;
421+
} else if (!old && hdr->typeflag == TAR_FILE_HDR) {
422+
size = (size + 511) & ~511;
423+
char *pax_data = emalloc(size);
424+
425+
if (UNEXPECTED(php_stream_read(fp, pax_data, size) != size)) {
426+
efree(pax_data);
427+
goto truncated;
428+
}
429+
430+
struct phar_pax_local pax;
431+
pax.filename_override = entry.filename;
432+
pax.is_persistent = myphar->is_persistent;
433+
const char *err = phar_parse_pax_header(pax_data, size, &pax, phar_pax_local_cb);
434+
efree(pax_data);
435+
436+
if (pax.filename_override) {
437+
last_was_name_override = true;
438+
entry.filename = pax.filename_override;
439+
}
440+
441+
if (err) {
442+
/* Previous versions of PHP just ignored the PAX headers, so let's not hard fail here. */
443+
php_error_docref(NULL, E_NOTICE, "File PAX header component not understood: %s", err);
444+
}
445+
446+
goto next_no_seek;
277447
}
278448

279-
if (((!old && hdr->prefix[0] == 0) || old) && zend_strnlen(hdr->name, 100) == sizeof(".phar/signature.bin")-1 && !strncmp(hdr->name, ".phar/signature.bin", sizeof(".phar/signature.bin")-1)) {
449+
if ((entry.filename && zend_string_equals_literal(entry.filename, ".phar/signature.bin"))
450+
|| (((!old && hdr->prefix[0] == 0) || old) && !strcmp(hdr->name, ".phar/signature.bin"))) {
280451
zend_off_t curloc;
281452
size_t sig_len;
282453

@@ -352,8 +523,8 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
352523
goto bail;
353524
}
354525

355-
if (!last_was_longlink && hdr->typeflag == 'L') {
356-
last_was_longlink = true;
526+
if (!last_was_name_override && hdr->typeflag == 'L') {
527+
last_was_name_override = true;
357528
/* support the ././@LongLink system for storing long filenames */
358529

359530
/* Check for overflow - bug 61065 */
@@ -401,7 +572,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
401572
goto bail;
402573
}
403574
continue;
404-
} else if (!last_was_longlink && !old && hdr->prefix[0] != 0) {
575+
} else if (!last_was_name_override && !old && hdr->prefix[0] != 0) {
405576
char name[256];
406577
int i, j;
407578

@@ -430,7 +601,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
430601
if (myphar->is_persistent) {
431602
GC_MAKE_PERSISTENT_LOCAL(entry.filename);
432603
}
433-
} else if (!last_was_longlink) {
604+
} else if (!last_was_name_override) {
434605
/* calculate strlen, which can be no longer than 100 */
435606
uint32_t filename_len;
436607
for (filename_len = 0; filename_len < 100; filename_len++) {
@@ -449,7 +620,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
449620
GC_MAKE_PERSISTENT_LOCAL(entry.filename);
450621
}
451622
}
452-
last_was_longlink = false;
623+
last_was_name_override = false;
453624

454625
phar_add_virtual_dirs(myphar, ZSTR_VAL(entry.filename), ZSTR_LEN(entry.filename));
455626

@@ -499,21 +670,22 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
499670

500671
newentry = zend_hash_update_mem(&myphar->manifest, entry.filename, &entry, sizeof(phar_entry_info));
501672
ZEND_ASSERT(newentry != NULL);
673+
entry.filename = NULL;
502674

503675
if (entry.is_persistent) {
504676
++entry.manifest_pos;
505677
}
506678

507-
if (zend_string_starts_with_literal(entry.filename, ".phar/.metadata")) {
679+
if (zend_string_starts_with_literal(newentry->filename, ".phar/.metadata")) {
508680
if (FAILURE == phar_tar_process_metadata(newentry, fp)) {
509681
if (error) {
510-
spprintf(error, 4096, "phar error: tar-based phar \"%s\" has invalid metadata in magic file \"%s\"", fname, ZSTR_VAL(entry.filename));
682+
spprintf(error, 4096, "phar error: tar-based phar \"%s\" has invalid metadata in magic file \"%s\"", fname, ZSTR_VAL(newentry->filename));
511683
}
512684
goto bail;
513685
}
514686
}
515687

516-
if (!actual_alias && zend_string_equals_literal(entry.filename, ".phar/alias.txt")) {
688+
if (!actual_alias && zend_string_equals_literal(newentry->filename, ".phar/alias.txt")) {
517689
/* found explicit alias */
518690
if (size > 511) {
519691
if (error) {
@@ -557,9 +729,9 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
557729
size = (size+511)&~511;
558730

559731
if (((hdr->typeflag == '\0') || (hdr->typeflag == TAR_FILE)) && size > 0) {
560-
next:
561732
/* this is not good enough - seek succeeds even on truncated tars */
562733
php_stream_seek(fp, size, SEEK_CUR);
734+
next_no_seek:
563735
if ((uint32_t)php_stream_tell(fp) > totalsize) {
564736
if (error) {
565737
spprintf(error, 4096, "phar error: \"%s\" is a corrupted tar file (truncated)", fname);
@@ -576,6 +748,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
576748
read = php_stream_read(fp, buf, sizeof(buf));
577749

578750
if (read != sizeof(buf)) {
751+
truncated:
579752
if (error) {
580753
spprintf(error, 4096, "phar error: \"%s\" is a corrupted tar file (truncated)", fname);
581754
}

0 commit comments

Comments
 (0)