@@ -198,6 +198,126 @@ static zend_result phar_tar_process_metadata(phar_entry_info *entry, php_stream
198198}
199199/* }}} */
200200
/* Parses a PAX extended header and calls back into `cb` for each well-formed
 * key-value record, passing along a `ctx` pointer to use for storing data.
 *
 * Every record has the form "%d %s=%s\n": a decimal length, a blank, the
 * keyword, '=', the value and a terminating newline. The length counts the
 * whole record, including the length field itself and the newline.
 *
 * ptr  - start of the header data. The buffer is modified in place: the blank
 *        following each length field is overwritten with '\0' so that the
 *        number can be parsed as a C string.
 * size - number of bytes available at `ptr`.
 * ctx  - opaque pointer handed to `cb` unchanged.
 * cb   - invoked as cb(key, key_len, record_end, ctx). `key` is not
 *        NUL-terminated; the value starts at key + key_len + 1 and the
 *        record's '\n' is stored at record_end[-1].
 *
 * Returns the first non-NULL string returned by `cb`, or NULL when all records
 * were consumed. Parsing stops silently at the first malformed record. */
static const char *phar_parse_pax_header(char *ptr, uint32_t size, void *ctx, const char *(*cb)(const char *, uint32_t, const char *, void *))
{
	const char *pax_data_end = ptr + size;

	while (ptr < pax_data_end) {
		/* Format: "%d %s=%s\n" */

		/* strtoull() skips leading whitespace and accepts a sign. Without this
		 * check a record like "\n\n\n\n4 a=b\n" would yield a record end that
		 * lies *before* the keyword, and the negative distance would be used
		 * as a huge size_t further down. */
		if (*ptr < '0' || *ptr > '9') {
			break;
		}

		char *blank = memchr(ptr, ' ', pax_data_end - ptr);
		if (!blank) {
			break;
		}
		*blank = '\0';

		/* Compare as unsigned long long before narrowing, so oversized values
		 * cannot be truncated into range on platforms with a 32-bit size_t. */
		char *endptr;
		unsigned long long kv_size = strtoull(ptr, &endptr, 10);
		/* blank, non-empty keyword, equals, newline terminator */
		if (endptr != blank || kv_size < 4 || kv_size > (unsigned long long) (pax_data_end - ptr)) {
			break;
		}

		char *record_end = ptr + (size_t) kv_size;
		const char *key = blank + 1;

		/* The payload must hold at least one keyword byte, '=' and '\n' */
		if (record_end - key < 3) {
			break;
		}

		/* Check terminator */
		if (record_end[-1] != '\n') {
			break;
		}

		/* Validate keyword */
		const char *equals = memchr(key, '=', record_end - key);
		if (!equals || key == equals) {
			break;
		}

		const char *err = cb(key, equals - key, record_end, ctx);
		if (err) {
			return err;
		}

		ptr = record_end;
	}

	return NULL;
}
243+
/* State collected while parsing a local PAX header (TAR_FILE_HDR); handed to
 * the record callback through its `ctx` pointer. */
244+ struct phar_pax_local {
/* Filename taken from the most recent "path" record, or NULL if none was seen. */
245+ zend_string * filename_override ;
/* Allocation mode passed to zend_string_init()/pefree() for filename_override. */
246+ bool is_persistent ;
247+ };
248+
249+ static const char * phar_pax_parse_path (const char * key , uint32_t key_len , const char * record_end , struct phar_pax_local * pax )
250+ {
251+ const char * filename_start = key + key_len + 1 ;
252+ size_t pax_local_filename_len = record_end - filename_start ;
253+ if (pax_local_filename_len <= 1 ) {
254+ return "invalid path length" ;
255+ }
256+
257+ /* strip '\n' */
258+ pax_local_filename_len -- ;
259+ /* Ending '/' stripping */
260+ if (filename_start [pax_local_filename_len - 1 ] == '/' ) {
261+ pax_local_filename_len -- ;
262+ }
263+
264+ /* Last one takes precedence if multiple are provided */
265+ if (pax -> filename_override ) {
266+ pefree (pax -> filename_override , pax -> is_persistent );
267+ pax -> filename_override = NULL ;
268+ }
269+
270+ pax -> filename_override = zend_string_init (filename_start , pax_local_filename_len , pax -> is_persistent );
271+ if (pax -> is_persistent ) {
272+ GC_MAKE_PERSISTENT_LOCAL (pax_local_filename );
273+ }
274+
275+ return NULL ;
276+ }
277+
/* Checks whether the value of a "hdrcharset" record names a supported charset.
 *
 * key        - start of the keyword (not NUL-terminated)
 * key_len    - length of the keyword; the value starts at key + key_len + 1
 * record_end - one past the record's terminating '\n'
 *
 * Returns true only for "BINARY". */
static bool phar_pax_parse_hdrcharset(const char *key, uint32_t key_len, const char *record_end)
{
	static const char binary[] = "BINARY";
	const char *value = key + key_len + 1;

	/* The record must at least contain its '\n' terminator after the '=' */
	if (record_end - value < 1) {
		return false;
	}

	/* The '\n' at record_end[-1] terminates the record and is not part of the
	 * value; counting it would make every comparison below fail. */
	size_t value_len = (size_t) (record_end - value) - 1;

	if (value_len == sizeof(binary) - 1 && memcmp(value, binary, sizeof(binary) - 1) == 0) {
		return true;
	}

	/* TODO: support UTF-8 to local locale conversion? Until then
	 * "ISO-IR 10646 2000 UTF-8" is rejected like any other charset. */
	return false;
}
293+
/* Record callback for local PAX headers.
 *
 * "hdrcharset" records are validated, "path" records are stored in the
 * struct phar_pax_local passed as `ctx`; every other keyword is ignored.
 * Returns NULL on success or a static error message. */
static const char *phar_pax_local_cb(const char *key, uint32_t key_len, const char *record_end, void *ctx)
{
	static const char kw_charset[] = "hdrcharset";
	static const char kw_path[] = "path";

	if (key_len == sizeof(kw_charset) - 1 && !memcmp(key, kw_charset, sizeof(kw_charset) - 1)) {
		return phar_pax_parse_hdrcharset(key, key_len, record_end)
			? NULL
			: "invalid header character set";
	}

	if (key_len == sizeof(kw_path) - 1 && !memcmp(key, kw_path, sizeof(kw_path) - 1)) {
		return phar_pax_parse_path(key, key_len, record_end, ctx);
	}

	/* Unknown keywords are not an error */
	return NULL;
}
306+
/* Record callback for global PAX headers.
 *
 * "hdrcharset" records are validated and "path" records are refused; every
 * other keyword is ignored. `ctx` is not used.
 * Returns NULL on success or a static error message. */
static const char *phar_pax_global_cb(const char *key, uint32_t key_len, const char *record_end, void *ctx)
{
	static const char kw_charset[] = "hdrcharset";
	static const char kw_path[] = "path";

	if (key_len == sizeof(kw_charset) - 1 && !memcmp(key, kw_charset, sizeof(kw_charset) - 1)) {
		return phar_pax_parse_hdrcharset(key, key_len, record_end)
			? NULL
			: "invalid header character set";
	}

	if (key_len == sizeof(kw_path) - 1 && !memcmp(key, kw_path, sizeof(kw_path) - 1)) {
		/* Some applications support a global path override, others do not;
		 * accepting it would only add needless complications. */
		return "unsupported global path override";
	}

	/* Unknown keywords are not an error */
	return NULL;
}
320+
201321zend_result phar_parse_tarfile (php_stream * fp , char * fname , size_t fname_len , char * alias , size_t alias_len , phar_archive_data * * pphar , uint32_t compression , char * * error ) /* {{{ */
202322{
203323 char buf [512 ], * actual_alias = NULL , * p ;
@@ -208,6 +328,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
208328 phar_archive_data * myphar , * actual ;
209329 bool last_was_longlink = false;
210330 size_t linkname_len ;
331+ zend_string * pax_local_filename = NULL ;
211332
212333 if (error ) {
213334 * error = NULL ;
@@ -270,13 +391,51 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
270391 size = entry .uncompressed_filesize = entry .compressed_filesize =
271392 phar_tar_oct_number (hdr -> size , sizeof (hdr -> size ));
272393
273- /* skip global/file headers (pax) */
274- if (!old && (hdr -> typeflag == TAR_GLOBAL_HDR || hdr -> typeflag == TAR_FILE_HDR )) {
275- size = (size + 511 )& ~511 ;
276- goto next ;
394+ /* Process global/file pax header: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/pax.html */
395+ if (!old && hdr -> typeflag == TAR_GLOBAL_HDR ) {
396+ size = (size + 511 ) & ~511 ;
397+ char * pax_data = emalloc (size );
398+
399+ if (UNEXPECTED (php_stream_read (fp , pax_data , size ) != size )) {
400+ efree (pax_data );
401+ goto truncated ;
402+ }
403+
404+ const char * err = phar_parse_pax_header (pax_data , size , NULL , phar_pax_global_cb );
405+ efree (pax_data );
406+
407+ if (err ) {
408+ /* Previous versions of PHP just ignored the PAX headers, so let's not hard fail here. */
409+ php_error_docref (NULL , E_NOTICE , "Global PAX header not understood: %s" , err );
410+ }
411+
412+ goto next_no_seek ;
413+ } else if (!old && hdr -> typeflag == TAR_FILE_HDR ) {
414+ size = (size + 511 ) & ~511 ;
415+ char * pax_data = emalloc (size );
416+
417+ if (UNEXPECTED (php_stream_read (fp , pax_data , size ) != size )) {
418+ efree (pax_data );
419+ goto truncated ;
420+ }
421+
422+ struct phar_pax_local pax ;
423+ pax .filename_override = pax_local_filename ;
424+ pax .is_persistent = myphar -> is_persistent ;
425+ const char * err = phar_parse_pax_header (pax_data , size , & pax , phar_pax_local_cb );
426+ pax_local_filename = pax .filename_override ;
427+ efree (pax_data );
428+
429+ if (err ) {
430+ /* Previous versions of PHP just ignored the PAX headers, so let's not hard fail here. */
431+ php_error_docref (NULL , E_NOTICE , "Global PAX header not understood: %s" , err );
432+ }
433+
434+ goto next_no_seek ;
277435 }
278436
279- if (((!old && hdr -> prefix [0 ] == 0 ) || old ) && zend_strnlen (hdr -> name , 100 ) == sizeof (".phar/signature.bin" )- 1 && !strncmp (hdr -> name , ".phar/signature.bin" , sizeof (".phar/signature.bin" )- 1 )) {
437+ if ((pax_local_filename && zend_string_equals_literal (pax_local_filename , ".phar/signature.bin" ))
438+ || (((!old && hdr -> prefix [0 ] == 0 ) || old ) && !strcmp (hdr -> name , ".phar/signature.bin" ))) {
280439 zend_off_t curloc ;
281440 size_t sig_len ;
282441
@@ -285,6 +444,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
285444 spprintf (error , 4096 , "phar error: tar-based phar \"%s\" has signature that is larger than 511 bytes, cannot process" , fname );
286445 }
287446bail :
447+ pefree (pax_local_filename , myphar -> is_persistent );
288448 php_stream_close (fp );
289449 phar_destroy_phar_data (myphar );
290450 return FAILURE ;
@@ -352,7 +512,11 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
352512 goto bail ;
353513 }
354514
355- if (!last_was_longlink && hdr -> typeflag == 'L' ) {
515+ if (pax_local_filename ) {
516+ // TODO: can this conflict with longlink?
517+ entry .filename = pax_local_filename ;
518+ pax_local_filename = NULL ;
519+ } else if (!last_was_longlink && hdr -> typeflag == 'L' ) {
356520 last_was_longlink = true;
357521 /* support the ././@LongLink system for storing long filenames */
358522
@@ -557,9 +721,9 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
557721 size = (size + 511 )& ~511 ;
558722
559723 if (((hdr -> typeflag == '\0' ) || (hdr -> typeflag == TAR_FILE )) && size > 0 ) {
560- next :
561724 /* this is not good enough - seek succeeds even on truncated tars */
562725 php_stream_seek (fp , size , SEEK_CUR );
726+ next_no_seek :
563727 if ((uint32_t )php_stream_tell (fp ) > totalsize ) {
564728 if (error ) {
565729 spprintf (error , 4096 , "phar error: \"%s\" is a corrupted tar file (truncated)" , fname );
@@ -576,6 +740,7 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
576740 read = php_stream_read (fp , buf , sizeof (buf ));
577741
578742 if (read != sizeof (buf )) {
743+ truncated :
579744 if (error ) {
580745 spprintf (error , 4096 , "phar error: \"%s\" is a corrupted tar file (truncated)" , fname );
581746 }
@@ -626,6 +791,8 @@ zend_result phar_parse_tarfile(php_stream* fp, char *fname, size_t fname_len, ch
626791
627792 myphar = actual ;
628793
794+ pefree (pax_local_filename , myphar -> is_persistent );
795+
629796 if (actual_alias ) {
630797 phar_archive_data * fd_ptr ;
631798
0 commit comments