1515 */
1616class Tar extends Archive
1717{
18+ const READ_CHUNK_SIZE = 1048576 ; // 1MB
1819
1920 protected $ file = '' ;
2021 protected $ comptype = Archive::COMPRESS_AUTO ;
@@ -23,6 +24,9 @@ class Tar extends Archive
2324 protected $ memory = '' ;
2425 protected $ closed = true ;
2526 protected $ writeaccess = false ;
27+ protected $ position = 0 ;
28+ protected $ contentUntil = 0 ;
29+ protected $ skipUntil = 0 ;
2630
2731 /**
2832 * Sets the compression to use
@@ -72,6 +76,7 @@ public function open($file)
7276 throw new ArchiveIOException ('Could not open file for reading: ' .$ this ->file );
7377 }
7478 $ this ->closed = false ;
79+ $ this ->position = 0 ;
7580 }
7681
7782 /**
@@ -118,12 +123,37 @@ public function yieldContents()
118123 continue ;
119124 }
120125
121- $ this ->skipbytes (ceil ($ header ['size ' ] / 512 ) * 512 );
126+ $ this ->contentUntil = $ this ->position + $ header ['size ' ];
127+ $ this ->skipUntil = $ this ->position + ceil ($ header ['size ' ] / 512 ) * 512 ;
128+
122129 yield $ this ->header2fileinfo ($ header );
130+
131+ $ skip = $ this ->skipUntil - $ this ->position ;
132+ if ($ skip > 0 ) {
133+ $ this ->skipbytes ($ skip );
134+ }
123135 }
124136
125137 $ this ->close ();
138+ }
126139
/**
 * Reads content of the current archive entry.
 *
 * Works only while iterating through the archive using the generator
 * returned by yieldContents(), which records where the content of the
 * entry it just yielded ends.
 *
 * @param int $length maximum number of bytes to read
 *
 * @return string the data read; an empty string when no content of the
 *                current entry is left (or a non-positive length is asked for)
 */
public function readCurrentEntry($length = PHP_INT_MAX)
{
    // never read beyond the end of the current entry's content
    $length = (int) min($length, $this->contentUntil - $this->position);

    // "<=" instead of "===": the remaining length is negative once the read
    // position has moved past the entry content (e.g. after the padding was
    // skipped), and the underlying read functions reject non-positive lengths
    if ($length <= 0) {
        return '';
    }

    return $this->readbytes($length);
}
128158
129159 /**
@@ -290,16 +320,27 @@ public function addFile($file, $fileinfo = '')
290320 throw new ArchiveIOException ('Could not open file for reading: ' . $ file );
291321 }
292322 while (!feof ($ fp )) {
293- $ data = fread ( $ fp , 512 );
294- $ read += strlen ( $ data );
323+ // for performance reasons read bigger chunks at once
324+ $ data = fread ( $ fp , self :: READ_CHUNK_SIZE );
295325 if ($ data === false ) {
296326 break ;
297327 }
298328 if ($ data === '' ) {
299329 break ;
300330 }
301- $ packed = pack ("a512 " , $ data );
302- $ this ->writebytes ($ packed );
331+ $ dataLen = strlen ($ data );
332+ $ read += $ dataLen ;
333+ // how much of data read fully fills 512-byte blocks?
334+ $ passLen = ($ dataLen >> 9 ) << 9 ;
335+ if ($ passLen === $ dataLen ) {
336+ // all - just write the data
337+ $ this ->writebytes ($ data );
338+ } else {
339+ // directly write what fills 512-byte blocks fully
340+ $ this ->writebytes (substr ($ data , 0 , $ passLen ));
341+ // pad the remainder to 512 bytes
342+ $ this ->writebytes (pack ("a512 " , substr ($ data , $ passLen )));
343+ }
303344 }
304345 fclose ($ fp );
305346
@@ -335,8 +376,11 @@ public function addData($fileinfo, $data)
335376 $ fileinfo ->setSize ($ len );
336377 $ this ->writeFileHeader ($ fileinfo );
337378
338- for ($ s = 0 ; $ s < $ len ; $ s += 512 ) {
339- $ this ->writebytes (pack ("a512 " , substr ($ data , $ s , 512 )));
379+ // write directly everything but the last block which needs padding
380+ $ passLen = ($ len >> 9 ) << 9 ;
381+ $ this ->writebytes (substr ($ data , 0 , $ passLen ));
382+ if ($ passLen < $ len ) {
383+ $ this ->writebytes (pack ("a512 " , substr ($ data , $ passLen , 512 )));
340384 }
341385
342386 if (is_callable ($ this ->callback )) {
@@ -439,12 +483,14 @@ public function save($file)
protected function readbytes($length)
{
    // Reads up to $length bytes from the open handle using the read function
    // matching the configured compression type, and advances the tracked
    // read position by the number of bytes actually returned.
    if ($this->comptype === Archive::COMPRESS_GZIP) {
        $ret = @gzread($this->fh, $length);
    } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
        $ret = @bzread($this->fh, $length);
    } else {
        $ret = @fread($this->fh, $length);
    }

    // the read functions return false on failure; only count real data so
    // that strlen() is never handed a non-string value
    if (is_string($ret)) {
        $this->position += strlen($ret);
    }

    // the raw result (including a failure value) is passed on unchanged
    return $ret;
}
449495
450496 /**
@@ -494,6 +540,7 @@ protected function skipbytes($bytes)
494540 } else {
495541 @fseek ($ this ->fh , $ bytes , SEEK_CUR );
496542 }
543+ $ this ->position += $ bytes ;
497544 }
498545
499546 /**
@@ -553,8 +600,8 @@ protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $
553600 $ uid = sprintf ("%6s " , decoct ($ uid ));
554601 $ gid = sprintf ("%6s " , decoct ($ gid ));
555602 $ perm = sprintf ("%6s " , decoct ($ perm ));
556- $ size = sprintf ( " %11s " , decoct ( $ size) );
557- $ mtime = sprintf ( " %11s " , decoct ( $ mtime ) );
603+ $ size = self :: numberEncode ( $ size, 12 );
604+ $ mtime = self :: numberEncode ( $ mtime , 12 );
558605
559606 $ data_first = pack ("a100a8a8a8a12A12 " , $ name , $ perm , $ uid , $ gid , $ size , $ mtime );
560607 $ data_last = pack ("a1a100a6a2a32a32a8a8a155a12 " , $ typeflag , '' , 'ustar ' , '' , '' , '' , '' , '' , $ prefix , "" );
@@ -614,8 +661,8 @@ protected function parseHeader($block)
614661 $ return ['perm ' ] = OctDec (trim ($ header ['perm ' ]));
615662 $ return ['uid ' ] = OctDec (trim ($ header ['uid ' ]));
616663 $ return ['gid ' ] = OctDec (trim ($ header ['gid ' ]));
617- $ return ['size ' ] = OctDec ( trim ( $ header ['size ' ]) );
618- $ return ['mtime ' ] = OctDec ( trim ( $ header ['mtime ' ]) );
664+ $ return ['size ' ] = self :: numberDecode ( $ header ['size ' ]);
665+ $ return ['mtime ' ] = self :: numberDecode ( $ header ['mtime ' ]);
619666 $ return ['typeflag ' ] = $ header ['typeflag ' ];
620667 $ return ['link ' ] = trim ($ header ['link ' ]);
621668 $ return ['uname ' ] = trim ($ header ['uname ' ]);
@@ -713,4 +760,64 @@ public function filetype($file)
713760 return Archive::COMPRESS_NONE ;
714761 }
715762
/**
 * Decodes a numeric tar header field.
 *
 * Supports the plain octal representation as well as the GNU base-256
 * extension for big (first byte 0x80) and negative (first byte 0xFF) numbers:
 * https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions
 *
 * @param string $field raw bytes of the header field
 * @return int
 */
static public function numberDecode($field)
{
    $fieldLen = strlen($field);
    $marker = ord(substr($field, 0, 1));

    // anything without a base-256 marker is a classic octal string
    if ($marker !== 255 && $marker !== 128) {
        return octdec(trim($field));
    }

    if ($marker === 255) {
        // negative number: start from the sign extension and add every
        // byte, the marker byte included
        $value = -1 << (8 * $fieldLen);
        $stopAt = 0;
    } else {
        // positive number: the marker byte carries no value bits
        $value = 0;
        $stopAt = 1;
    }

    // accumulate the big-endian bytes, least significant (last) byte first
    $shift = 0;
    for ($pos = $fieldLen - 1; $pos >= $stopAt; $pos--) {
        $value += ord(substr($field, $pos, 1)) << $shift;
        $shift += 8;
    }

    return $value;
}
793+
/**
 * Encodes a number for a numeric tar header field.
 *
 * Values that fit are written as a right-aligned octal string; negative
 * values and values too big for the octal form use the GNU base-256
 * extension:
 * https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions
 *
 * @param int $value  number to encode
 * @param int $length field length in bytes
 * @return string
 */
static public function numberEncode($value, $length)
{
    // old implementations leave the last byte of the field empty and every
    // octal digit stores three bits, which bounds the octal representation
    $octalLimit = 1 << (($length - 1) * 3);

    if ($value < 0) {
        // negative: the field is filled with 0xFF in front of the integer bytes
        $head = str_repeat(chr(255), max(1, $length - PHP_INT_SIZE));
    } elseif ($value >= $octalLimit) {
        // too big for octal: 0x80 marker followed by zero padding
        $head = chr(128) . str_repeat(chr(0), max(0, $length - PHP_INT_SIZE - 1));
    } else {
        return sprintf("%" . ($length - 1) . "s ", decoct($value));
    }

    // PHP already stores integers as 2's complement, so the big-endian
    // machine representation can be appended as is (cut down when the field
    // is shorter than a native integer plus the marker byte)
    $binary = pack(PHP_INT_SIZE === 8 ? 'J' : 'N', (int) $value);

    return $head . substr($binary, max(0, PHP_INT_SIZE - $length + 1));
}
716822}
823+
0 commit comments