1- import type {
2- EntryType ,
3- DirectoryContent ,
4- HeaderOptions ,
5- ReadFileOptions ,
6- WalkDirectoryOptions ,
7- TarOptions ,
8- } from './types' ;
9- import fs from 'fs' ;
10- import path from 'path' ;
11- import { EntryTypes } from './types' ;
1+ import type { FileStat } from './types' ;
2+ import { EntryType , HeaderSize , HeaderOffset } from './types' ;
123import * as errors from './errors' ;
4+ import * as utils from './utils' ;
5+ import * as constants from './constants' ;
136
14- // Set defaults to the options used by the generators
15- const defaultHeaderOptions : HeaderOptions = {
16- fileNameEncoding : 'utf8' ,
17- blockSize : 512 ,
18- } ;
19- const defaultReadFileOptions : ReadFileOptions = {
20- fs : fs . promises ,
21- blockSize : 512 ,
22- } ;
23- const defaultWalkDirectoryOptions : WalkDirectoryOptions = {
24- fs : fs . promises ,
25- blockSize : 512 ,
26- } ;
27- const defaultTarOptions : TarOptions = {
28- fs : fs . promises ,
29- blockSize : 512 ,
30- fileNameEncoding : 'utf8' ,
31- } ;
32-
7+ // Computes the checksum by adding the value of every single byte in the header
338function computeChecksum ( header : Buffer ) : number {
349 if ( ! header . subarray ( 148 , 156 ) . every ( ( byte ) => byte === 32 ) ) {
3510 throw new errors . ErrorVirtualTarInvalidHeader (
@@ -39,24 +14,41 @@ function computeChecksum(header: Buffer): number {
3914 return header . reduce ( ( sum , byte ) => sum + byte , 0 ) ;
4015}
4116
17+ // TODO: Should logging be included?
4218function createHeader (
4319 filePath : string ,
44- stat : fs . Stats ,
20+ stat : FileStat ,
4521 type : EntryType ,
46- options : Partial < HeaderOptions > = defaultHeaderOptions ,
4722) : Buffer {
23+ // TODO: implement long-file-name headers
4824 if ( filePath . length < 1 || filePath . length > 255 ) {
4925 throw new errors . ErrorVirtualTarInvalidFileName (
5026 'The file name must be longer than 1 character and shorter than 255 characters' ,
5127 ) ;
5228 }
5329
54- // Merge the defaults with the provided options
55- const opts : HeaderOptions = { ...defaultHeaderOptions , ...options } ;
30+ // The file path must not contain any directories, and must only contain a
31+ // file name. This guard checks that.
32+ if ( filePath . includes ( '/' ) ) {
33+ throw new errors . ErrorVirtualTarInvalidFileName (
34+ 'File name must not contain /' ,
35+ ) ;
36+ }
37+
38+ // As the size does not matter for directories, it can be undefined. However,
39+ // if the header is being generated for a file, then it needs to have a valid
40+ // size. This guard checks that.
41+ if ( stat . size == null && type === EntryType . FILE ) {
42+ throw new errors . ErrorVirtualTarInvalidStat ( 'Size must be set for files' ) ;
43+ }
44+ const size = type === EntryType . FILE ? stat . size : 0 ;
45+
46+ // NOTE(review): an undefined mtime falls back to the current time here, not epoch 0 - confirm which is intended.
47+ const time = utils . dateToUnixTime ( stat . mtime ?? new Date ( ) ) ;
5648
57- const size = type === EntryTypes . FILE ? stat . size : 0 ;
58- const time = parseInt ( ( stat . mtime . getTime ( ) / 1000 ) . toFixed ( 0 ) ) ; // Unix time
59- const header = Buffer . alloc ( opts . blockSize , 0 ) ;
49+ // Make sure to initialise the header with zeros to avoid writing nullish
50+ // blocks.
51+ const header = Buffer . alloc ( constants . BLOCK_SIZE , 0 ) ;
6052
6153 // The TAR headers follow this structure
6254 // Start Size Description
@@ -68,7 +60,7 @@ function createHeader(
6860 // 124 12 File size (null-padded octal, 0 for directories)
6961 // 136 12 Mtime (null-padded octal)
7062 // 148 8 Checksum (fill with ASCII spaces for computation)
71- // 156 1 Type flag (0 for file, 5 for directory)
63+ // 156 1 Type flag ('0' for file, '5' for directory)
7264 // 157 100 File owner name (null-terminated ASCII/UTF-8)
7365 // 257 6 'ustar\0' (magic string)
7466 // 263 2 '00' (ustar version)
@@ -78,119 +70,146 @@ function createHeader(
7870 // 337 8 Device minor (unset in this implementation)
7971 // 345 155 File name (last 155 bytes, total 255 bytes, null-padded)
8072 // 500 12 '\0' (unused)
73+ //
74+ // Note that all values are in ASCII format, which is different from the
75+ // default formatting of UTF-8 for Buffer.write(). All numbers are also in
76+ // octal format as opposed to decimal or hexadecimal.
77+
78+ // The first half of the file name (up to 100 bytes) is stored here.
79+ header . write (
80+ utils . splitFileName ( filePath , 0 , HeaderSize . FILE_NAME ) ,
81+ HeaderOffset . FILE_NAME ,
82+ HeaderSize . FILE_NAME ,
83+ constants . HEADER_ENCODING ,
84+ ) ;
85+
86+ // The file permissions, or the mode, is stored in the next chunk. This is
87+ // stored in an octal number format.
88+ header . write (
89+ utils . pad ( stat . mode ?? '' , HeaderSize . FILE_MODE , '0' , '\0' ) ,
90+ HeaderOffset . FILE_MODE ,
91+ HeaderSize . FILE_MODE ,
92+ constants . HEADER_ENCODING ,
93+ ) ;
94+
95+ // The owner UID is stored in this chunk
96+ header . write (
97+ utils . pad ( stat . uid ?? '' , HeaderSize . OWNER_UID , '0' , '\0' ) ,
98+ HeaderOffset . OWNER_UID ,
99+ HeaderSize . OWNER_UID ,
100+ constants . HEADER_ENCODING ,
101+ ) ;
102+
103+ // The owner GID is stored in this chunk
104+ header . write (
105+ utils . pad ( stat . gid ?? '' , HeaderSize . OWNER_GID , '0' , '\0' ) ,
106+ HeaderOffset . OWNER_GID ,
107+ HeaderSize . OWNER_GID ,
108+ constants . HEADER_ENCODING ,
109+ ) ;
110+
111+ // The file size is stored in this chunk. The file size must be zero for
112+ // directories, and it must be set for files.
113+ header . write (
114+ utils . pad ( size ?? '' , HeaderSize . FILE_SIZE , '0' , '\0' ) ,
115+ HeaderOffset . FILE_SIZE ,
116+ HeaderSize . FILE_SIZE ,
117+ constants . HEADER_ENCODING ,
118+ ) ;
119+
120+ // The file mtime is stored in this chunk. As the mtime is not modified when
121+ // extracting a TAR file, the mtime can be preserved while still getting
122+ // deterministic archives.
123+ header . write (
124+ utils . pad ( time , HeaderSize . FILE_MTIME , '0' , '\0' ) ,
125+ HeaderOffset . FILE_MTIME ,
126+ HeaderSize . FILE_MTIME ,
127+ constants . HEADER_ENCODING ,
128+ ) ;
129+
130+ // The checksum is calculated as the sum of all bytes in the header. It is
131+ // padded using ASCII spaces, as we currently don't have all the data yet.
132+ header . write (
133+ utils . pad ( '' , HeaderSize . CHECKSUM , ' ' ) ,
134+ HeaderOffset . CHECKSUM ,
135+ HeaderSize . CHECKSUM ,
136+ constants . HEADER_ENCODING ,
137+ ) ;
81138
139+ // The type of file is written as a single byte in the header.
82140 header . write (
83- filePath . slice ( 0 , 99 ) . padEnd ( 100 , '\0' ) ,
84- 0 ,
85- 100 ,
86- opts . fileNameEncoding ,
141+ type ,
142+ HeaderOffset . TYPE_FLAG ,
143+ HeaderSize . TYPE_FLAG ,
144+ constants . HEADER_ENCODING ,
87145 ) ;
88- header . write ( stat . mode . toString ( 8 ) . padStart ( 7 , '0' ) + '\0' , 100 , 12 , 'ascii' ) ;
89- header . write ( stat . uid . toString ( 8 ) . padStart ( 7 , '0' ) + '\0' , 108 , 12 , 'ascii' ) ;
90- header . write ( stat . gid . toString ( 8 ) . padStart ( 7 , '0' ) + '\0' , 116 , 12 , 'ascii' ) ;
91- header . write ( size . toString ( 8 ) . padStart ( 7 , '0' ) + '\0' , 124 , 12 , 'ascii' ) ;
92- header . write ( time . toString ( 8 ) . padStart ( 7 , '0' ) + '\0' , 136 , 12 , 'ascii' ) ;
93- header . write ( ' ' , 148 , 8 , 'ascii' ) ; // Placeholder for checksum
94- header . write ( type , 156 , 1 , 'ascii' ) ;
95- // File owner name will be null
96- header . write ( 'ustar\0' , 257 , 'ascii' ) ;
97- header . write ( '00' , 263 , 2 , 'ascii' ) ;
98- // Owner user name will be null
99- // Owner group name will be null
100- // Device major will be null
101- // Device minor will be null
146+
147+ // File owner name will be null, as regular stat-ing cannot extract that
148+ // information.
149+
150+ // This value is the USTAR magic string which makes this file appear as
151+ // a tar file. Without this, the file cannot be parsed and extracted.
152+ header . write (
153+ constants . USTAR_NAME ,
154+ HeaderOffset . USTAR_NAME ,
155+ HeaderSize . USTAR_NAME ,
156+ constants . HEADER_ENCODING ,
157+ ) ;
158+
159+ // This chunk stores the version of USTAR, which is '00' in this case.
160+ header . write (
161+ constants . USTAR_VERSION ,
162+ HeaderOffset . USTAR_VERSION ,
163+ HeaderSize . USTAR_VERSION ,
164+ constants . HEADER_ENCODING ,
165+ ) ;
166+
167+ // Owner user name will be null, as regular stat-ing cannot extract this
168+ // information.
169+
170+ // Owner group name will be null, as regular stat-ing cannot extract this
171+ // information.
172+
173+ // Device major will be null, as this is specific to the Linux kernel knowing
174+ // which drivers to use for executing certain files, and is irrelevant here.
175+
176+ // Device minor will be null, as this is specific to the Linux kernel knowing
177+ // which drivers to use for executing certain files, and is irrelevant here.
178+
179+ // The second half of the file name is entered here. This chunk handles file
180+ // names ranging 100 to 255 characters.
102181 header . write (
103- filePath . slice ( 100 ) . padEnd ( 155 , '\0' ) ,
104- 345 ,
105- 155 ,
106- opts . fileNameEncoding ,
182+ utils . splitFileName (
183+ filePath ,
184+ HeaderSize . FILE_NAME ,
185+ HeaderSize . FILE_NAME_EXTRA ,
186+ ) ,
187+ HeaderOffset . FILE_NAME_EXTRA ,
188+ HeaderSize . FILE_NAME_EXTRA ,
189+ constants . HEADER_ENCODING ,
107190 ) ;
108191
109192 // Updating with the new checksum
110193 const checksum = computeChecksum ( header ) ;
111- header . write ( checksum . toString ( 8 ) . padStart ( 6 , '0' ) + '\0 ' , 148 , 8 , 'ascii' ) ;
112-
113- return header ;
114- }
115194
116- async function * readFile (
117- filePath : string ,
118- options : Partial < ReadFileOptions > = defaultReadFileOptions ,
119- ) : AsyncGenerator < Buffer , void , void > {
120- const opts : ReadFileOptions = { ...defaultReadFileOptions , ...options } ;
121- const fileHandle = await opts . fs . open ( filePath , 'r' ) ;
122- const buffer = Buffer . alloc ( opts . blockSize ) ;
123- let bytesRead = - 1 ; // Initialisation value
124-
125- try {
126- while ( bytesRead !== 0 ) {
127- buffer . fill ( 0 ) ;
128- const result = await fileHandle . read ( buffer , 0 , opts . blockSize , null ) ;
129- bytesRead = result . bytesRead ;
130-
131- if ( bytesRead === 0 ) break ; // EOF reached
132- if ( bytesRead < 512 ) buffer . fill ( 0 , bytesRead , opts . blockSize ) ;
133-
134- yield buffer ;
135- }
136- } finally {
137- await fileHandle . close ( ) ;
138- }
139- }
195+ // Note the extra space in the padding for the checksum value. It is
196+ // intentionally placed there. The padding for checksum is ASCII spaces
197+ // instead of null, which is why it is used like this here.
198+ header . write (
199+ utils . pad ( checksum , HeaderSize . CHECKSUM , '0' , '\0 ' ) ,
200+ HeaderOffset . CHECKSUM ,
201+ HeaderSize . CHECKSUM ,
202+ constants . HEADER_ENCODING ,
203+ ) ;
140204
141- /**
142- * Traverse a directory recursively and yield file entries.
143- */
144- async function * walkDirectory (
145- baseDir : string ,
146- relativePath : string = '' ,
147- options : Partial < WalkDirectoryOptions > = defaultWalkDirectoryOptions ,
148- ) : AsyncGenerator < DirectoryContent > {
149- const opts : WalkDirectoryOptions = {
150- ...defaultWalkDirectoryOptions ,
151- ...options ,
152- } ;
153- const entries = await opts . fs . readdir ( path . join ( baseDir , relativePath ) ) ;
154-
155- // Sort the entries lexicographically
156- for ( const entry of entries . sort ( ) ) {
157- const fullPath = path . join ( baseDir , relativePath , entry ) ;
158- const stat = await opts . fs . stat ( fullPath ) ;
159- const tarPath = path . join ( relativePath , entry ) ;
160-
161- if ( stat . isDirectory ( ) ) {
162- yield { path : tarPath + '/' , stat : stat , type : EntryTypes . DIRECTORY } ;
163- yield * walkDirectory ( baseDir , path . join ( relativePath , entry ) ) ;
164- } else if ( stat . isFile ( ) ) {
165- yield { path : tarPath , stat : stat , type : EntryTypes . FILE } ;
166- }
167- }
205+ return header ;
168206}
169207
170- async function * createTar (
171- baseDir : string ,
172- options : Partial < TarOptions > = defaultTarOptions ,
173- ) : AsyncGenerator < Buffer , void , void > {
174- const opts = { ...defaultTarOptions , ...options } ;
175- const entryGen = walkDirectory ( baseDir , '' , {
176- fs : opts . fs ,
177- blockSize : opts . blockSize ,
178- } ) ;
179-
180- for await ( const entry of entryGen ) {
181- yield createHeader ( entry . path , entry . stat , entry . type ) ;
182-
183- if ( entry . type === EntryTypes . FILE ) {
184- yield * readFile ( path . join ( baseDir , entry . path ) , {
185- fs : opts . fs ,
186- blockSize : opts . blockSize ,
187- } ) ;
188- }
189- }
190-
191- // End-of-archive marker - two 512-byte null blocks
192- yield Buffer . alloc ( opts . blockSize , 0 ) ;
193- yield Buffer . alloc ( opts . blockSize , 0 ) ;
208+ // Creates the blocks marking the end of the archive. Returns an array of two
209+ // 512-byte buffers filled with nulls. This aligns with the tar end-of-archive
210+ // marker being two null-filled blocks.
211+ function generateEndMarker ( ) {
212+ return [ Buffer . alloc ( 512 , 0 ) , Buffer . alloc ( 512 , 0 ) ] ;
194213}
195214
196- export { createHeader , readFile , createTar } ;
215+ export { createHeader , generateEndMarker } ;
0 commit comments