Skip to content

Commit 09ab1c7

Browse files
committed
feat: restructured project to be more functional
1 parent 166c2e9 commit 09ab1c7

File tree

6 files changed

+315
-194
lines changed

6 files changed

+315
-194
lines changed

src/Generator.ts

Lines changed: 159 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,10 @@
1-
import type {
2-
EntryType,
3-
DirectoryContent,
4-
HeaderOptions,
5-
ReadFileOptions,
6-
WalkDirectoryOptions,
7-
TarOptions,
8-
} from './types';
9-
import fs from 'fs';
10-
import path from 'path';
11-
import { EntryTypes } from './types';
1+
import type { FileStat } from './types';
2+
import { EntryType, HeaderSize, HeaderOffset } from './types';
123
import * as errors from './errors';
4+
import * as utils from './utils';
5+
import * as constants from './constants';
136

14-
// Set defaults to the options used by the generators
15-
const defaultHeaderOptions: HeaderOptions = {
16-
fileNameEncoding: 'utf8',
17-
blockSize: 512,
18-
};
19-
const defaultReadFileOptions: ReadFileOptions = {
20-
fs: fs.promises,
21-
blockSize: 512,
22-
};
23-
const defaultWalkDirectoryOptions: WalkDirectoryOptions = {
24-
fs: fs.promises,
25-
blockSize: 512,
26-
};
27-
const defaultTarOptions: TarOptions = {
28-
fs: fs.promises,
29-
blockSize: 512,
30-
fileNameEncoding: 'utf8',
31-
};
32-
7+
// Computes the checksum by adding the value of every single byte in the header
338
function computeChecksum(header: Buffer): number {
349
if (!header.subarray(148, 156).every((byte) => byte === 32)) {
3510
throw new errors.ErrorVirtualTarInvalidHeader(
@@ -39,24 +14,41 @@ function computeChecksum(header: Buffer): number {
3914
return header.reduce((sum, byte) => sum + byte, 0);
4015
}
4116

17+
// TODO: Should logging be included?
4218
function createHeader(
4319
filePath: string,
44-
stat: fs.Stats,
20+
stat: FileStat,
4521
type: EntryType,
46-
options: Partial<HeaderOptions> = defaultHeaderOptions,
4722
): Buffer {
23+
// TODO: implement long-file-name headers
4824
if (filePath.length < 1 || filePath.length > 255) {
4925
throw new errors.ErrorVirtualTarInvalidFileName(
5026
'The file name must be longer than 1 character and shorter than 255 characters',
5127
);
5228
}
5329

54-
// Merge the defaults with the provided options
55-
const opts: HeaderOptions = { ...defaultHeaderOptions, ...options };
30+
// The file path must not contain any directories, and must only contain a
31+
// file name. This guard checks that.
32+
if (filePath.includes('/')) {
33+
throw new errors.ErrorVirtualTarInvalidFileName(
34+
'File name must not contain /',
35+
);
36+
}
37+
38+
// As the size does not matter for directories, it can be undefined. However,
39+
// if the header is being generated for a file, then it needs to have a valid
40+
// size. This guard checks that.
41+
if (stat.size == null && type === EntryType.FILE) {
42+
throw new errors.ErrorVirtualTarInvalidStat('Size must be set for files');
43+
}
44+
const size = type === EntryType.FILE ? stat.size : 0;
45+
46+
// The mtime can be undefined, in which case the current time is used.
47+
const time = utils.dateToUnixTime(stat.mtime ?? new Date());
5648

57-
const size = type === EntryTypes.FILE ? stat.size : 0;
58-
const time = parseInt((stat.mtime.getTime() / 1000).toFixed(0)); // Unix time
59-
const header = Buffer.alloc(opts.blockSize, 0);
49+
// Initialise the header with zeros so that any field which is never written
50+
// stays null-filled.
51+
const header = Buffer.alloc(constants.BLOCK_SIZE, 0);
6052

6153
// The TAR headers follow this structure
6254
// Start Size Description
@@ -68,7 +60,7 @@ function createHeader(
6860
// 124 12 File size (null-padded octal, 0 for directories)
6961
// 136 12 Mtime (null-padded octal)
7062
// 148 8 Checksum (fill with ASCII spaces for computation)
71-
// 156 1 Type flag (0 for file, 5 for directory)
63+
// 156 1 Type flag ('0' for file, '5' for directory)
7264
// 157 100 Linked file name (null-terminated ASCII/UTF-8)
7365
// 257 6 'ustar\0' (magic string)
7466
// 263 2 '00' (ustar version)
@@ -78,119 +70,146 @@ function createHeader(
7870
// 337 8 Device minor (unset in this implementation)
7971
// 345 155 File name (last 155 bytes, total 255 bytes, null-padded)
8072
// 500 12 '\0' (unused)
73+
//
74+
// Note that all values are in ASCII format, which is different from the
75+
// default formatting of UTF-8 for Buffer.write(). All numbers are also in
76+
// octal format as opposed to decimal or hexadecimal.
77+
78+
// The first half of the file name (up to 100 bytes) is stored here.
79+
header.write(
80+
utils.splitFileName(filePath, 0, HeaderSize.FILE_NAME),
81+
HeaderOffset.FILE_NAME,
82+
HeaderSize.FILE_NAME,
83+
constants.HEADER_ENCODING,
84+
);
85+
86+
// The file permissions, or the mode, is stored in the next chunk. This is
87+
// stored in an octal number format.
88+
header.write(
89+
utils.pad(stat.mode ?? '', HeaderSize.FILE_MODE, '0', '\0'),
90+
HeaderOffset.FILE_MODE,
91+
HeaderSize.FILE_MODE,
92+
constants.HEADER_ENCODING,
93+
);
94+
95+
// The owner UID is stored in this chunk
96+
header.write(
97+
utils.pad(stat.uid ?? '', HeaderSize.OWNER_UID, '0', '\0'),
98+
HeaderOffset.OWNER_UID,
99+
HeaderSize.OWNER_UID,
100+
constants.HEADER_ENCODING,
101+
);
102+
103+
// The owner GID is stored in this chunk
104+
header.write(
105+
utils.pad(stat.gid ?? '', HeaderSize.OWNER_GID, '0', '\0'),
106+
HeaderOffset.OWNER_GID,
107+
HeaderSize.OWNER_GID,
108+
constants.HEADER_ENCODING,
109+
);
110+
111+
// The file size is stored in this chunk. The file size must be zero for
112+
// directories, and it must be set for files.
113+
header.write(
114+
utils.pad(size ?? '', HeaderSize.FILE_SIZE, '0', '\0'),
115+
HeaderOffset.FILE_SIZE,
116+
HeaderSize.FILE_SIZE,
117+
constants.HEADER_ENCODING,
118+
);
119+
120+
// The file mtime is stored in this chunk. As the mtime is not modified when
121+
// extracting a TAR file, the mtime can be preserved while still getting
122+
// deterministic archives.
123+
header.write(
124+
utils.pad(time, HeaderSize.FILE_MTIME, '0', '\0'),
125+
HeaderOffset.FILE_MTIME,
126+
HeaderSize.FILE_MTIME,
127+
constants.HEADER_ENCODING,
128+
);
129+
130+
// The checksum is calculated as the sum of all bytes in the header. It is
131+
// padded using ASCII spaces, as we currently don't have all the data yet.
132+
header.write(
133+
utils.pad('', HeaderSize.CHECKSUM, ' '),
134+
HeaderOffset.CHECKSUM,
135+
HeaderSize.CHECKSUM,
136+
constants.HEADER_ENCODING,
137+
);
81138

139+
// The type of file is written as a single byte in the header.
82140
header.write(
83-
filePath.slice(0, 99).padEnd(100, '\0'),
84-
0,
85-
100,
86-
opts.fileNameEncoding,
141+
type,
142+
HeaderOffset.TYPE_FLAG,
143+
HeaderSize.TYPE_FLAG,
144+
constants.HEADER_ENCODING,
87145
);
88-
header.write(stat.mode.toString(8).padStart(7, '0') + '\0', 100, 12, 'ascii');
89-
header.write(stat.uid.toString(8).padStart(7, '0') + '\0', 108, 12, 'ascii');
90-
header.write(stat.gid.toString(8).padStart(7, '0') + '\0', 116, 12, 'ascii');
91-
header.write(size.toString(8).padStart(7, '0') + '\0', 124, 12, 'ascii');
92-
header.write(time.toString(8).padStart(7, '0') + '\0', 136, 12, 'ascii');
93-
header.write(' ', 148, 8, 'ascii'); // Placeholder for checksum
94-
header.write(type, 156, 1, 'ascii');
95-
// File owner name will be null
96-
header.write('ustar\0', 257, 'ascii');
97-
header.write('00', 263, 2, 'ascii');
98-
// Owner user name will be null
99-
// Owner group name will be null
100-
// Device major will be null
101-
// Device minor will be null
146+
147+
// Linked file name will be null, as regular stat-ing cannot extract that
148+
// information.
149+
150+
// This value is the USTAR magic string which makes this file appear as
151+
// a tar file. Without this, the file cannot be parsed and extracted.
152+
header.write(
153+
constants.USTAR_NAME,
154+
HeaderOffset.USTAR_NAME,
155+
HeaderSize.USTAR_NAME,
156+
constants.HEADER_ENCODING,
157+
);
158+
159+
// This chunk stores the version of USTAR, which is '00' in this case.
160+
header.write(
161+
constants.USTAR_VERSION,
162+
HeaderOffset.USTAR_VERSION,
163+
HeaderSize.USTAR_VERSION,
164+
constants.HEADER_ENCODING,
165+
);
166+
167+
// Owner user name will be null, as regular stat-ing cannot extract this
168+
// information.
169+
170+
// Owner group name will be null, as regular stat-ing cannot extract this
171+
// information.
172+
173+
// Device major will be null, as this is specific to the Linux kernel knowing what
174+
// drivers to use for executing certain files, and is irrelevant here.
175+
176+
// Device minor will be null, as this is specific to the Linux kernel knowing what
177+
// drivers to use for executing certain files, and is irrelevant here.
178+
179+
// The second half of the file name is entered here. This chunk handles file
180+
// names ranging 100 to 255 characters.
102181
header.write(
103-
filePath.slice(100).padEnd(155, '\0'),
104-
345,
105-
155,
106-
opts.fileNameEncoding,
182+
utils.splitFileName(
183+
filePath,
184+
HeaderSize.FILE_NAME,
185+
HeaderSize.FILE_NAME_EXTRA,
186+
),
187+
HeaderOffset.FILE_NAME_EXTRA,
188+
HeaderSize.FILE_NAME_EXTRA,
189+
constants.HEADER_ENCODING,
107190
);
108191

109192
// Updating with the new checksum
110193
const checksum = computeChecksum(header);
111-
header.write(checksum.toString(8).padStart(6, '0') + '\0 ', 148, 8, 'ascii');
112-
113-
return header;
114-
}
115194

116-
async function* readFile(
117-
filePath: string,
118-
options: Partial<ReadFileOptions> = defaultReadFileOptions,
119-
): AsyncGenerator<Buffer, void, void> {
120-
const opts: ReadFileOptions = { ...defaultReadFileOptions, ...options };
121-
const fileHandle = await opts.fs.open(filePath, 'r');
122-
const buffer = Buffer.alloc(opts.blockSize);
123-
let bytesRead = -1; // Initialisation value
124-
125-
try {
126-
while (bytesRead !== 0) {
127-
buffer.fill(0);
128-
const result = await fileHandle.read(buffer, 0, opts.blockSize, null);
129-
bytesRead = result.bytesRead;
130-
131-
if (bytesRead === 0) break; // EOF reached
132-
if (bytesRead < 512) buffer.fill(0, bytesRead, opts.blockSize);
133-
134-
yield buffer;
135-
}
136-
} finally {
137-
await fileHandle.close();
138-
}
139-
}
195+
// Note the extra space in the padding for the checksum value. It is
196+
// intentionally placed there. The padding for checksum is ASCII spaces
197+
// instead of null, which is why it is used like this here.
198+
header.write(
199+
utils.pad(checksum, HeaderSize.CHECKSUM, '0', '\0 '),
200+
HeaderOffset.CHECKSUM,
201+
HeaderSize.CHECKSUM,
202+
constants.HEADER_ENCODING,
203+
);
140204

141-
/**
142-
* Traverse a directory recursively and yield file entries.
143-
*/
144-
async function* walkDirectory(
145-
baseDir: string,
146-
relativePath: string = '',
147-
options: Partial<WalkDirectoryOptions> = defaultWalkDirectoryOptions,
148-
): AsyncGenerator<DirectoryContent> {
149-
const opts: WalkDirectoryOptions = {
150-
...defaultWalkDirectoryOptions,
151-
...options,
152-
};
153-
const entries = await opts.fs.readdir(path.join(baseDir, relativePath));
154-
155-
// Sort the entries lexicographically
156-
for (const entry of entries.sort()) {
157-
const fullPath = path.join(baseDir, relativePath, entry);
158-
const stat = await opts.fs.stat(fullPath);
159-
const tarPath = path.join(relativePath, entry);
160-
161-
if (stat.isDirectory()) {
162-
yield { path: tarPath + '/', stat: stat, type: EntryTypes.DIRECTORY };
163-
yield* walkDirectory(baseDir, path.join(relativePath, entry));
164-
} else if (stat.isFile()) {
165-
yield { path: tarPath, stat: stat, type: EntryTypes.FILE };
166-
}
167-
}
205+
return header;
168206
}
169207

170-
async function* createTar(
171-
baseDir: string,
172-
options: Partial<TarOptions> = defaultTarOptions,
173-
): AsyncGenerator<Buffer, void, void> {
174-
const opts = { ...defaultTarOptions, ...options };
175-
const entryGen = walkDirectory(baseDir, '', {
176-
fs: opts.fs,
177-
blockSize: opts.blockSize,
178-
});
179-
180-
for await (const entry of entryGen) {
181-
yield createHeader(entry.path, entry.stat, entry.type);
182-
183-
if (entry.type === EntryTypes.FILE) {
184-
yield* readFile(path.join(baseDir, entry.path), {
185-
fs: opts.fs,
186-
blockSize: opts.blockSize,
187-
});
188-
}
189-
}
190-
191-
// End-of-archive marker - two 512-byte null blocks
192-
yield Buffer.alloc(opts.blockSize, 0);
193-
yield Buffer.alloc(opts.blockSize, 0);
208+
// Creates the blocks marking the end of the archive. Returns two buffers of
209+
// 512 bytes each, filled with nulls. This aligns with the tar end-of-archive marker
210+
// being two null-filled blocks.
211+
function generateEndMarker() {
212+
return [Buffer.alloc(512, 0), Buffer.alloc(512, 0)];
194213
}
195214

196-
export { createHeader, readFile, createTar };
215+
export { createHeader, generateEndMarker };

src/constants.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
export const BLOCK_SIZE = 512;
2+
export const USTAR_NAME = 'ustar\0';
3+
export const USTAR_VERSION = '00';
4+
export const HEADER_ENCODING = 'ascii';

src/errors.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,14 @@ class ErrorVirtualTarInvalidHeader<T> extends ErrorVirtualTar<T> {
1616
static description = 'The header has invalid data';
1717
}
1818

19+
class ErrorVirtualTarInvalidStat<T> extends ErrorVirtualTar<T> {
20+
static description = 'The stat contains invalid data';
21+
}
22+
1923
export {
2024
ErrorVirtualTar,
2125
ErrorVirtualTarUndefinedBehaviour,
2226
ErrorVirtualTarInvalidFileName,
2327
ErrorVirtualTarInvalidHeader,
28+
ErrorVirtualTarInvalidStat,
2429
};

0 commit comments

Comments
 (0)