Skip to content

Commit a36bb9c

Browse files
committed
feat: added simple parsing
1 parent 09ab1c7 commit a36bb9c

File tree

7 files changed

+283
-23
lines changed

7 files changed

+283
-23
lines changed

src/Generator.ts

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import * as errors from './errors';
44
import * as utils from './utils';
55
import * as constants from './constants';
66

7-
// Computes the checksum by adding the value of every single byte in the header
7+
// Computes the checksum by summing up all the bytes in the header
88
function computeChecksum(header: Buffer): number {
99
if (!header.subarray(148, 156).every((byte) => byte === 32)) {
1010
throw new errors.ErrorVirtualTarInvalidHeader(
@@ -80,7 +80,7 @@ function createHeader(
8080
utils.splitFileName(filePath, 0, HeaderSize.FILE_NAME),
8181
HeaderOffset.FILE_NAME,
8282
HeaderSize.FILE_NAME,
83-
constants.HEADER_ENCODING,
83+
constants.TEXT_ENCODING,
8484
);
8585

8686
// The file permissions, or the mode, is stored in the next chunk. This is
@@ -89,23 +89,23 @@ function createHeader(
8989
utils.pad(stat.mode ?? '', HeaderSize.FILE_MODE, '0', '\0'),
9090
HeaderOffset.FILE_MODE,
9191
HeaderSize.FILE_MODE,
92-
constants.HEADER_ENCODING,
92+
constants.TEXT_ENCODING,
9393
);
9494

9595
// The owner UID is stored in this chunk
9696
header.write(
9797
utils.pad(stat.uid ?? '', HeaderSize.OWNER_UID, '0', '\0'),
9898
HeaderOffset.OWNER_UID,
9999
HeaderSize.OWNER_UID,
100-
constants.HEADER_ENCODING,
100+
constants.TEXT_ENCODING,
101101
);
102102

103103
// The owner GID is stored in this chunk
104104
header.write(
105105
utils.pad(stat.gid ?? '', HeaderSize.OWNER_GID, '0', '\0'),
106106
HeaderOffset.OWNER_GID,
107107
HeaderSize.OWNER_GID,
108-
constants.HEADER_ENCODING,
108+
constants.TEXT_ENCODING,
109109
);
110110

111111
// The file size is stored in this chunk. The file size must be zero for
@@ -114,7 +114,7 @@ function createHeader(
114114
utils.pad(size ?? '', HeaderSize.FILE_SIZE, '0', '\0'),
115115
HeaderOffset.FILE_SIZE,
116116
HeaderSize.FILE_SIZE,
117-
constants.HEADER_ENCODING,
117+
constants.TEXT_ENCODING,
118118
);
119119

120120
// The file mtime is stored in this chunk. As the mtime is not modified when
@@ -124,7 +124,7 @@ function createHeader(
124124
utils.pad(time, HeaderSize.FILE_MTIME, '0', '\0'),
125125
HeaderOffset.FILE_MTIME,
126126
HeaderSize.FILE_MTIME,
127-
constants.HEADER_ENCODING,
127+
constants.TEXT_ENCODING,
128128
);
129129

130130
// The checksum is calculated as the sum of all bytes in the header. It is
@@ -133,15 +133,15 @@ function createHeader(
133133
utils.pad('', HeaderSize.CHECKSUM, ' '),
134134
HeaderOffset.CHECKSUM,
135135
HeaderSize.CHECKSUM,
136-
constants.HEADER_ENCODING,
136+
constants.TEXT_ENCODING,
137137
);
138138

139139
// The type of file is written as a single byte in the header.
140140
header.write(
141141
type,
142142
HeaderOffset.TYPE_FLAG,
143143
HeaderSize.TYPE_FLAG,
144-
constants.HEADER_ENCODING,
144+
constants.TEXT_ENCODING,
145145
);
146146

147147
// File owner name will be null, as regular stat-ing cannot extract that
@@ -153,15 +153,15 @@ function createHeader(
153153
constants.USTAR_NAME,
154154
HeaderOffset.USTAR_NAME,
155155
HeaderSize.USTAR_NAME,
156-
constants.HEADER_ENCODING,
156+
constants.TEXT_ENCODING,
157157
);
158158

159159
// This chunk stores the version of USTAR, which is '00' in this case.
160160
header.write(
161161
constants.USTAR_VERSION,
162162
HeaderOffset.USTAR_VERSION,
163163
HeaderSize.USTAR_VERSION,
164-
constants.HEADER_ENCODING,
164+
constants.TEXT_ENCODING,
165165
);
166166

167167
// Owner user name will be null, as regular stat-ing cannot extract this
@@ -186,7 +186,7 @@ function createHeader(
186186
),
187187
HeaderOffset.FILE_NAME_EXTRA,
188188
HeaderSize.FILE_NAME_EXTRA,
189-
constants.HEADER_ENCODING,
189+
constants.TEXT_ENCODING,
190190
);
191191

192192
// Updating with the new checksum
@@ -199,7 +199,7 @@ function createHeader(
199199
utils.pad(checksum, HeaderSize.CHECKSUM, '0', '\0 '),
200200
HeaderOffset.CHECKSUM,
201201
HeaderSize.CHECKSUM,
202-
constants.HEADER_ENCODING,
202+
constants.TEXT_ENCODING,
203203
);
204204

205205
return header;

src/Parser.ts

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import type { ParserState, Header, Data, End } from './types';
2+
import { HeaderOffset, HeaderSize, EntryType } from './types';
3+
import * as constants from './constants';
4+
import * as errors from './errors';
5+
import * as utils from './utils';
6+
7+
/**
 * Incremental tar parser that is fed exactly one block per `write` call.
 *
 * Internal states:
 * - `'ready'`: the next block is expected to be a header (or an all-zero
 *   block).
 * - `'header'`: a file header has been parsed and `remainingBytes` bytes of
 *   file content are still expected.
 * - `'null'`: one all-zero block has been seen; the next block must also be
 *   all zeros.
 */
class Parser {
  protected state: ParserState = 'ready';
  protected remainingBytes = 0;

  /**
   * Consumes one block and returns the token it represents.
   *
   * @param data - A buffer of exactly `constants.BLOCK_SIZE` bytes.
   * @returns A `Header` for a header block, a `Data` chunk for file content,
   * an `End` token once a second consecutive all-zero block is seen, or
   * `undefined` for the first all-zero block.
   * @throws ErrorVirtualTarBlockSize if `data` is not exactly one block long.
   * @throws ErrorVirtualTarEndOfArchive if a non-zero block follows an
   * all-zero block.
   */
  write(data: ArrayBuffer): Header | Data | End | undefined {
    if (data.byteLength !== constants.BLOCK_SIZE) {
      throw new errors.ErrorVirtualTarBlockSize(
        `Expected block size ${constants.BLOCK_SIZE} but received ${data.byteLength}`,
      );
    }

    // TODO: test if the first block is header by checking magic value
    const view = new DataView(data, 0, constants.BLOCK_SIZE);

    switch (this.state) {
      case 'ready': {
        if (utils.checkNullView(view)) {
          this.state = 'null';
          return;
        }

        const fileName = utils.parseFileName(view);
        const fileSize = utils.extractOctal(
          view,
          HeaderOffset.FILE_SIZE,
          HeaderSize.FILE_SIZE,
        );
        // The header stores the mtime in whole seconds (the generator writes
        // it via `dateToUnixTime`), whereas `Date` expects milliseconds.
        const fileMtime = new Date(
          utils.extractOctal(
            view,
            HeaderOffset.FILE_MTIME,
            HeaderSize.FILE_MTIME,
          ) * 1000,
        );
        const fileMode = utils.extractOctal(
          view,
          HeaderOffset.FILE_MODE,
          HeaderSize.FILE_MODE,
        );
        const ownerGid = utils.extractOctal(
          view,
          HeaderOffset.OWNER_GID,
          HeaderSize.OWNER_GID,
        );
        const ownerUid = utils.extractOctal(
          view,
          HeaderOffset.OWNER_UID,
          HeaderSize.OWNER_UID,
        );
        const ownerName = utils.extractChars(
          view,
          HeaderOffset.OWNER_NAME,
          HeaderSize.OWNER_NAME,
        );
        const ownerGroupName = utils.extractChars(
          view,
          HeaderOffset.OWNER_GROUPNAME,
          HeaderSize.OWNER_GROUPNAME,
        );
        const ownerUserName = utils.extractChars(
          view,
          HeaderOffset.OWNER_USERNAME,
          HeaderSize.OWNER_USERNAME,
        );
        // Any type flag other than FILE is reported as a directory.
        const fileType =
          utils.extractChars(
            view,
            HeaderOffset.TYPE_FLAG,
            HeaderSize.TYPE_FLAG,
          ) === EntryType.FILE
            ? 'file'
            : 'directory';

        // Only wait for content blocks when there is content. An empty file
        // has no data blocks, so the next block is already the next header
        // and must not be swallowed as a zero-length data chunk.
        if (fileType === 'file' && fileSize > 0) {
          this.state = 'header';
          this.remainingBytes = fileSize;
        }

        const parsedHeader: Header = {
          type: 'header',
          fileType,
          fileName,
          fileMode,
          fileMtime,
          fileSize,
          ownerGid,
          ownerUid,
          ownerName,
          ownerUserName,
          ownerGroupName,
        };

        return parsedHeader;
      }

      case 'header': {
        if (this.remainingBytes > constants.BLOCK_SIZE) {
          // A full block of content, with more still to come.
          this.remainingBytes -= constants.BLOCK_SIZE;
          return { type: 'data', data: utils.extractBytes(view) };
        }
        // Last block of this file: only the first `remainingBytes` bytes are
        // content, the rest of the block is padding.
        const chunk = utils.extractBytes(view, 0, this.remainingBytes);
        this.remainingBytes = 0;
        this.state = 'ready';
        return { type: 'data', data: chunk };
      }

      case 'null':
        if (utils.checkNullView(view)) return { type: 'end' };
        else throw new errors.ErrorVirtualTarEndOfArchive();

      default:
        utils.never('Unexpected state');
    }
  }
}
121+
122+
export default Parser;

src/constants.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
export const BLOCK_SIZE = 512;
22
export const USTAR_NAME = 'ustar\0';
33
export const USTAR_VERSION = '00';
4-
export const HEADER_ENCODING = 'ascii';
4+
export const TEXT_ENCODING = 'ascii';

src/errors.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,20 @@ class ErrorVirtualTarInvalidStat<T> extends ErrorVirtualTar<T> {
2020
static description = 'The stat contains invalid data';
2121
}
2222

23+
/**
 * Raised by the parser when it is handed a buffer whose byte length differs
 * from the expected block size.
 */
class ErrorVirtualTarBlockSize<T> extends ErrorVirtualTar<T> {
24+
static description = 'The block size is incorrect';
25+
}
26+
27+
/**
 * Raised by the parser when a block that is not entirely zero arrives after
 * an all-zero block has already been seen.
 */
class ErrorVirtualTarEndOfArchive<T> extends ErrorVirtualTar<T> {
28+
static description = 'No data can come after an end-of-archive marker';
29+
}
30+
2331
export {
2432
ErrorVirtualTar,
2533
ErrorVirtualTarUndefinedBehaviour,
2634
ErrorVirtualTarInvalidFileName,
2735
ErrorVirtualTarInvalidHeader,
2836
ErrorVirtualTarInvalidStat,
37+
ErrorVirtualTarBlockSize,
38+
ErrorVirtualTarEndOfArchive,
2939
};

src/types.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,31 @@ type FileStat = {
4949
mtime?: Date;
5050
};
5151

52-
export type { FileStat };
52+
/**
 * State of the block parser:
 * - 'ready': the next block is treated as a header (or an all-zero block);
 * - 'header': a file header has been parsed and its content blocks follow;
 * - 'null': an all-zero block has been seen and another one is expected.
 */
type ParserState = 'ready' | 'header' | 'null';
53+
54+
/**
 * Token returned by the parser for a header block. The `type` tag
 * distinguishes it from the `Data` and `End` tokens; the remaining fields are
 * decoded from the corresponding fields of the header block.
 */
type Header = {
55+
type: 'header';
56+
fileType: 'file' | 'directory';
57+
fileName: string;
58+
fileMode: number;
59+
ownerUid: number;
60+
ownerGid: number;
61+
fileSize: number;
62+
fileMtime: Date;
63+
ownerName: string;
64+
ownerUserName: string;
65+
ownerGroupName: string;
66+
};
67+
68+
/**
 * Token returned by the parser for a block of file content. `data` holds the
 * content bytes taken from that block.
 */
type Data = {
69+
type: 'data';
70+
data: Uint8Array;
71+
};
72+
73+
/**
 * Token returned by the parser once two consecutive all-zero blocks have been
 * seen.
 */
type End = {
74+
type: 'end';
75+
};
76+
77+
export type { FileStat, ParserState, Header, Data, End };
5378

5479
export { EntryType, HeaderOffset, HeaderSize };

src/utils.ts

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
import { HeaderOffset, HeaderSize } from './types';
12
import * as errors from './errors';
3+
import * as constants from './constants';
4+
5+
const nullRegex = /\0/g;
26

37
function never(message: string): never {
48
throw new errors.ErrorVirtualTarUndefinedBehaviour(message);
@@ -30,4 +34,66 @@ function dateToUnixTime(date: Date): number {
3034
return Math.round(date.getTime() / 1000);
3135
}
3236

33-
export { never, pad, splitFileName, dateToUnixTime };
37+
// PARSER
38+
39+
const decoder = new TextDecoder(constants.TEXT_ENCODING);
40+
41+
function extractBytes(
42+
view: DataView,
43+
offset?: number,
44+
length?: number,
45+
): Uint8Array {
46+
return new Uint8Array(view.buffer, offset, length);
47+
}
48+
49+
/**
 * Decodes a byte range of `view` into a string using the module-level text
 * decoder and removes every NUL character from the result.
 *
 * @param view - The view to read from.
 * @param offset - Start of the range; forwarded to `extractBytes`.
 * @param length - Length of the range; forwarded to `extractBytes`.
 * @returns The decoded text with all NUL characters stripped.
 */
function extractChars(
  view: DataView,
  offset?: number,
  length?: number,
): string {
  const bytes = extractBytes(view, offset, length);
  const text = decoder.decode(bytes);
  return text.replace(nullRegex, '');
}
58+
59+
/**
 * Reads a byte range of `view` as text and parses it as an octal number.
 *
 * @param view - The view to read from.
 * @param offset - Start of the field; forwarded to `extractChars`.
 * @param length - Length of the field; forwarded to `extractChars`.
 * @returns The parsed value, or 0 when the field is empty or contains only
 * NULs and whitespace. If the field does not start with an octal digit the
 * result is NaN, as returned by `parseInt`.
 */
function extractOctal(
  view: DataView,
  offset?: number,
  length?: number,
): number {
  // Trim before the emptiness check: a field holding only spaces would
  // otherwise reach parseInt and come back as NaN instead of 0.
  const digits = extractChars(view, offset, length).trim();
  return digits.length > 0 ? parseInt(digits, 8) : 0;
}
67+
68+
/**
 * Rebuilds the file name stored in a header block by reading the FILE_NAME
 * field and the FILE_NAME_EXTRA field and joining them in that order.
 *
 * @param view - View over the header block.
 * @returns The text of FILE_NAME followed directly by the text of
 * FILE_NAME_EXTRA.
 */
function parseFileName(view: DataView) {
  const head = extractChars(view, HeaderOffset.FILE_NAME, HeaderSize.FILE_NAME);
  const tail = extractChars(
    view,
    HeaderOffset.FILE_NAME_EXTRA,
    HeaderSize.FILE_NAME_EXTRA,
  );
  return `${head}${tail}`;
}
81+
82+
/**
 * Tells whether every byte covered by `view` is zero.
 *
 * @param view - The view to inspect.
 * @returns `true` if all bytes are 0 (including when the view is empty),
 * `false` as soon as a non-zero byte is found.
 */
function checkNullView(view: DataView): boolean {
  // Bound the scan by the view's own length so that a view shorter than one
  // block does not raise a RangeError and a longer one is checked in full.
  for (let i = 0; i < view.byteLength; i++) {
    if (view.getUint8(i) !== 0) return false;
  }
  return true;
}
88+
89+
export {
90+
never,
91+
pad,
92+
splitFileName,
93+
dateToUnixTime,
94+
extractBytes,
95+
extractChars,
96+
extractOctal,
97+
parseFileName,
98+
checkNullView,
99+
};

0 commit comments

Comments
 (0)