Skip to content

Commit eb4d8a8

Browse files
committed
wip: creating test for streaming a directory as tar, trying to solve race condition
1 parent a3d1fbe commit eb4d8a8

File tree

1 file changed

+183
-51
lines changed

1 file changed

+183
-51
lines changed

tests/scratch.test.ts

Lines changed: 183 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,45 @@
1+
import type { types as vtarTypes } from '@matrixai/js-virtualtar';
12
import fs from 'fs';
23
import os from 'os';
34
import path from 'path';
4-
import {
5-
VirtualTarGenerator,
6-
VirtualTarParser,
7-
types as vtarTypes,
8-
} from '@matrixai/js-virtualtar';
5+
import { VirtualTarGenerator, VirtualTarParser } from '@matrixai/js-virtualtar';
96

10-
// Default chunk size for reading files from the filesystem.
117
const DEFAULT_CHUNK_SIZE = 64 * 1024;
128

9+
/**
 * Streams a file's content from the local filesystem in manageable chunks.
 *
 * Every yielded chunk is an independent copy of the bytes that were read, so a
 * consumer may retain or queue chunks across iterations without a later read
 * overwriting them.
 *
 * @param localFilePath The path to the file on the local filesystem.
 * @param chunkSize The maximum number of bytes read (and yielded) per iteration.
 * @returns An AsyncGenerator yielding Buffer chunks of the file's content.
 */
async function* fileContentStreamer(
  localFilePath: string,
  chunkSize: number = DEFAULT_CHUNK_SIZE,
): AsyncGenerator<Buffer, void, void> {
  let fd: fs.promises.FileHandle | undefined;
  try {
    // Open the file for reading.
    fd = await fs.promises.open(localFilePath, 'r');
    // Scratch buffer reused for every read; it is never handed to the consumer.
    const scratch = Buffer.alloc(chunkSize);
    while (true) {
      // A `null` position reads from, and advances, the current file position.
      const { bytesRead } = await fd.read(scratch, 0, chunkSize, null);
      if (bytesRead === 0) {
        // No more bytes to read, end of file.
        break;
      }
      // Copy the bytes out of the scratch buffer: yielding a view of it would
      // make every chunk alias the same memory, which the next read clobbers.
      yield Buffer.from(scratch.subarray(0, bytesRead));
    }
  } finally {
    // Ensure the file handle is closed, even if an error occurs or the
    // consumer stops iterating early.
    if (fd) {
      await fd.close();
    }
  }
}
42+
1343
/**
1444
* Creates an AsyncGenerator that yields Uint8Array chunks of a tar archive
1545
* containing a single specified file, streamed directly from the file system.
@@ -47,31 +77,9 @@ async function* streamFileAsTar(
4777
gid: fileStats.gid,
4878
};
4979

50-
// 3. Create a dedicated async generator to stream the file's content.
51-
async function* fileContentStreamer(): AsyncGenerator<Buffer, void, void> {
52-
let fd: fs.promises.FileHandle | undefined;
53-
try {
54-
fd = await fs.promises.open(localFilePath, 'r');
55-
const buffer = Buffer.alloc(chunkSize);
56-
while (true) {
57-
const { bytesRead } = await fd.read(buffer, 0, chunkSize, null);
58-
if (bytesRead === 0) {
59-
break;
60-
}
61-
yield buffer.subarray(0, bytesRead);
62-
}
63-
} finally {
64-
if (fd) {
65-
await fd.close();
66-
}
67-
}
68-
}
69-
7080
// 4. Add the file entry to the tar generator.
71-
vtar.addFile(
72-
pathInArchive,
73-
tarFileStats,
74-
() => fileContentStreamer(),
81+
vtar.addFile(pathInArchive, tarFileStats, () =>
82+
fileContentStreamer(localFilePath, chunkSize),
7583
);
7684

7785
// 5. Finalize the tar archive.
@@ -81,12 +89,72 @@ async function* streamFileAsTar(
8189
yield* vtar.yieldChunks();
8290
}
8391

92+
/**
 * Creates an AsyncGenerator that yields Uint8Array chunks of a tar archive
 * containing the contents of a specified directory, streamed from the file
 * system.
 *
 * Directories and regular files are added; any other entry type (symlinks,
 * sockets, ...) is skipped. The base directory itself is not added as an
 * entry, only what it contains.
 *
 * @param localDirPath The directory on the local filesystem to archive.
 * @param basePathInArchive The path prefix under which entries are stored.
 * @param chunkSize The chunk size used when reading file contents.
 * @returns An AsyncGenerator yielding the chunks of the tar archive.
 * @throws Re-throws any filesystem error hit while walking the directory,
 *   after the chunks generated so far have been yielded.
 */
async function* streamDirectoryAsTar(
  localDirPath: string,
  basePathInArchive: string,
  chunkSize: number = DEFAULT_CHUNK_SIZE,
): AsyncGenerator<Uint8Array, void, void> {
  const vtar = new VirtualTarGenerator();

  // Recursively walks the directory tree and registers every directory and
  // file with the VirtualTarGenerator instance.
  async function walkAndTar(
    currentFsPath: string,
    currentArchivePath: string,
  ): Promise<void> {
    const entries = await fs.promises.readdir(currentFsPath, {
      withFileTypes: true,
    });
    // Sort by name so the archive layout does not depend on the order the OS
    // happens to return directory entries in.
    entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

    // Entries are handled one at a time on purpose: processing them
    // concurrently lets the `stat` calls complete in any order, which makes
    // the order of the addDirectory/addFile calls (and therefore of the
    // entries in the archive) non-deterministic.
    for (const entry of entries) {
      const fullFsPath = path.join(currentFsPath, entry.name);
      // Tar member names use forward slashes regardless of the host platform.
      const fullArchivePath = path.posix.join(currentArchivePath, entry.name);

      if (entry.isDirectory()) {
        const dirStats = await fs.promises.stat(fullFsPath);
        const tarDirStats: vtarTypes.FileStat = {
          mode: dirStats.mode,
          mtime: dirStats.mtime,
          uid: dirStats.uid,
          gid: dirStats.gid,
        };
        vtar.addDirectory(fullArchivePath, tarDirStats);
        // Recurse only after the directory's own entry has been added.
        await walkAndTar(fullFsPath, fullArchivePath);
      } else if (entry.isFile()) {
        const fileStats = await fs.promises.stat(fullFsPath);
        const tarFileStats: vtarTypes.FileStat = {
          size: fileStats.size,
          mode: fileStats.mode,
          mtime: fileStats.mtime,
          uid: fileStats.uid,
          gid: fileStats.gid,
        };
        vtar.addFile(fullArchivePath, tarFileStats, () =>
          fileContentStreamer(fullFsPath, chunkSize),
        );
      }
    }
  }

  // Start the walk in the background so chunks can be consumed while entries
  // are still being added.
  const walkPromise = (async () => {
    try {
      await walkAndTar(localDirPath, basePathInArchive);
    } finally {
      // Always finalize, even when the walk fails. NOTE(review): presumably
      // this is what lets `yieldChunks()` below terminate instead of hanging
      // -- confirm against the VirtualTarGenerator documentation.
      vtar.finalize();
    }
  })();
  // Mark the promise as handled right away. Without this, a failed walk would
  // reject while the consumer is still draining chunks and surface as an
  // `unhandledRejection` before the `await` below gets to observe it.
  void walkPromise.catch(() => undefined);

  yield* vtar.yieldChunks();

  // Propagate any error raised by the walk to the consumer.
  await walkPromise;
}
154+
84155
/**
85156
* Parses a tar stream and writes the contents (files and directories)
86157
* to a specified destination on the local filesystem.
87-
* This is the core function for the "parsing" part of the task.
88-
* @param tarStream An AsyncIterable that yields Uint8Array chunks of a tar archive.
89-
* @param destDir The destination directory to extract the contents to.
90158
*/
91159
async function parseTarStreamToFS(
92160
tarStream: AsyncIterable<Uint8Array>,
@@ -95,30 +163,22 @@ async function parseTarStreamToFS(
95163
console.log(`--- Parsing Tar Stream to Directory: ${destDir} ---`);
96164

97165
const vtarParser = new VirtualTarParser({
98-
// This callback runs when the parser finds a file header.
99166
onFile: async (header, dataStream) => {
100167
console.log(` -> Found file in archive: '${header.path}'`);
101168
const fullDestPath = path.join(destDir, header.path);
102-
103-
// Ensure the directory for the file exists.
104169
await fs.promises.mkdir(path.dirname(fullDestPath), { recursive: true });
105170

106-
// Open a file handle for writing.
107171
let fd: fs.promises.FileHandle | undefined;
108172
try {
109173
fd = await fs.promises.open(fullDestPath, 'w');
110-
// Stream the file's content chunks directly to the file on disk.
111174
for await (const chunk of dataStream()) {
112175
await fd.write(chunk);
113176
}
114177
console.log(` -> Wrote file to: '${fullDestPath}'`);
115178
} finally {
116-
if (fd) {
117-
await fd.close();
118-
}
179+
if (fd) await fd.close();
119180
}
120181
},
121-
// This callback runs when the parser finds a directory header.
122182
onDirectory: async (header) => {
123183
console.log(` -> Found directory in archive: '${header.path}'`);
124184
const fullDestPath = path.join(destDir, header.path);
@@ -129,11 +189,9 @@ async function parseTarStreamToFS(
129189
},
130190
});
131191

132-
// Feed the generated tar chunks from the stream into the parser.
133192
for await (const chunk of tarStream) {
134193
await vtarParser.write(chunk);
135194
}
136-
// Wait for all asynchronous parsing operations (like onFile) to complete.
137195
await vtarParser.settled();
138196
}
139197

@@ -154,15 +212,16 @@ describe('scratch', () => {
154212
});
155213

156214
test('should stream a file as a tar, then parse it back and verify content', async () => {
157-
// SETUP
215+
// SETUP
158216
const originalFileName = 'source-file.txt';
159-
const originalFileContent = 'This is a test of streaming a file with virtualtar!';
217+
const originalFileContent =
218+
'This is a test of streaming a file with virtualtar!';
160219
const localFilePath = path.join(tempDir, originalFileName);
161220
const pathInArchive = 'test/file-in-tar.txt';
162221
await fs.promises.writeFile(localFilePath, originalFileContent);
163222
console.log(`--- Original File Content ---\n'${originalFileContent}'\n`);
164-
165-
// GENERATION (stream to tar)
223+
224+
// GENERATION (stream to tar)
166225
const tarStreamGenerator = streamFileAsTar(localFilePath, pathInArchive);
167226

168227
// PARSING (tar to file)
@@ -171,9 +230,82 @@ describe('scratch', () => {
171230
await parseTarStreamToFS(tarStreamGenerator, extractionDir);
172231

173232
const extractedFilePath = path.join(extractionDir, pathInArchive);
174-
const extractedFileContent = await fs.promises.readFile(extractedFilePath, 'utf-8');
175-
233+
const extractedFileContent = await fs.promises.readFile(
234+
extractedFilePath,
235+
'utf-8',
236+
);
237+
176238
expect(extractedFileContent).toEqual(originalFileContent);
177-
console.log('✅ Verification successful: Original and parsed content match!');
239+
console.log(
240+
'✅ Verification successful: Original and parsed content match!',
241+
);
242+
});
243+
244+
test('should stream a directory as a tar, then parse it back and verify content', async () => {
245+
const sourceDirName = 'source-dir';
246+
const localDirPath = path.join(tempDir, sourceDirName);
247+
const subDirName = 'sub';
248+
const localSubDirPath = path.join(localDirPath, subDirName);
249+
const file1Name = 'file1.txt';
250+
const file2Name = 'file2.log';
251+
const file1Content = 'Content of file 1';
252+
const file2Content = 'Content of file 2 in subdirectory';
253+
254+
await fs.promises.mkdir(localSubDirPath, { recursive: true });
255+
await fs.promises.writeFile(
256+
path.join(localDirPath, file1Name),
257+
file1Content,
258+
);
259+
await fs.promises.writeFile(
260+
path.join(localSubDirPath, file2Name),
261+
file2Content,
262+
);
263+
console.log(
264+
`--- Created source directory structure in: ${localDirPath} ---\n`,
265+
);
266+
267+
const archiveBasePath = 'my-archive';
268+
269+
const tarStreamGenerator = streamDirectoryAsTar(
270+
localDirPath,
271+
archiveBasePath,
272+
);
273+
274+
const extractionDir = path.join(tempDir, 'extracted-dir');
275+
await fs.promises.mkdir(extractionDir);
276+
await parseTarStreamToFS(tarStreamGenerator, extractionDir);
277+
278+
const extractedFile1Path = path.join(
279+
extractionDir,
280+
archiveBasePath,
281+
file1Name,
282+
);
283+
const extractedFile1Content = await fs.promises.readFile(
284+
extractedFile1Path,
285+
'utf-8',
286+
);
287+
expect(extractedFile1Content).toEqual(file1Content);
288+
console.log(`✅ Verified content of: ${extractedFile1Path}`);
289+
290+
const extractedFile2Path = path.join(
291+
extractionDir,
292+
archiveBasePath,
293+
subDirName,
294+
file2Name,
295+
);
296+
const extractedFile2Content = await fs.promises.readFile(
297+
extractedFile2Path,
298+
'utf-8',
299+
);
300+
expect(extractedFile2Content).toEqual(file2Content);
301+
console.log(`✅ Verified content of: ${extractedFile2Path}`);
302+
303+
const subDirStat = await fs.promises.stat(
304+
path.join(extractionDir, archiveBasePath, subDirName),
305+
);
306+
expect(subDirStat.isDirectory()).toBe(true);
307+
console.log(
308+
'✅ Verification successful: Directory structure and all file contents match!',
309+
);
178310
});
179311
});

0 commit comments

Comments
 (0)