1+ import type { types as vtarTypes } from '@matrixai/js-virtualtar' ;
12import fs from 'fs' ;
23import os from 'os' ;
34import path from 'path' ;
4- import {
5- VirtualTarGenerator ,
6- VirtualTarParser ,
7- types as vtarTypes ,
8- } from '@matrixai/js-virtualtar' ;
5+ import { VirtualTarGenerator , VirtualTarParser } from '@matrixai/js-virtualtar' ;
96
10- // Default chunk size for reading files from the filesystem.
117const DEFAULT_CHUNK_SIZE = 64 * 1024 ;
128
/**
 * Streams a file's content from the local filesystem in bounded-size chunks.
 *
 * A single scratch buffer is reused for every read, but each yielded chunk is
 * an independent copy of the bytes that were read. Consumers may therefore
 * retain or queue chunks without them being overwritten by a later read.
 *
 * @param localFilePath The path to the file on the local filesystem.
 * @param chunkSize The maximum number of bytes read per chunk; must be a
 *   positive integer.
 * @returns An AsyncGenerator yielding Buffer chunks of the file's content, in
 *   file order. An empty file yields no chunks.
 * @throws RangeError If `chunkSize` is not a positive integer. A size of 0
 *   would otherwise end the stream immediately and silently drop the content.
 */
async function* fileContentStreamer(
  localFilePath: string,
  chunkSize: number = DEFAULT_CHUNK_SIZE,
): AsyncGenerator<Buffer, void, void> {
  if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
    throw new RangeError(
      `chunkSize must be a positive integer, received: ${chunkSize}`,
    );
  }

  // Open before entering `try`: if opening fails there is nothing to close.
  const fileHandle = await fs.promises.open(localFilePath, 'r');
  try {
    const scratch = Buffer.alloc(chunkSize);
    while (true) {
      // A `null` position reads from the handle's current file position,
      // which advances after every read.
      const { bytesRead } = await fileHandle.read(scratch, 0, chunkSize, null);
      if (bytesRead === 0) {
        // No more bytes to read: end of file.
        break;
      }
      // Copy the filled portion so the next read into `scratch` cannot alter
      // a chunk that has already been handed to the consumer.
      yield Buffer.from(scratch.subarray(0, bytesRead));
    }
  } finally {
    // Runs on normal completion, on a read error, and when the consumer stops
    // iterating early, so the handle is never leaked.
    await fileHandle.close();
  }
}
42+
1343/**
1444 * Creates an AsyncGenerator that yields Uint8Array chunks of a tar archive
1545 * containing a single specified file, streamed directly from the file system.
@@ -47,31 +77,9 @@ async function* streamFileAsTar(
4777 gid : fileStats . gid ,
4878 } ;
4979
50- // 3. Create a dedicated async generator to stream the file's content.
51- async function * fileContentStreamer ( ) : AsyncGenerator < Buffer , void , void > {
52- let fd : fs . promises . FileHandle | undefined ;
53- try {
54- fd = await fs . promises . open ( localFilePath , 'r' ) ;
55- const buffer = Buffer . alloc ( chunkSize ) ;
56- while ( true ) {
57- const { bytesRead } = await fd . read ( buffer , 0 , chunkSize , null ) ;
58- if ( bytesRead === 0 ) {
59- break ;
60- }
61- yield buffer . subarray ( 0 , bytesRead ) ;
62- }
63- } finally {
64- if ( fd ) {
65- await fd . close ( ) ;
66- }
67- }
68- }
69-
7080 // 4. Add the file entry to the tar generator.
71- vtar . addFile (
72- pathInArchive ,
73- tarFileStats ,
74- ( ) => fileContentStreamer ( ) ,
81+ vtar . addFile ( pathInArchive , tarFileStats , ( ) =>
82+ fileContentStreamer ( localFilePath , chunkSize ) ,
7583 ) ;
7684
7785 // 5. Finalize the tar archive.
@@ -81,12 +89,72 @@ async function* streamFileAsTar(
8189 yield * vtar . yieldChunks ( ) ;
8290}
8391
/**
 * Creates an AsyncGenerator that yields Uint8Array chunks of a tar archive
 * containing the contents of a specified directory, streamed from the file system.
 *
 * The directory tree is walked depth-first, one entry at a time and in name
 * order, so the sequence of archive entries is the same on every run. Each
 * sub-directory is registered before its own contents. Only regular files and
 * directories are archived; any other entry type (e.g. a symbolic link) is
 * skipped. No entry is added for `basePathInArchive` itself.
 *
 * @param localDirPath The directory on the local filesystem to archive.
 * @param basePathInArchive The path prefix under which entries are stored in
 *   the archive.
 * @param chunkSize The chunk size passed to `fileContentStreamer` for reading
 *   each file's content.
 * @returns An AsyncGenerator yielding the tar archive as Uint8Array chunks.
 * @throws Rethrows any error raised while walking the directory tree, after
 *   the chunks produced so far have been yielded.
 */
async function* streamDirectoryAsTar(
  localDirPath: string,
  basePathInArchive: string,
  chunkSize: number = DEFAULT_CHUNK_SIZE,
): AsyncGenerator<Uint8Array, void, void> {
  const vtar = new VirtualTarGenerator();

  // Recursively walks the directory tree and registers every directory and
  // regular file with the VirtualTarGenerator instance.
  async function walkAndTar(
    currentFsPath: string,
    currentArchivePath: string,
  ): Promise<void> {
    const entries = await fs.promises.readdir(currentFsPath, {
      withFileTypes: true,
    });
    // `readdir` order is filesystem-dependent; sort for a reproducible archive.
    entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

    // Entries are processed sequentially on purpose: a parallel fan-out makes
    // the entry order nondeterministic and, when one branch fails, lets the
    // remaining branches keep adding entries after the archive was finalized.
    for (const entry of entries) {
      const fullFsPath = path.join(currentFsPath, entry.name);
      // Archive member names use '/' regardless of the host OS separator.
      const fullArchivePath = path.posix.join(currentArchivePath, entry.name);

      if (entry.isDirectory()) {
        const dirStats = await fs.promises.stat(fullFsPath);
        const tarDirStats: vtarTypes.FileStat = {
          mode: dirStats.mode,
          mtime: dirStats.mtime,
          uid: dirStats.uid,
          gid: dirStats.gid,
        };
        vtar.addDirectory(fullArchivePath, tarDirStats);
        // Recurse into the subdirectory.
        await walkAndTar(fullFsPath, fullArchivePath);
      } else if (entry.isFile()) {
        const fileStats = await fs.promises.stat(fullFsPath);
        const tarFileStats: vtarTypes.FileStat = {
          size: fileStats.size,
          mode: fileStats.mode,
          mtime: fileStats.mtime,
          uid: fileStats.uid,
          gid: fileStats.gid,
        };
        // The content is supplied lazily through a factory, so the file is
        // only opened when the generator asks for its data.
        vtar.addFile(fullArchivePath, tarFileStats, () =>
          fileContentStreamer(fullFsPath, chunkSize),
        );
      }
    }
  }

  // Start the walk without awaiting it so chunks can be yielded while entries
  // are still being discovered. A failure is captured as a value rather than
  // left as a rejection: if the consumer stops iterating early, the `await`
  // below is never reached and a rejected promise would go unhandled.
  const walkTask = (async (): Promise<{ error: unknown } | undefined> => {
    try {
      await walkAndTar(localDirPath, basePathInArchive);
      return undefined;
    } catch (error) {
      return { error };
    } finally {
      // Always finalize, even on failure.
      // NOTE(review): presumably `yieldChunks()` only completes once
      // `finalize()` has been called — confirm against js-virtualtar.
      vtar.finalize();
    }
  })();

  yield* vtar.yieldChunks();

  // Surface a walk failure to the consumer once all chunks have been drained.
  const walkFailure = await walkTask;
  if (walkFailure !== undefined) {
    throw walkFailure.error;
  }
}
154+
84155/**
85156 * Parses a tar stream and writes the contents (files and directories)
86157 * to a specified destination on the local filesystem.
87- * This is the core function for the "parsing" part of the task.
88- * @param tarStream An AsyncIterable that yields Uint8Array chunks of a tar archive.
89- * @param destDir The destination directory to extract the contents to.
90158 */
91159async function parseTarStreamToFS (
92160 tarStream : AsyncIterable < Uint8Array > ,
@@ -95,30 +163,22 @@ async function parseTarStreamToFS(
95163 console . log ( `--- Parsing Tar Stream to Directory: ${ destDir } ---` ) ;
96164
97165 const vtarParser = new VirtualTarParser ( {
98- // This callback runs when the parser finds a file header.
99166 onFile : async ( header , dataStream ) => {
100167 console . log ( ` -> Found file in archive: '${ header . path } '` ) ;
101168 const fullDestPath = path . join ( destDir , header . path ) ;
102-
103- // Ensure the directory for the file exists.
104169 await fs . promises . mkdir ( path . dirname ( fullDestPath ) , { recursive : true } ) ;
105170
106- // Open a file handle for writing.
107171 let fd : fs . promises . FileHandle | undefined ;
108172 try {
109173 fd = await fs . promises . open ( fullDestPath , 'w' ) ;
110- // Stream the file's content chunks directly to the file on disk.
111174 for await ( const chunk of dataStream ( ) ) {
112175 await fd . write ( chunk ) ;
113176 }
114177 console . log ( ` -> Wrote file to: '${ fullDestPath } '` ) ;
115178 } finally {
116- if ( fd ) {
117- await fd . close ( ) ;
118- }
179+ if ( fd ) await fd . close ( ) ;
119180 }
120181 } ,
121- // This callback runs when the parser finds a directory header.
122182 onDirectory : async ( header ) => {
123183 console . log ( ` -> Found directory in archive: '${ header . path } '` ) ;
124184 const fullDestPath = path . join ( destDir , header . path ) ;
@@ -129,11 +189,9 @@ async function parseTarStreamToFS(
129189 } ,
130190 } ) ;
131191
132- // Feed the generated tar chunks from the stream into the parser.
133192 for await ( const chunk of tarStream ) {
134193 await vtarParser . write ( chunk ) ;
135194 }
136- // Wait for all asynchronous parsing operations (like onFile) to complete.
137195 await vtarParser . settled ( ) ;
138196}
139197
@@ -154,15 +212,16 @@ describe('scratch', () => {
154212 } ) ;
155213
156214 test ( 'should stream a file as a tar, then parse it back and verify content' , async ( ) => {
157- // SETUP
215+ // SETUP
158216 const originalFileName = 'source-file.txt' ;
159- const originalFileContent = 'This is a test of streaming a file with virtualtar!' ;
217+ const originalFileContent =
218+ 'This is a test of streaming a file with virtualtar!' ;
160219 const localFilePath = path . join ( tempDir , originalFileName ) ;
161220 const pathInArchive = 'test/file-in-tar.txt' ;
162221 await fs . promises . writeFile ( localFilePath , originalFileContent ) ;
163222 console . log ( `--- Original File Content ---\n'${ originalFileContent } '\n` ) ;
164-
165- // GENERATION (stream to tar)
223+
224+ // GENERATION (stream to tar)
166225 const tarStreamGenerator = streamFileAsTar ( localFilePath , pathInArchive ) ;
167226
168227 // PARSING (tar to file)
@@ -171,9 +230,82 @@ describe('scratch', () => {
171230 await parseTarStreamToFS ( tarStreamGenerator , extractionDir ) ;
172231
173232 const extractedFilePath = path . join ( extractionDir , pathInArchive ) ;
174- const extractedFileContent = await fs . promises . readFile ( extractedFilePath , 'utf-8' ) ;
175-
233+ const extractedFileContent = await fs . promises . readFile (
234+ extractedFilePath ,
235+ 'utf-8' ,
236+ ) ;
237+
176238 expect ( extractedFileContent ) . toEqual ( originalFileContent ) ;
177- console . log ( '✅ Verification successful: Original and parsed content match!' ) ;
239+ console . log (
240+ '✅ Verification successful: Original and parsed content match!' ,
241+ ) ;
242+ } ) ;
243+
244+ test ( 'should stream a directory as a tar, then parse it back and verify content' , async ( ) => {
245+ const sourceDirName = 'source-dir' ;
246+ const localDirPath = path . join ( tempDir , sourceDirName ) ;
247+ const subDirName = 'sub' ;
248+ const localSubDirPath = path . join ( localDirPath , subDirName ) ;
249+ const file1Name = 'file1.txt' ;
250+ const file2Name = 'file2.log' ;
251+ const file1Content = 'Content of file 1' ;
252+ const file2Content = 'Content of file 2 in subdirectory' ;
253+
254+ await fs . promises . mkdir ( localSubDirPath , { recursive : true } ) ;
255+ await fs . promises . writeFile (
256+ path . join ( localDirPath , file1Name ) ,
257+ file1Content ,
258+ ) ;
259+ await fs . promises . writeFile (
260+ path . join ( localSubDirPath , file2Name ) ,
261+ file2Content ,
262+ ) ;
263+ console . log (
264+ `--- Created source directory structure in: ${ localDirPath } ---\n` ,
265+ ) ;
266+
267+ const archiveBasePath = 'my-archive' ;
268+
269+ const tarStreamGenerator = streamDirectoryAsTar (
270+ localDirPath ,
271+ archiveBasePath ,
272+ ) ;
273+
274+ const extractionDir = path . join ( tempDir , 'extracted-dir' ) ;
275+ await fs . promises . mkdir ( extractionDir ) ;
276+ await parseTarStreamToFS ( tarStreamGenerator , extractionDir ) ;
277+
278+ const extractedFile1Path = path . join (
279+ extractionDir ,
280+ archiveBasePath ,
281+ file1Name ,
282+ ) ;
283+ const extractedFile1Content = await fs . promises . readFile (
284+ extractedFile1Path ,
285+ 'utf-8' ,
286+ ) ;
287+ expect ( extractedFile1Content ) . toEqual ( file1Content ) ;
288+ console . log ( `✅ Verified content of: ${ extractedFile1Path } ` ) ;
289+
290+ const extractedFile2Path = path . join (
291+ extractionDir ,
292+ archiveBasePath ,
293+ subDirName ,
294+ file2Name ,
295+ ) ;
296+ const extractedFile2Content = await fs . promises . readFile (
297+ extractedFile2Path ,
298+ 'utf-8' ,
299+ ) ;
300+ expect ( extractedFile2Content ) . toEqual ( file2Content ) ;
301+ console . log ( `✅ Verified content of: ${ extractedFile2Path } ` ) ;
302+
303+ const subDirStat = await fs . promises . stat (
304+ path . join ( extractionDir , archiveBasePath , subDirName ) ,
305+ ) ;
306+ expect ( subDirStat . isDirectory ( ) ) . toBe ( true ) ;
307+ console . log (
308+ '✅ Verification successful: Directory structure and all file contents match!' ,
309+ ) ;
178310 } ) ;
179311} ) ;
0 commit comments