11import { execFileSync } from "child_process" ;
2+ import { rmSync } from "fs" ;
23import {
34 createWriteStream ,
45 ensureDir ,
@@ -194,30 +195,52 @@ export class RemoteLogs {
194195 */
195196 private async downloadLogsForTarget (
196197 artifactDownloadUrl : { url : string ; bytes : number ; id : string } ,
197- downloadDir : string ,
198+ logsDir : string ,
198199 progress : ProgressCallback ,
199200 ) {
200201 const { url, bytes, id : artifactDiskId } = artifactDownloadUrl ;
201- const downloadPath = path . join ( this . workingDirectory , artifactDiskId ) ;
202- if ( existsSync ( downloadPath ) && readdirSync ( downloadPath ) . length > 0 ) {
202+ const artifactDownloadPath = path . join (
203+ this . workingDirectory ,
204+ artifactDiskId ,
205+ ) ;
206+ if (
207+ existsSync ( artifactDownloadPath ) &&
208+ readdirSync ( artifactDownloadPath ) . length > 0
209+ ) {
203210 void extLogger . log (
204211 `Skipping download to existing '${ artifactDiskId } '...` ,
205212 ) ;
206213 } else {
207- await ensureDir ( downloadPath ) ;
214+ await ensureDir ( artifactDownloadPath ) ;
208215 void extLogger . log (
209- `Downloading from ${ artifactDiskId } (bytes: ${ bytes } ) ${ downloadPath } ...` ,
216+ `Downloading from ${ artifactDiskId } (bytes: ${ bytes } ) ${ artifactDownloadPath } ...` ,
210217 ) ;
211- await this . fetchAndUnzip ( url , downloadPath , progress ) ;
218+ // this is incredibly unstable in practice: so retry up to 5 times
219+ // XXX is there no generic retry utility in this project?
220+ let retry = 0 ;
221+ while ( retry < 5 ) {
222+ try {
223+ await this . fetchAndUnzip ( url , artifactDownloadPath , progress ) ;
224+ break ;
225+ } catch ( e ) {
226+ if ( retry >= 5 ) {
227+ throw e ;
228+ }
229+ void extLogger . log (
230+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
231+ `Failed to download and unzip ${ artifactDiskId } : ${ ( e as any ) . message ?? "no error message" } . Trying again...` ,
232+ ) ;
233+ rmSync ( artifactDownloadPath ) ;
234+ retry ++ ;
235+ }
236+ }
212237 }
213- if ( existsSync ( downloadDir ) && readdirSync ( downloadDir ) . length > 0 ) {
214- void extLogger . log (
215- `Skipping log extraction to existing '${ downloadDir } '...` ,
216- ) ;
238+ if ( existsSync ( logsDir ) && readdirSync ( logsDir ) . length > 0 ) {
239+ void extLogger . log ( `Skipping log extraction to existing '${ logsDir } '...` ) ;
217240 } else {
218- await ensureDir ( downloadDir ) ;
241+ await ensureDir ( logsDir ) ;
219242 // find the lone tar.gz file in the unzipped directory
220- const unzippedFiles = readdirSync ( downloadPath ) ;
243+ const unzippedFiles = readdirSync ( artifactDownloadPath ) ;
221244 const tarGzFiles = unzippedFiles . filter ( ( f ) => f . endsWith ( ".tar.gz" ) ) ;
222245 if ( tarGzFiles . length !== 1 ) {
223246 throw new Error (
@@ -226,11 +249,20 @@ export class RemoteLogs {
226249 ) } `,
227250 ) ;
228251 }
229- await this . untargz (
230- path . join ( downloadPath , tarGzFiles [ 0 ] ) ,
231- downloadDir ,
232- progress ,
233- ) ;
252+ try {
253+ await this . untargz (
254+ path . join ( artifactDownloadPath , tarGzFiles [ 0 ] ) ,
255+ logsDir ,
256+ progress ,
257+ ) ;
258+ } catch ( e ) {
259+ // historically, this is due to corruption of the tarball. Remove it and ask the user to try again.
260+ await remove ( artifactDownloadPath ) ;
261+ throw new Error (
262+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
263+ `Failed to untar ${ tarGzFiles [ 0 ] } into ${ logsDir } : ${ ( e as any ) . message ?? "no error message" } ! Please try the command again.` ,
264+ ) ;
265+ }
234266 }
235267 }
236268
0 commit comments