@@ -150,6 +150,7 @@ async function setupStreamConfiguration(
 
 	setupConfig.streamConfig = {
 		name: setupConfig.streamName,
+		format: { type: "json" as const, ...(!schema && { unstructured: true }) },
 		http: {
 			enabled: httpEnabled,
 			authentication: httpAuth,
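The added `format` line uses the conditional-spread idiom: when `schema` is falsy, `!schema && { unstructured: true }` evaluates to the object and its key is spread in; when a schema is present it evaluates to `false`, and spreading `false` into an object literal contributes nothing. A minimal standalone sketch of the idiom (the `schema` parameter type here is a stand-in, not the real SchemaField[]):

    // Spreading `false` adds no keys; spreading the object adds them.
    function buildFormat(schema: unknown[] | undefined) {
    	return { type: "json" as const, ...(!schema && { unstructured: true }) };
    }

    buildFormat(undefined); // => { type: "json", unstructured: true }
    buildFormat([{ name: "id" }]); // => { type: "json" }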
@@ -321,9 +322,19 @@ async function loadSchemaFromFile(): Promise<SchemaField[]> {
 
 		return parsedSchema.fields;
 	} catch (error) {
-		throw new UserError(
+		logger.error(
 			`Failed to read schema file: ${error instanceof Error ? error.message : String(error)}`
 		);
+
+		const retry = await confirm("Would you like to try again?", {
+			defaultValue: true,
+		});
+
+		if (retry) {
+			return loadSchemaFromFile();
+		} else {
+			throw new UserError("Schema file loading cancelled");
+		}
 	}
 }
 
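The rewritten catch block logs the failure and offers a retry: confirming re-invokes loadSchemaFromFile() recursively, while declining throws a UserError. Because each retry waits on user input, the recursion depth stays small in practice. The same control flow as an iterative sketch, using hypothetical readAndParse/confirmRetry stand-ins rather than wrangler's actual helpers:

    // Loop-based equivalent of retry-by-recursion for user-driven I/O.
    async function loadWithRetry<T>(
    	readAndParse: () => Promise<T>,
    	confirmRetry: () => Promise<boolean>
    ): Promise<T> {
    	for (;;) {
    		try {
    			return await readAndParse();
    		} catch (error) {
    			console.error(error instanceof Error ? error.message : String(error));
    			if (!(await confirmRetry())) {
    				throw new Error("Schema file loading cancelled");
    			}
    		}
    	}
    }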
@@ -370,9 +381,12 @@ async function setupR2Sink(
 		);
 	}
 
-	const path = await prompt("File prefix (optional):", {
-		defaultValue: "",
-	});
+	const path = await prompt(
+		"The base prefix in your bucket where data will be written (optional):",
+		{
+			defaultValue: "",
+		}
+	);
 
 	const timePartitionPattern = await prompt(
 		"Time partition pattern (optional):",
@@ -391,7 +405,6 @@ async function setupR2Sink(
 	});
 
 	let compression;
-	let targetRowGroupSize;
 	if (format === "parquet") {
 		compression = await select("Compression:", {
 			choices: [
@@ -401,21 +414,20 @@ async function setupR2Sink(
 				{ title: "zstd", value: "zstd" },
 				{ title: "lz4", value: "lz4" },
 			],
-			defaultOption: 0,
-			fallbackOption: 0,
-		});
-
-		targetRowGroupSize = await prompt("Target row group size (MB):", {
-			defaultValue: "128",
+			defaultOption: 3,
+			fallbackOption: 3,
 		});
 	}
 
-	const fileSizeMB = await prompt("Maximum file size (MB):", {
+	const fileSizeMB = await prompt("Roll file when size reaches (MB):", {
 		defaultValue: "100",
 	});
-	const intervalSeconds = await prompt("Maximum time interval (seconds):", {
-		defaultValue: "300",
-	});
+	const intervalSeconds = await prompt(
+		"Roll file when time reaches (seconds):",
+		{
+			defaultValue: "300",
+		}
+	);
 
 	const useOAuth = await confirm(
 		"Automatically generate credentials needed to write to your R2 bucket?",
@@ -457,9 +469,6 @@ async function setupR2Sink(
 		...(compression && {
 			compression: compression as ParquetFormat["compression"],
 		}),
-		...(targetRowGroupSize && {
-			row_group_bytes: parseInt(targetRowGroupSize) * 1024 * 1024,
-		}),
 	};
}
 
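The deleted spread converted the megabyte prompt into the byte-valued row_group_bytes field, i.e. parseInt("128") * 1024 * 1024 = 134217728; with the prompt gone, the Parquet format object simply omits the field. A hedged sketch of that MB-to-bytes conversion with input validation added (mbToBytes is a hypothetical helper, not part of this codebase):

    // Convert a user-supplied MB string to bytes, rejecting non-numeric input.
    function mbToBytes(input: string): number {
    	const mb = Number.parseInt(input, 10);
    	if (!Number.isFinite(mb) || mb <= 0) {
    		throw new Error(`Expected a positive number of MB, got "${input}"`);
    	}
    	return mb * 1024 * 1024;
    }

    mbToBytes("128"); // => 134217728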
@@ -506,24 +515,22 @@ async function setupDataCatalogSink(setupConfig: SetupConfig): Promise<void> {
 		fallbackOption: 0,
 	});
 
-	const targetRowGroupSize = await prompt("Target row group size (MB):", {
-		defaultValue: "128",
-	});
-
-	const fileSizeMB = await prompt("Maximum file size (MB):", {
+	const fileSizeMB = await prompt("Roll file when size reaches (MB):", {
 		defaultValue: "100",
 	});
-	const intervalSeconds = await prompt("Maximum time interval (seconds):", {
-		defaultValue: "300",
-	});
+	const intervalSeconds = await prompt(
+		"Roll file when time reaches (seconds):",
+		{
+			defaultValue: "300",
+		}
+	);
 
 	setupConfig.sinkConfig = {
 		name: setupConfig.sinkName,
 		type: "r2_data_catalog",
 		format: {
 			type: "parquet",
 			compression: compression as ParquetFormat["compression"],
-			row_group_bytes: parseInt(targetRowGroupSize) * 1024 * 1024,
 		},
 		config: {
 			bucket,