@@ -31,6 +31,7 @@ use arrow_schema::{Field, Fields, Schema};
31
31
use chrono:: { NaiveDateTime , Timelike , Utc } ;
32
32
use derive_more:: { Deref , DerefMut } ;
33
33
use itertools:: Itertools ;
34
+ use once_cell:: sync:: Lazy ;
34
35
use parquet:: {
35
36
arrow:: ArrowWriter ,
36
37
basic:: Encoding ,
@@ -39,6 +40,7 @@ use parquet::{
39
40
schema:: types:: ColumnPath ,
40
41
} ;
41
42
use rand:: distributions:: DistString ;
43
+ use regex:: Regex ;
42
44
use relative_path:: RelativePathBuf ;
43
45
use tracing:: { debug, error, info, trace, warn} ;
44
46
@@ -64,6 +66,10 @@ use super::{
64
66
LogStream , ARROW_FILE_EXTENSION ,
65
67
} ;
66
68
69
+ static ARROWS_NAME_STRUCTURE : Lazy < Regex > = Lazy :: new ( || {
70
+ Regex :: new ( r"^[[:alnum:]]+\.(?P<front>\S+)\.\d+\.data\.arrows$" ) . expect ( "Validated regex" )
71
+ } ) ;
72
+
67
73
#[ derive( Debug , thiserror:: Error ) ]
68
74
#[ error( "Stream not found: {0}" ) ]
69
75
pub struct StreamNotFound ( pub String ) ;
@@ -281,10 +287,14 @@ impl Stream {
281
287
}
282
288
283
289
fn arrow_path_to_parquet ( path : & Path , random_string : & str ) -> PathBuf {
284
- let filename = path. file_stem ( ) . unwrap ( ) . to_str ( ) . unwrap ( ) ;
285
- let ( _, filename) = filename. split_once ( '.' ) . unwrap ( ) ;
286
- assert ! ( filename. contains( '.' ) , "contains the delim `.`" ) ;
287
- let filename_with_random_number = format ! ( "{filename}.{random_string}.arrows" ) ;
290
+ let filename = path. file_name ( ) . unwrap ( ) . to_str ( ) . unwrap ( ) ;
291
+ let filename = ARROWS_NAME_STRUCTURE
292
+ . captures ( filename)
293
+ . unwrap ( )
294
+ . get ( 1 )
295
+ . unwrap ( )
296
+ . as_str ( ) ;
297
+ let filename_with_random_number = format ! ( "{filename}.data.{random_string}.arrows" ) ;
288
298
let mut parquet_path = path. to_owned ( ) ;
289
299
parquet_path. set_file_name ( filename_with_random_number) ;
290
300
parquet_path. set_extension ( "parquet" ) ;
@@ -298,7 +308,7 @@ impl Stream {
298
308
self . stream_name
299
309
) ;
300
310
301
- let time_partition = self . get_time_partition ( ) ;
311
+ let time_partition: Option < String > = self . get_time_partition ( ) ;
302
312
let custom_partition = self . get_custom_partition ( ) ;
303
313
304
314
// read arrow files on disk
0 commit comments