@@ -22,13 +22,17 @@ use actix_web::{middleware, web, App, HttpServer};
22
22
use actix_web_httpauth:: extractors:: basic:: BasicAuth ;
23
23
use actix_web_httpauth:: middleware:: HttpAuthentication ;
24
24
use actix_web_static_files:: ResourceFiles ;
25
+ use chrono:: { DateTime , NaiveDateTime , Timelike , Utc } ;
25
26
use clokwerk:: { AsyncScheduler , Scheduler , TimeUnits } ;
27
+ use filetime:: FileTime ;
26
28
use log:: warn;
27
29
use openssl:: ssl:: { SslAcceptor , SslFiletype , SslMethod } ;
28
30
29
31
include ! ( concat!( env!( "OUT_DIR" ) , "/generated.rs" ) ) ;
30
32
33
+ use std:: fs;
31
34
use std:: panic:: { catch_unwind, AssertUnwindSafe } ;
35
+ use std:: path:: Path ;
32
36
use std:: thread:: { self , JoinHandle } ;
33
37
use std:: time:: Duration ;
34
38
use tokio:: sync:: oneshot;
@@ -51,7 +55,7 @@ mod validator;
51
55
use error:: Error ;
52
56
use option:: CONFIG ;
53
57
use s3:: S3 ;
54
- use storage:: ObjectStorage ;
58
+ use storage:: { ObjectStorage , StorageDir } ;
55
59
56
60
// Global configurations
57
61
const MAX_EVENT_PAYLOAD_SIZE : usize = 102400 ;
@@ -69,6 +73,10 @@ async fn main() -> anyhow::Result<()> {
69
73
warn ! ( "could not populate local metadata. {:?}" , e) ;
70
74
}
71
75
76
+ // Move all exiting data.parquet file to their respective tmp directory
77
+ // they will be synced to object store on next s3 sync cycle
78
+ startup_sync ( ) ;
79
+
72
80
let ( localsync_handler, mut localsync_outbox, localsync_inbox) = run_local_sync ( ) ;
73
81
let ( mut s3sync_handler, mut s3sync_outbox, mut s3sync_inbox) = s3_sync ( ) ;
74
82
@@ -98,6 +106,66 @@ async fn main() -> anyhow::Result<()> {
98
106
}
99
107
}
100
108
109
+ fn startup_sync ( ) {
110
+ if !Path :: new ( & CONFIG . parseable . local_disk_path ) . exists ( ) {
111
+ return ;
112
+ }
113
+
114
+ for stream in metadata:: STREAM_INFO . list_streams ( ) {
115
+ let dir = StorageDir :: new ( & stream) ;
116
+ // if data.parquet file is not present then skip this stream
117
+ if !dir. parquet_path_exists ( ) {
118
+ continue ;
119
+ }
120
+ if let Err ( e) = dir. create_temp_dir ( ) {
121
+ log:: error!(
122
+ "Error creating tmp directory for {} due to error [{}]" ,
123
+ & stream,
124
+ e
125
+ ) ;
126
+ continue ;
127
+ }
128
+ // create prefix for this file from its last modified time
129
+ let path = dir. data_path . join ( "data.parquet" ) ;
130
+
131
+ // metadata.modified gives us system time
132
+ // This may not work on all platfomns
133
+ let metadata = match fs:: metadata ( & path) {
134
+ Ok ( meta) => meta,
135
+ Err ( err) => {
136
+ log:: warn!(
137
+ "Failed to get file metadata for {} due to {:?}. Skipping!" ,
138
+ path. display( ) ,
139
+ err
140
+ ) ;
141
+ continue ;
142
+ }
143
+ } ;
144
+
145
+ let last_modified = FileTime :: from_last_modification_time ( & metadata) ;
146
+ let last_modified = NaiveDateTime :: from_timestamp ( last_modified. unix_seconds ( ) , 0 ) ;
147
+ let last_modified: DateTime < Utc > = DateTime :: from_utc ( last_modified, Utc ) ;
148
+
149
+ let uri = utils:: date_to_prefix ( last_modified. date ( ) )
150
+ + & utils:: hour_to_prefix ( last_modified. hour ( ) )
151
+ + & utils:: minute_to_prefix (
152
+ last_modified. minute ( ) ,
153
+ storage:: OBJECT_STORE_DATA_GRANULARITY ,
154
+ )
155
+ . unwrap ( ) ;
156
+ let local_uri = str:: replace ( & uri, "/" , "." ) ;
157
+ let hostname = utils:: hostname_unchecked ( ) ;
158
+ let parquet_file_local = format ! ( "{}{}.data.parquet" , local_uri, hostname) ;
159
+ if let Err ( err) = dir. move_parquet_to_temp ( parquet_file_local) {
160
+ log:: warn!(
161
+ "Failed to move parquet file at {} to tmp directory due to error {}" ,
162
+ path. display( ) ,
163
+ err
164
+ )
165
+ }
166
+ }
167
+ }
168
+
101
169
fn s3_sync ( ) -> ( JoinHandle < ( ) > , oneshot:: Receiver < ( ) > , oneshot:: Sender < ( ) > ) {
102
170
let ( outbox_tx, outbox_rx) = oneshot:: channel :: < ( ) > ( ) ;
103
171
let ( inbox_tx, inbox_rx) = oneshot:: channel :: < ( ) > ( ) ;
0 commit comments