@@ -126,7 +126,7 @@ def download_file(object)
126126 )
127127
128128 Rails . logger . info ( "Downloaded #{ object . key } to #{ local_file_path } " )
129- if object . key . end_with? ( ".gz" )
129+ if object . key . end_with? ( ".gz" ) && ! object . key . end_with? ( ".parquet.gz" )
130130 ungzipped_file_path = ungzip_file ( local_file_path )
131131 Rails . logger . info ( "Ungzipped to #{ ungzipped_file_path } " )
132132 end
@@ -284,48 +284,61 @@ def upload_file(file_path)
284284 relative_path = Pathname . new ( file_path ) . relative_path_from ( Pathname . new ( root_dir ) ) . to_s
285285 s3_key = s3_prefix . present? ? File . join ( s3_prefix , File . basename ( relative_path ) ) : relative_path
286286
287- # Create a temporary gzipped version of the file
288- gzipped_file_path = "#{ file_path } .gz"
289-
290287 begin
291- Rails . logger . info ( "Compressing and uploading #{ file_path } to s3://#{ s3_bucket } /#{ s3_key } " )
288+ Rails . logger . info ( "Uploading #{ file_path } to s3://#{ s3_bucket } /#{ s3_key } " )
292289
293- # Compress the file
294- Zlib :: GzipWriter . open ( gzipped_file_path ) do | gz |
290+ if file_path . end_with? ( ".parquet" )
291+ # Upload parquet files directly without compression
295292 File . open ( file_path , "rb" ) do |file |
296- gz . write ( file . read )
293+ s3 . put_object (
294+ bucket : s3_bucket ,
295+ key : s3_key ,
296+ body : file
297+ )
298+ end
299+ Rails . logger . info ( "Successfully uploaded #{ file_path } to s3://#{ s3_bucket } /#{ s3_key } " )
300+ else
301+ # Create a temporary gzipped version of the file
302+ gzipped_file_path = "#{ file_path } .gz"
303+
304+ # Compress the file
305+ Zlib ::GzipWriter . open ( gzipped_file_path ) do |gz |
306+ File . open ( file_path , "rb" ) do |file |
307+ gz . write ( file . read )
308+ end
297309 end
298- end
299310
300- # Upload the gzipped file
301- File . open ( gzipped_file_path , "rb" ) do |file |
302- s3 . put_object (
303- bucket : s3_bucket ,
304- key : "#{ s3_key } .gz" ,
305- body : file ,
306- content_encoding : "gzip" ,
307- )
308- end
311+ # Upload the gzipped file
312+ File . open ( gzipped_file_path , "rb" ) do |file |
313+ s3 . put_object (
314+ bucket : s3_bucket ,
315+ key : "#{ s3_key } .gz" ,
316+ body : file ,
317+ content_encoding : "gzip" ,
318+ )
319+ end
320+
321+ Rails . logger . info ( "Successfully uploaded #{ file_path } to s3://#{ s3_bucket } /#{ s3_key } .gz" )
309322
310- Rails . logger . info ( "Successfully uploaded #{ file_path } to s3://#{ s3_bucket } /#{ s3_key } .gz" )
323+ # Clean up temporary gzipped file
324+ File . delete ( gzipped_file_path ) if File . exist? ( gzipped_file_path )
325+ end
311326 rescue Aws ::S3 ::Errors ::ServiceError , StandardError => e
312327 Rails . logger . error ( "Failed to upload #{ file_path } : #{ e . message } " )
313328 raise e
314- ensure
315- # Clean up temporary gzipped file
316- File . delete ( gzipped_file_path ) if File . exist? ( gzipped_file_path )
317329 end
318330 end
319331
320332 def should_upload? ( file_path )
321333 relative_path = Pathname . new ( file_path ) . relative_path_from ( Pathname . new ( root_dir ) ) . to_s
322334 s3_key = s3_prefix . present? ? File . join ( s3_prefix , relative_path ) : relative_path
335+ s3_key = "#{ s3_key } .gz" unless file_path . end_with? ( ".parquet" )
323336
324337 begin
325338 # Check if file exists in S3
326339 response = s3 . head_object (
327340 bucket : s3_bucket ,
328- key : " #{ s3_key } .gz" ,
341+ key : s3_key ,
329342 )
330343
331344 # Compare modification times
0 commit comments