@@ -1,20 +1,21 @@
 import logging
 from functools import reduce
 from itertools import product
-from os import remove
-from os.path import join as path_join, abspath
+from os import path, remove
 
 from click import command, argument, option, style
 from cloudinary import api
 
 from cloudinary_cli.utils.api_utils import query_cld_folder, upload_file, download_file
 from cloudinary_cli.utils.file_utils import walk_dir, delete_empty_dirs, get_destination_folder
-from cloudinary_cli.utils.json_utils import print_json
-from cloudinary_cli.utils.utils import logger, run_tasks_concurrently, get_user_action
+from cloudinary_cli.utils.json_utils import print_json, read_json_from_file, write_json_to_file
+from cloudinary_cli.utils.utils import logger, run_tasks_concurrently, get_user_action, invert_dict
 
 _DEFAULT_DELETION_BATCH_SIZE = 30
 _DEFAULT_CONCURRENT_WORKERS = 30
 
+_SYNC_META_FILE = '.cld-sync'
+
 
 @command("sync",
          short_help="Synchronize between a local directory and a Cloudinary folder.",
@@ -50,31 +51,52 @@ class SyncDir:
     def __init__(self, local_dir, remote_dir, include_hidden, concurrent_workers, force, keep_deleted,
                  deletion_batch_size):
         self.local_dir = local_dir
-        self.remote_dir = remote_dir
+        self.remote_dir = remote_dir.strip('/')
         self.include_hidden = include_hidden
         self.concurrent_workers = concurrent_workers
         self.force = force
         self.keep_unique = keep_deleted
         self.deletion_batch_size = deletion_batch_size
 
+        self.sync_meta_file = path.join(self.local_dir, _SYNC_META_FILE)
+
         self.verbose = logger.getEffectiveLevel() < logging.INFO
 
-        self.local_files = walk_dir(abspath(self.local_dir), include_hidden)
+        self.local_files = walk_dir(path.abspath(self.local_dir), include_hidden)
         logger.info(f"Found {len(self.local_files)} items in local folder '{local_dir}'")
 
         self.remote_files = query_cld_folder(self.remote_dir)
         logger.info(f"Found {len(self.remote_files)} items in Cloudinary folder '{self.remote_dir}'")
 
         local_file_names = self.local_files.keys()
         remote_file_names = self.remote_files.keys()
73+ """
74+ Cloudinary is a very permissive service. When uploading files that contain invalid characters,
75+ unicode characters, etc, Cloudinary does the best effort to store those files.
76+
77+ Usually Cloudinary sanitizes those file names and strips invalid characters. Although it is good best effort for
78+ a general use case, when syncing local folder with Cloudinary, it is not the best option, since directories will
79+ be always out-of-sync.
80+
81+ To overcome this limitation, cloudinary-cli keeps .cld-sync hidden file in the sync directory that contains a
82+ mapping of the diverse file names. This file keeps tracking on the files and allows syncing in both directions.
83+ """
+        self.diverse_file_names = read_json_from_file(self.sync_meta_file, does_not_exist_ok=True)
+        inverted_diverse_file_names = invert_dict(self.diverse_file_names)
+
+        cloudinarized_local_file_names = [self.diverse_file_names.get(f, f) for f in local_file_names]
+        self.recovered_remote_files = {inverted_diverse_file_names.get(f, f): dt for f, dt in self.remote_files.items()}
+
+        self.unique_remote_file_names = remote_file_names - cloudinarized_local_file_names
+        self.unique_local_file_names = local_file_names - self.recovered_remote_files.keys()
 
-        self.unique_remote_file_names = remote_file_names - local_file_names
-        self.unique_local_file_names = local_file_names - remote_file_names
         common_file_names = local_file_names - self.unique_local_file_names
 
-        self.out_of_sync_file_names = self._get_out_of_sync_file_names(common_file_names)
+        self.out_of_sync_local_file_names = self._get_out_of_sync_file_names(common_file_names)
+        self.out_of_sync_remote_file_names = set(self.diverse_file_names.get(f, f) for f in
+                                                 self.out_of_sync_local_file_names)
 
-        skipping = len(common_file_names) - len(self.out_of_sync_file_names)
+        skipping = len(common_file_names) - len(self.out_of_sync_local_file_names)
 
         if skipping:
             logger.info(f"Skipping {skipping} items")
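
The name-mapping logic above is easier to follow with concrete data. A minimal sketch, assuming .cld-sync stores a flat JSON object mapping local names to their Cloudinary-sanitized counterparts and that invert_dict simply swaps keys and values (the file names below are hypothetical):

    # Hypothetical .cld-sync content: local file name -> name as stored by Cloudinary
    diverse_file_names = {'photos/café münchen.jpg': 'photos/cafe_munchen.jpg'}

    # Assumed behavior of invert_dict: swap keys and values
    inverted_diverse_file_names = {v: k for k, v in diverse_file_names.items()}

    local_file_names = {'photos/café münchen.jpg', 'photos/readme.txt'}
    remote_file_names = {'photos/cafe_munchen.jpg', 'photos/readme.txt'}

    # Translate local names to their "cloudinarized" form before comparing, so a
    # renamed file is not reported as unique on both sides.
    cloudinarized = {diverse_file_names.get(f, f) for f in local_file_names}
    assert remote_file_names - cloudinarized == set()
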
@@ -83,12 +105,16 @@ def _get_out_of_sync_file_names(self, common_file_names):
         logger.debug("\nCalculating differences...\n")
         out_of_sync_file_names = set()
         for f in common_file_names:
-            if self.local_files[f]['etag'] != self.remote_files[f]['etag']:
-                logger.warning(f"{f} is out of sync")
-                logger.debug(f"Local etag: {self.local_files[f]['etag']}. Remote etag: {self.remote_files[f]['etag']}")
+            local_etag = self.local_files[f]['etag']
+            remote_etag = self.recovered_remote_files[f]['etag']
+            if local_etag != remote_etag:
+                logger.warning(f"{f} is out of sync" +
+                               (f" with '{self.diverse_file_names[f]}'" if f in self.diverse_file_names else ""))
+                logger.debug(f"Local etag: {local_etag}. Remote etag: {remote_etag}")
                 out_of_sync_file_names.add(f)
                 continue
-            logger.debug(f"{f} is in sync")
+            logger.debug(f"'{f}' is in sync" +
+                         (f" with '{self.diverse_file_names[f]}'" if f in self.diverse_file_names else ""))
 
         return out_of_sync_file_names
 
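
A note on why the lookup moved from self.remote_files[f] to self.recovered_remote_files[f]: remote_files is keyed by the sanitized Cloudinary names, so indexing it with a local name would raise a KeyError for any renamed file. A minimal sketch with hypothetical values:

    remote_files = {'photos/cafe_munchen.jpg': {'etag': 'abc123'}}
    inverted_diverse_file_names = {'photos/cafe_munchen.jpg': 'photos/café münchen.jpg'}

    # Re-key the remote listing by local name so common files can be
    # compared by the same identifier on both sides.
    recovered_remote_files = {inverted_diverse_file_names.get(f, f): dt
                              for f, dt in remote_files.items()}
    assert recovered_remote_files['photos/café münchen.jpg']['etag'] == 'abc123'
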
@@ -97,7 +123,7 @@ def push(self):
             logger.info("Aborting...")
             return False
 
-        files_to_push = self.unique_local_file_names | self.out_of_sync_file_names
+        files_to_push = self.unique_local_file_names | self.out_of_sync_local_file_names
         if not files_to_push:
             return True
 
@@ -109,14 +135,37 @@ def push(self):
             'invalidate': True,
             'resource_type': 'auto'
         }
+        upload_results = {}
         uploads = []
         for file in files_to_push:
             folder = get_destination_folder(self.remote_dir, file)
 
-            uploads.append((self.local_files[file]['path'], {**options, 'folder': folder}))
+            uploads.append((self.local_files[file]['path'], {**options, 'folder': folder}, upload_results))
 
         run_tasks_concurrently(upload_file, uploads, self.concurrent_workers)
 
+        self.save_sync_meta_file(upload_results)
+
+    def save_sync_meta_file(self, upload_results):
+        diverse_filenames = {}
+        for local_path, remote_path in upload_results.items():
+            local = path.relpath(local_path, self.local_dir)
+            remote = path.relpath(remote_path, self.remote_dir)
+            if local != remote:
+                diverse_filenames[local] = remote
+
+        # Filter out outdated meta file entries.
+        current_diverse_files = {k: v for k, v in self.diverse_file_names.items() if k in self.local_files.keys()}
+
+        if diverse_filenames or current_diverse_files != self.diverse_file_names:
+            current_diverse_files.update(diverse_filenames)
+            try:
+                write_json_to_file(current_diverse_files, self.sync_meta_file)
+                logger.debug(f"Updated '{self.sync_meta_file}' file")
+            except Exception as e:
+                # The meta file is not critical for the sync itself; if we cannot write it, just log a warning.
+                logger.warning(f"Failed updating '{self.sync_meta_file}' file: {e}")
+
     def _handle_unique_remote_files(self):
         handled = self._handle_files_deletion(len(self.unique_remote_file_names), "remote")
         if handled is not None:
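
To illustrate how save_sync_meta_file derives its entries, a minimal sketch of the relpath comparison, with hypothetical paths (the remote path is what upload_file is assumed to report back after Cloudinary sanitizes the name):

    from os import path

    local_dir, remote_dir = '/home/user/media', 'my_folder'

    local_path = '/home/user/media/photos/café münchen.jpg'
    remote_path = 'my_folder/photos/cafe_munchen.jpg'

    local = path.relpath(local_path, local_dir)     # 'photos/café münchen.jpg'
    remote = path.relpath(remote_path, remote_dir)  # 'photos/cafe_munchen.jpg'

    # Only names that diverge after upload end up in the .cld-sync mapping.
    assert local != remote
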
@@ -155,7 +204,7 @@ def pull(self):
         if not self._handle_unique_local_files():
             return False
 
-        files_to_pull = self.unique_remote_file_names | self.out_of_sync_file_names
+        files_to_pull = self.unique_remote_file_names | self.out_of_sync_remote_file_names
 
         if not files_to_pull:
             return True
@@ -164,7 +213,7 @@ def pull(self):
         downloads = []
         for file in files_to_pull:
             remote_file = self.remote_files[file]
-            local_path = abspath(path_join(self.local_dir, file))
+            local_path = path.abspath(path.join(self.local_dir, file))
 
             downloads.append((remote_file, local_path))
 
@@ -177,9 +226,9 @@ def _handle_unique_local_files(self):
 
         logger.info(f"Deleting {len(self.unique_local_file_names)} local files...")
         for file in self.unique_local_file_names:
-            path = abspath(self.local_files[file]['path'])
-            remove(path)
-            logger.info(f"Deleted '{path}'")
+            full_path = path.abspath(self.local_files[file]['path'])
+            remove(full_path)
+            logger.info(f"Deleted '{full_path}'")
 
         logger.info("Deleting empty folders...")
         delete_empty_dirs(self.local_dir)
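
End to end, the change is exercised through the existing sync command; assuming the standard cld entry point and hypothetical folder names:

    cld sync --push my_local_folder my_cloudinary_folder   # records diverse names in my_local_folder/.cld-sync
    cld sync --pull my_local_folder my_cloudinary_folder   # resolves remote names back to local ones via the mapping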