@@ -23,35 +23,53 @@ def connect_google_drive(credentials_file, impersonate_user=None):
2323 print (f"Failed to connect to Google Drive: { e } " )
2424
def download_file(drive, file_obj, base_path):
    """Download one Drive entry (file or folder) into ``base_path``.

    Folders are created locally and their children are downloaded
    recursively. Google Workspace files (any ``application/vnd.google-apps*``
    MIME type other than a folder) are exported as PDF; every other file is
    fetched as raw media. The entry is always written to
    ``os.path.join(base_path, file_obj['name'])``.

    Args:
        drive: Drive service object exposing ``files().list/export/get_media``.
        file_obj: Drive file resource dict; ``'name'`` and ``'id'`` are
            required, ``'mimeType'`` is optional.
        base_path: Local directory that will contain the entry.

    Returns:
        None. The function is best-effort: failures are printed, not raised.
    """
    try:
        file_name = file_obj['name']
        file_id = file_obj['id']
        mime_type = file_obj.get('mimeType', '')
        print(f"Downloading file: {file_name} to {base_path}")

        # base_path already is the destination directory: the caller passes
        # the download root for top-level entries and the parent's local path
        # for recursive calls. Resolving file_obj['parents'] again would
        # duplicate the parent folder name (or add a "My Drive" segment) and
        # cost one extra API request per parent.
        target_path = os.path.join(base_path, file_name)

        if mime_type == 'application/vnd.google-apps.folder':
            os.makedirs(target_path, exist_ok=True)
            # files().list is paginated; follow nextPageToken so that large
            # folders are not silently truncated to the first page.
            page_token = None
            while True:
                response = drive.files().list(
                    q=f"'{file_id}' in parents",
                    pageToken=page_token,
                ).execute()
                for folder_file in response.get('files', []):
                    download_file(drive, folder_file, target_path)
                page_token = response.get('nextPageToken')
                if not page_token:
                    break
            return

        try:
            if mime_type.startswith('application/vnd.google-apps'):
                # Google Docs Editors files have no binary content of their
                # own; they must be exported to a concrete format.
                request = drive.files().export(fileId=file_id, mimeType='application/pdf')
            else:
                request = drive.files().get_media(fileId=file_id)
            content = request.execute()

            # Make sure the containing directory exists before writing.
            parent_dir = os.path.dirname(target_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(target_path, 'wb') as fh:
                fh.write(content)
        except Exception as e:
            print(f"Failed to write file: {e}")
            # Do not report success for a file that was never written.
            return

        system.print_debug(f"File downloaded to: {target_path}")
    except Exception as e:
        print(f"Failed to download file: {e}")
5470
71+
72+
5573def list_files (drive , impersonate_user = None ):
5674 try :
5775 query = "'root' in parents"
@@ -88,20 +106,13 @@ def execute(args):
88106 if drive :
89107 files = list_files (drive , impersonate_user )
90108 for file_obj in files :
91- download_file (drive , file_obj , "data/google_drive" )
109+
110+ if 'mimeType' in file_obj and file_obj ['mimeType' ] == 'application/vnd.google-apps.document' or file_obj ['mimeType' ] == 'application/vnd.google-apps.spreadsheet' or file_obj ['mimeType' ] == 'application/vnd.google-apps.presentation' or file_obj ['mimeType' ] == 'application/vnd.google-apps.drawing' or file_obj ['mimeType' ] == 'application/vnd.google-apps.script' :
111+ file_obj ['name' ] = file_obj ['name' ] + '-runtime.pdf'
112+
92113 file_id = file_obj ['id' ]
93114 file_name = file_obj ['name' ]
94- if 'mimeType' in file_obj and file_obj ['mimeType' ] == 'application/vnd.google-apps.folder' :
95- continue
96-
97- parent_folder_ids = file_obj .get ('parents' , [])
98115 folder_path = "data/google_drive"
99- if parent_folder_ids :
100- for parent_id in parent_folder_ids :
101- parent_folder = drive .files ().get (fileId = parent_id ).execute ()
102- if parent_folder ['name' ] == 'My Drive' :
103- continue
104- folder_path = os .path .join (folder_path , parent_folder ['name' ])
105116
106117 file_path = os .path .join (folder_path , file_name )
107118
@@ -115,9 +126,10 @@ def execute(args):
115126 is_cache_enabled = True
116127
117128 if is_cache_enabled :
118- download_file (drive , file_obj , "data/google_drive" )
129+ download_file (drive , file_obj , "data/google_drive/ " )
119130
120- matches = system .read_match_strings (file_path , 'gdrive' )
131+ matches = system .read_match_strings (file_path , 'gdrive_workspace' )
132+ file_name = file_name .replace ('-runtime.pdf' , '' )
121133 if matches :
122134 for match in matches :
123135 results .append ({
@@ -136,8 +148,8 @@ def execute(args):
136148 else :
137149 system .print_error ("No Google Drive connection details found in connection file" )
138150
139- if not is_cache_enabled :
140- os .system ("rm -rf data/google_drive" )
151+ """ if not is_cache_enabled:
152+ os.system("rm -rf data/google_drive")"""
141153
142154 return results
143155
0 commit comments