1- use std:: sync:: Arc ;
1+ use std:: {
2+ collections:: HashMap ,
3+ sync:: { Arc , LazyLock } ,
4+ } ;
25
3- use futures:: future:: try_join;
46use google_drive3:: {
57 api:: Scope ,
68 yup_oauth2:: { read_service_account_key, ServiceAccountAuthenticator } ,
@@ -10,11 +12,62 @@ use http_body_util::BodyExt;
1012use hyper_rustls:: HttpsConnector ;
1113use hyper_util:: client:: legacy:: connect:: HttpConnector ;
1214use indexmap:: IndexSet ;
13- use log:: debug ;
15+ use log:: warn ;
1416
1517use crate :: ops:: sdk:: * ;
1618
/// Pair of export formats for a single Google Workspace document type.
///
/// Both fields are static MIME type strings, so the value is cheap to copy
/// and can be compared or printed for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ExportMimeType {
    /// MIME type requested when the content is wanted as text.
    text: &'static str,
    /// MIME type requested when the content is wanted as binary.
    binary: &'static str,
}
23+
/// MIME type Google Drive reports for folders.
const FOLDER_MIME_TYPE: &str = "application/vnd.google-apps.folder";
/// Generic Google Drive file MIME type; accepted as supported even though it
/// carries the `application/vnd.google-apps.` prefix.
const FILE_MIME_TYPE: &str = "application/vnd.google-apps.file";
26+ static EXPORT_MIME_TYPES : LazyLock < HashMap < & ' static str , ExportMimeType > > = LazyLock :: new ( || {
27+ HashMap :: from ( [
28+ (
29+ "application/vnd.google-apps.document" ,
30+ ExportMimeType {
31+ text : "text/markdown" ,
32+ binary : "application/pdf" ,
33+ } ,
34+ ) ,
35+ (
36+ "application/vnd.google-apps.spreadsheet" ,
37+ ExportMimeType {
38+ text : "text/csv" ,
39+ binary : "application/pdf" ,
40+ } ,
41+ ) ,
42+ (
43+ "application/vnd.google-apps.presentation" ,
44+ ExportMimeType {
45+ text : "text/plain" ,
46+ binary : "application/pdf" ,
47+ } ,
48+ ) ,
49+ (
50+ "application/vnd.google-apps.drawing" ,
51+ ExportMimeType {
52+ text : "image/svg+xml" ,
53+ binary : "image/png" ,
54+ } ,
55+ ) ,
56+ (
57+ "application/vnd.google-apps.script" ,
58+ ExportMimeType {
59+ text : "application/vnd.google-apps.script+json" ,
60+ binary : "application/vnd.google-apps.script+json" ,
61+ } ,
62+ ) ,
63+ ] )
64+ } ) ;
65+
66+ fn is_supported_file_type ( mime_type : & str ) -> bool {
67+ !mime_type. starts_with ( "application/vnd.google-apps." )
68+ || EXPORT_MIME_TYPES . contains_key ( mime_type)
69+ || mime_type == FILE_MIME_TYPE
70+ }
1871
1972#[ derive( Debug , Deserialize ) ]
2073pub struct Spec {
@@ -91,11 +144,21 @@ impl Executor {
91144 let ( _, files) = list_call. doit ( ) . await ?;
92145 if let Some ( files) = files. files {
93146 for file in files {
94- if let Some ( id) = file. id {
95- if file. mime_type . as_ref ( ) == Some ( & FOLDER_MIME_TYPE . to_string ( ) ) {
96- Box :: pin ( self . traverse_folder ( & id, visited_folder_ids, result) ) . await ?;
97- } else {
98- result. insert ( KeyValue :: Str ( Arc :: from ( id) ) ) ;
147+ match ( file. id , file. mime_type ) {
148+ ( Some ( id) , Some ( mime_type) ) => {
149+ if mime_type == FOLDER_MIME_TYPE {
150+ Box :: pin ( self . traverse_folder ( & id, visited_folder_ids, result) )
151+ . await ?;
152+ } else if is_supported_file_type ( & mime_type) {
153+ result. insert ( KeyValue :: Str ( Arc :: from ( id) ) ) ;
154+ } else {
155+ warn ! ( "Skipping file with unsupported mime type: id={id}, mime_type={mime_type}, name={:?}" , file. name) ;
156+ }
157+ }
158+ ( id, mime_type) => {
159+ warn ! (
160+ "Skipping file with incomplete metadata: id={id:?}, mime_type={mime_type:?}" ,
161+ ) ;
99162 }
100163 }
101164 }
@@ -121,17 +184,32 @@ impl SourceExecutor for Executor {
121184 async fn get_value ( & self , key : & KeyValue ) -> Result < Option < FieldValues > > {
122185 let file_id = key. str_value ( ) ?;
123186
124- let filename = async {
125- let ( _, file) = self
126- . drive_hub
187+ let ( _, file) = self
188+ . drive_hub
189+ . files ( )
190+ . get ( file_id)
191+ . add_scope ( Scope :: Readonly )
192+ . doit ( )
193+ . await ?;
194+
195+ let resp_body = if let Some ( export_mime_type) = file
196+ . mime_type
197+ . as_ref ( )
198+ . and_then ( |mime_type| EXPORT_MIME_TYPES . get ( mime_type. as_str ( ) ) )
199+ {
200+ let target_mime_type = if self . binary {
201+ export_mime_type. binary
202+ } else {
203+ export_mime_type. text
204+ } ;
205+ self . drive_hub
127206 . files ( )
128- . get ( file_id)
207+ . export ( & file_id, target_mime_type )
129208 . add_scope ( Scope :: Readonly )
130209 . doit ( )
131- . await ?;
132- anyhow:: Ok ( file. name . unwrap_or_default ( ) )
133- } ;
134- let body = async {
210+ . await ?
211+ . into_body ( )
212+ } else {
135213 let ( resp, _) = self
136214 . drive_hub
137215 . files ( )
@@ -140,13 +218,11 @@ impl SourceExecutor for Executor {
140218 . param ( "alt" , "media" )
141219 . doit ( )
142220 . await ?;
143- let content = resp. into_body ( ) . collect ( ) . await ?;
144- anyhow:: Ok ( content)
221+ resp. into_body ( )
145222 } ;
146- let ( filename, content) = try_join ( filename, body) . await ?;
147-
223+ let content = resp_body. collect ( ) . await ?;
148224 let mut fields = Vec :: with_capacity ( 2 ) ;
149- fields. push ( filename . into ( ) ) ;
225+ fields. push ( file . name . unwrap_or_default ( ) . into ( ) ) ;
150226 if self . binary {
151227 fields. push ( content. to_bytes ( ) . to_vec ( ) . into ( ) ) ;
152228 } else {
0 commit comments