Skip to content

Commit fe38eba

Browse files
ateucherjennybcgithub-actions[bot]
authored
Update file types supported by drive_download() (#465)
* Add default extension for markdown files * Run data-raw/mime-types.R * Add failing test for #465 * If mimetype does not have an extension in mime_tbl.csv NA is appended to the filename * Don't add extension if none in mime_types.csv * One script per csv file * Revisit the prep of the table for MIME types <--> extensions * Update snapshot * Test the utility instead The test won't work as written anymore, using the "text/x-markdown" MIME type. Instead of modifying it, I tested the filepath utility itself. * Make Air happy * Update data-raw/mime-types-google.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Add a new example markdown file * Add back a test of drive_download() * Add a test for upload of md to doc * Add NEWS bullet --------- Co-authored-by: Jenny Bryan <jenny.f.bryan@gmail.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 231dce1 commit fe38eba

File tree

14 files changed

+452
-155
lines changed

14 files changed

+452
-155
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# googledrive (development version)
22

3+
* `drive_upload()` and `drive_download()` support the conversion of a local
4+
markdown file to a Google Doc and vice versa (#465, @ateucher).
5+
36
# googledrive 2.1.1
47

58
* `drive_auth(subject =)` is a new argument that can be used with

R/utils-paths.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ file_ext_safe <- function(x) {
185185

186186
## add an extension if it is not already present
187187
apply_extension <- function(path, ext) {
188+
if (is.na(ext) || ext == "") {
189+
return(path)
190+
}
188191
ext_orig <- file_ext_safe(path)
189192
if (!identical(ext, ext_orig)) {
190193
path <- paste(path, ext, sep = ".")

data-raw/extension-mime-type-defaults.csv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ text/csv,csv
2525
application/vnd.oasis.opendocument.presentation,odp
2626
text/richtext,rtx
2727
application/zip,zip
28-
image/svg+xml,svg
28+
image/svg+xml,svg
29+
text/markdown,md
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
# Generate a table associating file extensions or "type" with MIME types.
2+
# Used in drive_mime_type() to do these sorts of translations:
3+
# Input type                       | Input        | MIME type
4+
# ---------------------------------|--------------|-----------------------------------------
5+
# Casual name for native Drive type "spreadsheet" "application/vnd.google-apps.spreadsheet"
6+
# File extension "jpeg" "image/jpeg"
7+
# MIME type "image/gif" "image/gif"
8+
9+
# So we need to muster file types, file extensions, and MIME types.
10+
11+
library(here)
12+
library(tidyverse)
13+
library(httr)
14+
library(rvest)
15+
16+
# The following table lists MIME types that are specific to Google Workspace and
17+
# Google Drive
18+
url <- "https://developers.google.com/drive/api/v3/mime-types"
19+
20+
google_mime_types <- GET(url) %>%
21+
content() %>%
22+
html_table(fill = TRUE) %>%
23+
flatten() %>%
24+
as_tibble() %>%
25+
select(
26+
mime_type = `MIME Type`,
27+
description = Description
28+
) %>%
29+
mutate(description = na_if(description, ""))
30+
nrow(google_mime_types) # 20
31+
32+
# Another table we've made for the import and export MIME types for native Drive
33+
# files.
34+
import_and_export <- read_csv(
35+
file = here("inst", "extdata", "data", "translate_mime_types.csv")
36+
)
37+
38+
# Take the local MIME types that are supported for import or export, get rid of
39+
# duplicates, and add them to the Google Drive/Workspace-specific MIME types.
40+
mime_tbl <- import_and_export %>%
41+
select(mime_type = mime_type_local) %>%
42+
distinct() %>%
43+
bind_rows(google_mime_types)
44+
nrow(mime_tbl)
45+
# 69 rows, i.e. 69 MIME types
46+
47+
# mime::mimemap is a good source of associations between MIME types and file
48+
# extensions.
49+
mime_ext <- mime::mimemap %>%
50+
enframe(name = "ext", value = "mime_type") %>%
51+
select(mime_type, ext)
52+
nrow(mime_ext) # 1548
53+
mime_ext %>%
54+
summarise(across(everything(), n_distinct))
55+
# # A tibble: 1 × 2
56+
# mime_type ext
57+
# <int> <int>
58+
# 1 1203 1548
59+
60+
mime_tbl_2 <- mime_ext %>%
61+
right_join(mime_tbl, by = "mime_type")
62+
nrow(mime_tbl_2) # 88
63+
# 69 -> 88 rows, because some MIME types are associated with multiple extensions
64+
65+
# Example: JPEGs
66+
mime_tbl_2 |>
67+
filter(str_detect(mime_type, "image/jpeg"))
68+
# # A tibble: 4 × 3
69+
# mime_type ext description
70+
# <chr> <chr> <chr>
71+
# 1 image/jpeg jpeg NA
72+
# 2 image/jpeg jpg NA
73+
# 3 image/jpeg jpe NA
74+
# 4 image/jpeg jfif NA
75+
76+
# weird that "text/rtf" appears in the Google Drive/Workspace world, but is not
77+
# covered by mime::mimemap
78+
mime_tbl_2 |>
79+
filter(str_detect(mime_type, "rtf"))
80+
# # A tibble: 2 × 3
81+
# mime_type ext description
82+
# <chr> <chr> <chr>
83+
# 1 application/rtf rtf NA
84+
# 2 text/rtf NA NA
85+
86+
mime_tbl_2 |>
87+
group_by(mime_type) |>
88+
count(sort = TRUE) |>
89+
filter(n > 1)
90+
# # A tibble: 8 × 2
91+
# # Groups: mime_type [8]
92+
# mime_type n
93+
# <chr> <int>
94+
# 1 application/vnd.ms-excel 6
95+
# 2 text/plain 5
96+
# 3 image/jpeg 4
97+
# 4 text/html 3
98+
# 5 video/mp4 3
99+
# 6 application/vnd.ms-powerpoint 2
100+
# 7 image/svg+xml 2
101+
# 8 text/markdown 2
102+
103+
# We're going to need to resolve such situations where we can by declaring a
104+
# default extension for such MIME types.
105+
106+
# Proposal: any MIME type that appears in the official export (and maybe
107+
# import?) list should have an associated file extension.
108+
# Note that this is *not* about import/export, but rather about upload/download.
109+
# We're just referring to import/export as a semi-authoritative source of
110+
# important MIME types.
111+
112+
mime_tbl_2 |>
113+
arrange(mime_type) |>
114+
print(n = Inf)
115+
116+
mime_tbl_2 |>
117+
filter(is.na(ext)) |>
118+
arrange(mime_type) |>
119+
print(n = Inf)
120+
# rows that catch my eye
121+
# ...
122+
# 34 image/jpg NA NA
123+
# ...
124+
# 36 image/x-bmp NA NA
125+
# 37 image/x-png NA NA
126+
# 38 text/richtext NA NA
127+
# 39 text/rtf NA NA
128+
# 40 text/x-markdown NA NA
129+
130+
# Fixup for special Google Drive/Workspace MIME types
131+
google_prefix <- "application/vnd.google-apps."
132+
mime_tbl_3 <- mime_tbl_2 %>%
133+
mutate(
134+
human_type = ifelse(
135+
grepl(google_prefix, mime_type, fixed = TRUE),
136+
sub(google_prefix, "", mime_type, fixed = TRUE),
137+
ext
138+
)
139+
)
140+
141+
mime_tbl_3 |>
142+
arrange(mime_type) |>
143+
print(n = Inf)
144+
145+
mime_tbl_3 |>
146+
count(is.na(ext))
147+
# # A tibble: 2 × 2
148+
# `is.na(ext)` n
149+
# <lgl> <int>
150+
# 1 FALSE 48
151+
# 2 TRUE 40
152+
153+
# Where did this csv come from? These must be my choices.
154+
default_ext <- here("data-raw", "extension-mime-type-defaults.csv") %>%
155+
read_csv() %>%
156+
mutate(default = TRUE)
157+
158+
mime_tbl_4 <- mime_tbl_3 %>%
159+
left_join(default_ext) %>%
160+
mutate(
161+
default = case_when(
162+
is.na(ext) ~ NA,
163+
is.na(default) ~ FALSE,
164+
TRUE ~ TRUE
165+
)
166+
)
167+
168+
mime_tbl_5 <- mime_tbl_4 %>%
169+
add_row(
170+
# TODO(jennybc): consider also "application/vnd.google.colaboratory"
171+
mime_type = "application/vnd.google.colab",
172+
ext = "ipynb",
173+
description = "Colab notebook",
174+
human_type = "colab",
175+
default = TRUE
176+
)
177+
178+
# I want to set up extension affiliation for MIME types:
179+
# text/richtext --> rtf
180+
# text/rtf --> rtf
181+
# text/x-markdown --> md
182+
# fmt: skip
183+
patch <- tribble(
184+
~mime_type, ~ext, ~default,
185+
"text/richtext", "rtf", TRUE,
186+
"text/rtf", "rtf", TRUE,
187+
"text/x-markdown", "md", TRUE
188+
189+
)
190+
mime_tbl_6 <- mime_tbl_5 %>%
191+
rows_patch(patch, by = "mime_type") |>
192+
arrange(mime_type, ext)
193+
194+
write_csv(mime_tbl_6, file = here("inst", "extdata", "data", "mime_tbl.csv"))

data-raw/mime-types-google.R

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Generate a table of MIME types that maps between types that are specific to
2+
# Google Workspace and Google Drive and other MIME types
3+
4+
# For example, what MIME types can be uploaded and converted to a Sheet?
5+
# Excel or csv, etc.
6+
7+
# What MIME types can a Sheet be exported to as a local file?
8+
# Excel or csv or even pdf
9+
10+
# Google Workspace and Google Drive supported MIME types
11+
# Example: application/vnd.google-apps.spreadsheet
12+
# https://developers.google.com/drive/api/v3/mime-types
13+
14+
# https://developers.google.com/drive/api/v3/manage-downloads
15+
16+
# Export MIME types for Google Workspace documents
17+
# https://developers.google.com/workspace/drive/api/guides/ref-export-formats
18+
19+
library(tidyverse)
20+
library(here)
21+
library(googledrive)
22+
23+
# it doesn't matter who you auth as, but you need to auth as somebody
24+
googledrive:::drive_auth_testing()
25+
26+
# MIME types for local file <--> Drive file
27+
about <- drive_about()
28+
fmts <- about[c("importFormats", "exportFormats")]
29+
30+
imports <- fmts %>%
31+
pluck("importFormats") %>%
32+
enframe(
33+
name = "mime_type_local",
34+
value = "mime_type_google"
35+
) %>%
36+
unnest_longer(mime_type_google) %>%
37+
mutate(action = "import")
38+
39+
exports <- fmts %>%
40+
pluck("exportFormats") %>%
41+
enframe(
42+
name = "mime_type_google",
43+
value = "mime_type_local"
44+
) %>%
45+
unnest_longer(mime_type_local) %>%
46+
mutate(action = "export")
47+
48+
translate_mime_types <- bind_rows(imports, exports)
49+
50+
# where did this csv come from? these must be my decisions, because the
51+
# drive.files.export endpoint has `mimeType` as a required query parameter, i.e.
52+
# I see no basis for saying that the Drive API has default export MIME types
53+
defaults <- here("data-raw", "export-mime-type-defaults.csv") %>%
54+
read_csv() %>%
55+
mutate(
56+
action = "export",
57+
default = TRUE
58+
)
59+
60+
translate_mime_types <- translate_mime_types %>%
61+
left_join(defaults) %>%
62+
mutate(
63+
default = case_when(
64+
action == "import" ~ NA,
65+
is.na(default) ~ FALSE,
66+
TRUE ~ TRUE
67+
)
68+
)
69+
70+
# be intentional about row order so diffs are easier to make sense of
71+
# I think it also makes sense to set column order accordingly
72+
translate_mime_types <- translate_mime_types %>%
73+
arrange(action, mime_type_google, mime_type_local) %>%
74+
select(action, mime_type_google, everything())
75+
76+
write_csv(
77+
translate_mime_types,
78+
file = here("inst", "extdata", "data", "translate_mime_types.csv")
79+
)

0 commit comments

Comments
 (0)