Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# googledrive (development version)

* `drive_upload()` and `drive_download()` support the conversion of a local
markdown file to a Google Doc and vice versa (#465, @ateucher).

# googledrive 2.1.1

* `drive_auth(subject =)` is a new argument that can be used with
Expand Down
3 changes: 3 additions & 0 deletions R/utils-paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ file_ext_safe <- function(x) {

## add an extension if it is not already present
apply_extension <- function(path, ext) {
if (is.na(ext) || ext == "") {
return(path)
}
ext_orig <- file_ext_safe(path)
if (!identical(ext, ext_orig)) {
path <- paste(path, ext, sep = ".")
Expand Down
3 changes: 2 additions & 1 deletion data-raw/extension-mime-type-defaults.csv
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ text/csv,csv
application/vnd.oasis.opendocument.presentation,odp
text/richtext,rtx
application/zip,zip
image/svg+xml,svg
image/svg+xml,svg
text/markdown,md
194 changes: 194 additions & 0 deletions data-raw/mime-types-and-file-extensions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# Generate a table associating file extensions or "type" with MIME types.
# Used in drive_mime_type() to do these sorts of translations:
# Input type Input MIME type
# ---------------------------------|--------------|-----------------------------------------
# Casual name for native Drive type "spreadsheet" "application/vnd.google-apps.spreadsheet"
# File extension "jpeg" "image/jpeg"
# MIME type "image/gif" "image/gif"

# So we need to muster file types, file extensions, and MIME types.

library(here)
library(tidyverse)
library(httr)
library(rvest)

# The following table lists MIME types that are specific to Google Workspace
# and Google Drive
# Page that holds the table of Google-specific MIME types.
url <- "https://developers.google.com/drive/api/v3/mime-types"

# Download and parse the page, extract its HTML table, and reshape the result
# into a two-column tibble. Blank descriptions are recorded as NA.
google_mime_types <- content(GET(url)) %>%
  html_table(fill = TRUE) %>%
  flatten() %>%
  as_tibble() %>%
  select(mime_type = `MIME Type`, description = Description) %>%
  mutate(description = na_if(description, ""))
nrow(google_mime_types) # 20

# Another table we've made for the import and export MIME types for native Drive
# files.
import_and_export <- here(
  "inst", "extdata", "data", "translate_mime_types.csv"
) %>%
  read_csv()

# Start from the distinct local MIME types that appear in the import/export
# table, then append the Google Drive/Workspace-specific MIME types.
mime_tbl <- bind_rows(
  distinct(select(import_and_export, mime_type = mime_type_local)),
  google_mime_types
)
nrow(mime_tbl)
# 69 rows, i.e. 69 MIME types

# mime::mimemap is a good source of associations between MIME types and file
# extensions.
# One row per (MIME type, extension) pair known to mime::mimemap.
mime_ext <- enframe(mime::mimemap, name = "ext", value = "mime_type") %>%
  select(mime_type, ext)
nrow(mime_ext) # 1548
# Number of distinct values in each column
summarise(mime_ext, across(everything(), n_distinct))
# # A tibble: 1 × 2
#   mime_type   ext
#       <int> <int>
# 1      1203  1548

# Keep every row of mime_tbl and attach whatever extension(s) mime_ext holds
# for its MIME type.
mime_tbl_2 <- right_join(mime_ext, mime_tbl, by = "mime_type")
nrow(mime_tbl_2) # 88
# 69 -> 88 rows, because some MIME types are associated with multiple extensions

# Example: JPEGs
filter(mime_tbl_2, str_detect(mime_type, "image/jpeg"))
# # A tibble: 4 × 3
#   mime_type  ext   description
#   <chr>      <chr> <chr>
# 1 image/jpeg jpeg  NA
# 2 image/jpeg jpg   NA
# 3 image/jpeg jpe   NA
# 4 image/jpeg jfif  NA

# weird that "text/rtf" appears in the Google Drive/Workspace world, but is not
# covered by mime::mimemap
filter(mime_tbl_2, str_detect(mime_type, "rtf"))
# # A tibble: 2 × 3
#   mime_type       ext   description
#   <chr>           <chr> <chr>
# 1 application/rtf rtf   NA
# 2 text/rtf        NA    NA

# MIME types that occupy more than one row, most frequent first
mime_tbl_2 %>%
  group_by(mime_type) %>%
  count(sort = TRUE) %>%
  filter(n > 1)
# # A tibble: 8 × 2
# # Groups:   mime_type [8]
#   mime_type                         n
#   <chr>                         <int>
# 1 application/vnd.ms-excel          6
# 2 text/plain                        5
# 3 image/jpeg                        4
# 4 text/html                         3
# 5 video/mp4                         3
# 6 application/vnd.ms-powerpoint     2
# 7 image/svg+xml                     2
# 8 text/markdown                     2

# We're going to need to resolve such situations where we can by declaring a
# default extension for such MIME types.

# Proposal: any MIME type that appears in the official export (and maybe
# import?) list should have an associated file extension.
# Note that this is *not* about import/export, but rather about upload/download.
# We're just referring to import/export as a semi-authoritative source of
# important MIME types.

# Full listing, ordered by MIME type
print(arrange(mime_tbl_2, mime_type), n = Inf)

# Only the rows that have no extension
mime_tbl_2 %>%
  filter(is.na(ext)) %>%
  arrange(mime_type) %>%
  print(n = Inf)
# rows that catch my eye
# ...
# 34 image/jpg       NA NA
# ...
# 36 image/x-bmp     NA NA
# 37 image/x-png     NA NA
# 38 text/richtext   NA NA
# 39 text/rtf        NA NA
# 40 text/x-markdown NA NA

# Fixup for special Google Drive/Workspace MIME types: their human_type is the
# MIME type with the Google prefix removed; every other row reuses ext.
google_prefix <- "application/vnd.google-apps."
mime_tbl_3 <- mutate(
  mime_tbl_2,
  human_type = ifelse(
    grepl(google_prefix, mime_type, fixed = TRUE),
    sub(google_prefix, "", mime_type, fixed = TRUE),
    ext
  )
)

print(arrange(mime_tbl_3, mime_type), n = Inf)

# How many rows have / lack an extension?
count(mime_tbl_3, is.na(ext))
# # A tibble: 2 × 2
#   `is.na(ext)`     n
#   <lgl>        <int>
# 1 FALSE           48
# 2 TRUE            40

# Where did this csv come from? these must be my choices
default_ext <- mutate(
  read_csv(here("data-raw", "extension-mime-type-defaults.csv")),
  default = TRUE
)

# Flag the default extension for each MIME type:
#   NA    when the row has no extension at all
#   TRUE  when the row matched an entry in default_ext
#   FALSE otherwise
# NOTE(review): left_join() is given no `by`, so it joins on whatever columns
# the two tables have in common -- confirm that is the intended key.
mime_tbl_4 <- left_join(mime_tbl_3, default_ext) %>%
  mutate(
    default = case_when(
      is.na(ext) ~ NA,
      is.na(default) ~ FALSE,
      TRUE ~ TRUE
    )
  )

# Append a hand-written row for Colab notebooks.
mime_tbl_5 <- add_row(
  mime_tbl_4,
  # TODO(jennybc): consider also "application/vnd.google.colaboratory"
  mime_type = "application/vnd.google.colab",
  ext = "ipynb",
  description = "Colab notebook",
  human_type = "colab",
  default = TRUE
)

# Some MIME types still lack an extension. Affiliate them by hand:
#   text/richtext   --> rtf
#   text/rtf        --> rtf
#   text/x-markdown --> md
# fmt: skip
patch <- tribble(
  ~mime_type,        ~ext,  ~default,
  "text/richtext",   "rtf", TRUE,
  "text/rtf",        "rtf", TRUE,
  "text/x-markdown", "md",  TRUE
)

# Apply the patch by MIME type, then fix the row order before writing.
mime_tbl_6 <- mime_tbl_5 %>%
  rows_patch(patch, by = "mime_type") %>%
  arrange(mime_type, ext)

write_csv(mime_tbl_6, file = here("inst", "extdata", "data", "mime_tbl.csv"))
79 changes: 79 additions & 0 deletions data-raw/mime-types-google.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Generate a table of MIME types that maps between types that are specific to
# Google Workspace and Google Drive and other MIME types

# For example, what MIME types can be uploaded and converted to a Sheet?
# Excel or csv, etc.

# What MIME types can a Sheet be exported to as a local file?
# Excel or csv or even pdf

# Google Workspace and Google Drive supported MIME types
# Example: application/vnd.google-apps.spreadsheet
# https://developers.google.com/drive/api/v3/mime-types

# https://developers.google.com/drive/api/v3/manage-downloads

# Export MIME types for Google Workspace documents
# https://developers.google.com/workspace/drive/api/guides/ref-export-formats

library(tidyverse)
library(here)
library(googledrive)

# it doesn't matter who you auth as, but you need to auth as somebody
googledrive:::drive_auth_testing()

# MIME types for local file <--> Drive file
about <- drive_about()
fmts <- about[c("importFormats", "exportFormats")]

# importFormats is keyed by the local MIME type; each entry lists the Google
# MIME type(s) it is paired with. One row per (local, google) pair.
imports <- enframe(
  pluck(fmts, "importFormats"),
  name = "mime_type_local",
  value = "mime_type_google"
) %>%
  unnest_longer(mime_type_google) %>%
  mutate(action = "import")

# exportFormats is the mirror image: keyed by the Google MIME type, listing
# the local MIME type(s) it is paired with.
exports <- enframe(
  pluck(fmts, "exportFormats"),
  name = "mime_type_google",
  value = "mime_type_local"
) %>%
  unnest_longer(mime_type_local) %>%
  mutate(action = "export")

translate_mime_types <- bind_rows(imports, exports)

# where did this csv come from? these must be my decisions, because the
# drive.files.export endpoint has `mimeType` as a required query parameter, i.e.
# I see no basis for saying that the Drive API has default export MIME types
defaults <- mutate(
  read_csv(here("data-raw", "export-mime-type-defaults.csv")),
  action = "export",
  default = TRUE
)

# Flag the default export MIME type:
#   NA    for every import row
#   TRUE  for export rows that matched an entry in defaults
#   FALSE for the remaining export rows
# NOTE(review): left_join() is given no `by`, so it joins on whatever columns
# the two tables have in common -- confirm that is the intended key.
translate_mime_types <- left_join(translate_mime_types, defaults) %>%
  mutate(
    default = case_when(
      action == "import" ~ NA,
      is.na(default) ~ FALSE,
      TRUE ~ TRUE
    )
  )

# Use a deliberate row order so that diffs of the csv are easier to read, and
# lead with the columns used for that ordering.
translate_mime_types <- select(
  arrange(translate_mime_types, action, mime_type_google, mime_type_local),
  action, mime_type_google, everything()
)

write_csv(
  translate_mime_types,
  file = here("inst", "extdata", "data", "translate_mime_types.csv")
)
Loading
Loading