tidyverse · jennybc · Sep 9, 2025 · Jul 9, 2025 · Jul 9, 2025 · Jul 10, 2025
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,8 @@
 # googledrive (development version)
 
+* `drive_upload()` and `drive_download()` support the conversion of a local
+  Markdown file to a Google Doc and vice versa (#465, @ateucher).
+
 # googledrive 2.1.1
 
 * `drive_auth(subject =)` is a new argument that can be used with

diff --git a/R/utils-paths.R b/R/utils-paths.R
@@ -185,6 +185,9 @@ file_ext_safe <- function(x) {
 
 ## add an extension if it is not already present
 apply_extension <- function(path, ext) {
+  if (is.na(ext) || ext == "") {
+    return(path)
+  }
   ext_orig <- file_ext_safe(path)
   if (!identical(ext, ext_orig)) {
     path <- paste(path, ext, sep = ".")

diff --git a/data-raw/extension-mime-type-defaults.csv b/data-raw/extension-mime-type-defaults.csv
@@ -25,4 +25,5 @@ text/csv,csv
 application/vnd.oasis.opendocument.presentation,odp
 text/richtext,rtx
 application/zip,zip
-image/svg+xml,svg
+image/svg+xml,svg
+text/markdown,md
diff --git a/data-raw/mime-types-and-file-extensions.R b/data-raw/mime-types-and-file-extensions.R
@@ -0,0 +1,194 @@
+# Generate a table associating file extensions or "type" with MIME types.
+# Used in drive_mime_type() to do these sorts of translations:
+# Input type                        Input          MIME type
+# ---------------------------------|--------------|-----------------------------------------
+# Casual name for native Drive type "spreadsheet"  "application/vnd.google-apps.spreadsheet"
+# File extension                    "jpeg"         "image/jpeg"
+# MIME type                         "image/gif"    "image/gif"
+
+# So we need to muster file types, file extensions, and MIME types.
+
+library(here)
+library(tidyverse)
+library(httr)
+library(rvest)
+
+# The following table lists MIME types that are specific to Google Workspace
+# and Google Drive
+url <- "https://developers.google.com/drive/api/v3/mime-types"
+
+google_mime_types <- GET(url) |>
+  content() |> # parse the response body
+  html_table(fill = TRUE) |> # one data frame per HTML table on the page
+  flatten() |> # NOTE(review): presumably the page has a single table -- confirm
+  as_tibble() |>
+  select(
+    mime_type = `MIME Type`,
+    description = Description
+  ) |>
+  mutate(description = na_if(description, "")) # empty string -> NA
+nrow(google_mime_types) # 20
+
+# Another table we've made for the import and export MIME types for native Drive
+# files.
+import_and_export <-
+  here("inst", "extdata", "data", "translate_mime_types.csv") |>
+  read_csv()
+
+# Keep only the local MIME types seen in an import or export, de-duplicate
+# them, then append the Google Drive/Workspace-specific MIME types.
+mime_tbl <- import_and_export |>
+  select(mime_type = mime_type_local) |>
+  distinct() |>
+  bind_rows(google_mime_types)
+nrow(mime_tbl)
+# 69 rows, i.e. 69 MIME types
+
+# mime::mimemap is a good source of associations between MIME types and file
+# extensions.
+mime_ext <- mime::mimemap |>
+  enframe(name = "ext", value = "mime_type") |> # names are the extensions
+  select(mime_type, ext)
+nrow(mime_ext) # 1548
+mime_ext |>
+  summarise(across(everything(), n_distinct))
+# # A tibble: 1 × 2
+#   mime_type   ext
+#       <int> <int>
+# 1      1203  1548
+
+mime_tbl_2 <-
+  right_join(mime_ext, mime_tbl, by = "mime_type") # keep every row of mime_tbl
+nrow(mime_tbl_2) # 88
+# 69 -> 88 rows, because some MIME types are associated with multiple extensions
+
+# Example: JPEGs (one MIME type, several extensions)
+mime_tbl_2 |>
+  filter(str_detect(mime_type, "image/jpeg"))
+# # A tibble: 4 × 3
+#   mime_type  ext   description
+#   <chr>      <chr> <chr>
+# 1 image/jpeg jpeg  NA
+# 2 image/jpeg jpg   NA
+# 3 image/jpeg jpe   NA
+# 4 image/jpeg jfif  NA
+
+# Odd: "text/rtf" appears in the Google Drive/Workspace world, but
+# mime::mimemap provides no extension for it (ext is NA below)
+mime_tbl_2 |>
+  filter(str_detect(mime_type, "rtf"))
+# # A tibble: 2 × 3
+#   mime_type       ext   description
+#   <chr>           <chr> <chr>
+# 1 application/rtf rtf   NA
+# 2 text/rtf        NA    NA
+
+mime_tbl_2 |>
+  group_by(mime_type) |>
+  count(sort = TRUE) |> # rows per MIME type, most frequent first
+  filter(n > 1) # MIME types that appear on more than one row
+# # A tibble: 8 × 2
+# # Groups:   mime_type [8]
+#   mime_type                         n
+#   <chr>                         <int>
+# 1 application/vnd.ms-excel          6
+# 2 text/plain                        5
+# 3 image/jpeg                        4
+# 4 text/html                         3
+# 5 video/mp4                         3
+# 6 application/vnd.ms-powerpoint     2
+# 7 image/svg+xml                     2
+# 8 text/markdown                     2
+
+# We're going to need to resolve such situations where we can by declaring a
+# default extension for such MIME types.
+
+# Proposal: any MIME type that appears in the official export (and maybe
+# import?) list should have an associated file extension.
+# Note that this is *not* about import/export, but rather about upload/download.
+# We're just referring to import/export as a semi-authoritative source of
+# important MIME types.
+
+mime_tbl_2 |>
+  arrange(mime_type) |>
+  print(n = Inf) # print every row
+
+mime_tbl_2 |>
+  filter(is.na(ext)) |> # MIME types with no file extension so far
+  arrange(mime_type) |>
+  print(n = Inf)
+# rows that catch my eye
+# ...
+# 34 image/jpg                                                  NA    NA
+# ...
+# 36 image/x-bmp                                                NA    NA
+# 37 image/x-png                                                NA    NA
+# 38 text/richtext                                              NA    NA
+# 39 text/rtf                                                   NA    NA
+# 40 text/x-markdown                                            NA    NA
+
+# For Google Drive/Workspace MIME types, human_type is the part after the prefix
+google_prefix <- "application/vnd.google-apps."
+mime_tbl_3 <- mutate(
+  mime_tbl_2,
+  human_type = ifelse(
+    grepl(google_prefix, mime_type, fixed = TRUE),
+    sub(google_prefix, "", mime_type, fixed = TRUE),
+    ext # every other MIME type is known by its extension (possibly NA)
+  )
+)
+
+mime_tbl_3 |>
+  arrange(mime_type) |>
+  print(n = Inf)
+
+mime_tbl_3 |>
+  count(is.na(ext))
+# # A tibble: 2 × 2
+#   `is.na(ext)`     n
+#   <lgl>        <int>
+# 1 FALSE           48
+# 2 TRUE            40
+
+# Where did this csv come from? these must be my choices
+default_ext <- here("data-raw", "extension-mime-type-defaults.csv") %>%
+  read_csv() %>%
+  mutate(default = TRUE) # every row listed in the csv is a declared default
+# NOTE(review): join keys assume the csv header is mime_type,ext -- confirm
+mime_tbl_4 <- mime_tbl_3 %>%
+  left_join(default_ext, by = c("mime_type", "ext")) %>%
+  mutate(
+    default = case_when(
+      is.na(ext) ~ NA, # no extension, so "default" is not applicable
+      is.na(default) ~ FALSE, # has an extension, but no match in the csv
+      TRUE ~ TRUE # matched a row of the csv
+    )
+  )
+
+mime_tbl_5 <- add_row(
+  mime_tbl_4,
+  # TODO(jennybc): consider also "application/vnd.google.colaboratory"
+  mime_type = "application/vnd.google.colab",
+  ext = "ipynb",
+  description = "Colab notebook",
+  human_type = "colab",
+  default = TRUE
+)
+
+# I want to set up extension affiliation for MIME types:
+# text/richtext --> rtf
+# text/rtf      --> rtf
+# text/x-markdown --> md
+# fmt: skip
+patch <- tribble(
+  ~mime_type, ~ext, ~default,
+  "text/richtext", "rtf", TRUE,
+  "text/rtf", "rtf", TRUE,
+  "text/x-markdown", "md", TRUE
+)
+# rows_patch() only fills in values that are currently NA
+mime_tbl_6 <- mime_tbl_5 |>
+  rows_patch(patch, by = "mime_type") |>
+  arrange(mime_type, ext)
+
+write_csv(mime_tbl_6, file = here("inst", "extdata", "data", "mime_tbl.csv"))
diff --git a/data-raw/mime-types-google.R b/data-raw/mime-types-google.R
@@ -0,0 +1,79 @@
+# Generate a table of MIME types that maps between types that are specific to
+# Google Workspace and Google Drive and other MIME types
+
+# For example, what MIME types can be uploaded and converted to a Sheet?
+# Excel or csv, etc.
+
+# What MIME types can a Sheet be exported to as a local file?
+# Excel or csv or even pdf
+
+# Google Workspace and Google Drive supported MIME types
+# Example: application/vnd.google-apps.spreadsheet
+# https://developers.google.com/drive/api/v3/mime-types
+
+# https://developers.google.com/drive/api/v3/manage-downloads
+
+# Export MIME types for Google Workspace documents
+# https://developers.google.com/workspace/drive/api/guides/ref-export-formats
+
+library(tidyverse)
+library(here)
+library(googledrive)
+
+# it doesn't matter who you auth as, but you need to auth as somebody
+googledrive:::drive_auth_testing() # presumably an unexported helper, hence `:::`
+
+# MIME types for local file <--> Drive file
+about <- drive_about()
+fmts <- about[c("importFormats", "exportFormats")] # keep only these two
+
+imports <- pluck(fmts, "importFormats") |>
+  enframe(
+    name = "mime_type_local",
+    value = "mime_type_google"
+  ) |>
+  unnest_longer(mime_type_google) |>
+  # every row derived from importFormats is tagged as an import
+  mutate(action = "import")
+
+exports <- pluck(fmts, "exportFormats") |>
+  enframe(
+    name = "mime_type_google",
+    value = "mime_type_local"
+  ) |>
+  unnest_longer(mime_type_local) |>
+  # every row derived from exportFormats is tagged as an export
+  mutate(action = "export")
+
+translate_mime_types <- bind_rows(imports, exports)
+
+# where did this csv come from? these must be my decisions, because the
+# drive.files.export endpoint has `mimeType` as a required query parameter, i.e.
+# I see no basis for saying that the Drive API has default export MIME types
+defaults <- here("data-raw", "export-mime-type-defaults.csv") |>
+  read_csv() |>
+  mutate(
+    action = "export",
+    default = TRUE
+  )
+
+translate_mime_types <- translate_mime_types |>
+  left_join(defaults) |> # natural join on all shared columns
+  mutate(
+    default = case_when(
+      action == "import" ~ NA, # import rows never get a default
+      is.na(default) ~ FALSE, # export row with no match in the csv
+      TRUE ~ TRUE # export row matched in the csv
+    )
+  )
+
+# be intentional about row order so diffs are easier to make sense of
+# I think it also makes sense to set column order accordingly
+translate_mime_types <- translate_mime_types |>
+  arrange(action, mime_type_google, mime_type_local) |>
+  relocate(action, mime_type_google) # these two first, the rest as they were
+
+write_csv(
+  translate_mime_types,
+  file = here("inst", "extdata", "data", "translate_mime_types.csv")
+)