Skip to content

Commit 5423d1b

Browse files
AddisonSchiller authored and cslzchen committed
Look for mfr query param from mfr in v1 provider
We can export a .gdoc as a PDF if we know the request is coming from MFR. Added `alt_export` functions and properties to the Google Drive utils and metadata. The Google Drive download now looks for 'mfr' in its kwargs and requests the file as a PDF when that value is 'true' (case-insensitive).
1 parent f6e049a commit 5423d1b

File tree

6 files changed

+95
-14
lines changed

6 files changed

+95
-14
lines changed

tests/providers/googledrive/test_metadata.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def test_file_metadata_drive(self, basepath, root_provider_fixtures):
4444
assert parsed.materialized_path == str(path)
4545
assert parsed.is_google_doc is False
4646
assert parsed.export_name == item['title']
47+
assert parsed.alt_export_name == 'PART_1420130849837.pdf'
4748

4849
def test_file_metadata_drive_slashes(self, basepath, root_provider_fixtures):
4950
item = root_provider_fixtures['file_forward_slash']
@@ -66,6 +67,7 @@ def test_file_metadata_drive_slashes(self, basepath, root_provider_fixtures):
6667
assert parsed.materialized_path == str(path)
6768
assert parsed.is_google_doc is False
6869
assert parsed.export_name == item['title']
70+
assert parsed.alt_export_name == 'PART_1420130849837.pdf'
6971

7072
def test_file_metadata_docs(self, basepath, root_provider_fixtures):
7173
item = root_provider_fixtures['docs_file_metadata']
@@ -80,6 +82,7 @@ def test_file_metadata_docs(self, basepath, root_provider_fixtures):
8082
}
8183
assert parsed.is_google_doc is True
8284
assert parsed.export_name == item['title'] + '.docx'
85+
assert parsed.alt_export_name == 'version-test.pdf'
8386

8487
def test_folder_metadata(self, root_provider_fixtures):
8588
item = root_provider_fixtures['folder_metadata']

tests/providers/googledrive/test_provider.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,7 @@ class TestDownload:
588588
ds.DRIVE_IGNORE_VERSION)
589589

590590
GDOC_EXPORT_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
591+
GDOC_ALT_EXPORT_MIME_TYPE = 'application/pdf'
591592

592593
@pytest.mark.asyncio
593594
@pytest.mark.aiohttpretty
@@ -619,6 +620,36 @@ async def test_download_editable_gdoc_no_revision(self, provider, sharing_fixtur
619620
assert aiohttpretty.has_call(method='GET', uri=revisions_url)
620621
assert aiohttpretty.has_call(method='GET', uri=download_file_url)
621622

623+
@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_download_editable_gdoc_as_mfr(self, provider, sharing_fixtures):
    """Download an editable Google Doc with ``mfr='true'`` and expect the PDF export.

    Mock responses are registered for the metadata URL, the revisions URL and
    the export link keyed by ``GDOC_ALT_EXPORT_MIME_TYPE``.  The test then
    checks the name and content of the returned stream and that all three
    URLs were requested.
    """
    metadata_body = sharing_fixtures['editable_gdoc']['metadata']
    path = GoogleDrivePath(
        '/sharing/editable_gdoc',
        _ids=['1', '2', metadata_body['id']]
    )

    metadata_url = provider.build_url('files', path.identifier)
    aiohttpretty.register_json_uri('GET', metadata_url, body=metadata_body)

    revisions_body = sharing_fixtures['editable_gdoc']['revisions']
    revisions_url = provider.build_url('files', metadata_body['id'], 'revisions')
    aiohttpretty.register_json_uri('GET', revisions_url, body=revisions_body)

    file_content = b'we love you conrad'
    # The alternate export link is the one the provider should request for mfr.
    download_file_url = metadata_body['exportLinks'][self.GDOC_ALT_EXPORT_MIME_TYPE]
    aiohttpretty.register_uri('GET', download_file_url, body=file_content, auto_length=True)

    result = await provider.download(path, mfr='true')
    assert result.name == 'editable_gdoc.pdf'

    content = await result.read()
    assert content == file_content
    assert aiohttpretty.has_call(method='GET', uri=metadata_url)
    assert aiohttpretty.has_call(method='GET', uri=revisions_url)
    assert aiohttpretty.has_call(method='GET', uri=download_file_url)
652+
622653
@pytest.mark.asyncio
623654
@pytest.mark.aiohttpretty
624655
async def test_download_editable_gdoc_good_revision(self, provider, sharing_fixtures):
@@ -1577,6 +1608,19 @@ async def test_intra_copy_file(self, provider, root_provider_fixtures):
15771608

15781609
class TestOperationsOrMisc:
15791610

1611+
def test_misc_utils(self):
    """Check the alt-export helpers against Google Drawing metadata.

    The expected values are the ``.jpg`` extension and the ``image/jpeg``
    export link -- presumably because the drawing format defines no alternate
    export and the helpers fall back to the primary one (confirm against
    ``DOCS_FORMATS``).
    """
    export_url = 'badurl.osf.899'
    drawing_metadata = {
        'mimeType': 'application/vnd.google-apps.drawing',
        'exportLinks': {'image/jpeg': export_url},
    }

    ext = drive_utils.get_alt_download_extension(drawing_metadata)
    link = drive_utils.get_alt_export_link(drawing_metadata)

    assert (ext, link) == ('.jpg', export_url)
1623+
15801624
@pytest.mark.asyncio
15811625
@pytest.mark.aiohttpretty
15821626
async def test_can_duplicate_names(self, provider):

waterbutler/providers/googledrive/metadata.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ def export_name(self):
136136
title += ext
137137
return title
138138

139+
@property
def alt_export_name(self):
    """File name to use when the file is downloaded via its alternate export.

    For Google Docs files the title gets the extension returned by
    ``utils.get_alt_download_extension``; any other file keeps its title
    unchanged.
    """
    name = self._file_title
    if not self.is_google_doc:
        return name
    return name + utils.get_alt_download_extension(self.raw)
146+
139147
@property
140148
def _file_title(self):
141149
return self.raw['title']

waterbutler/providers/googledrive/provider.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,18 @@ async def download(self, # type: ignore
234234

235235
metadata = await self.metadata(path, revision=revision)
236236

237+
if kwargs.get('mfr', None) and kwargs['mfr'].lower() == 'true':
238+
download_url = drive_utils.get_alt_export_link(metadata.raw) # type: ignore
239+
export_name = metadata.alt_export_name
240+
else:
241+
242+
# TODO figure out metadata.raw.get('downloadUrl')
243+
download_url = metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw) # type: ignore
244+
export_name = metadata.export_name # type: ignore
245+
237246
download_resp = await self.make_request(
238247
'GET',
239-
metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw), # type: ignore
248+
download_url,
240249
range=range,
241250
expects=(200, 206),
242251
throws=exceptions.DownloadError,
@@ -251,7 +260,7 @@ async def download(self, # type: ignore
251260
if download_resp.headers.get('Content-Type'):
252261
# TODO: Add these properties to base class officially, instead of as one-off
253262
stream.content_type = download_resp.headers['Content-Type'] # type: ignore
254-
stream.name = metadata.export_name # type: ignore
263+
stream.name = export_name # type: ignore
255264
return stream
256265

257266
async def upload(self, stream, path: wb_path.WaterButlerPath, *args, **kwargs) \

waterbutler/providers/googledrive/utils.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
'ext': '.gdoc',
55
'download_ext': '.docx',
66
'type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
7+
'alt_download_ext': '.pdf',
8+
'alt_type': 'application/pdf',
79
},
810
{
911
'mime_type': 'application/vnd.google-apps.drawing',
@@ -37,28 +39,42 @@ def is_docs_file(metadata):
3739

3840

3941
def get_mimetype_from_ext(ext):
40-
for format in DOCS_FORMATS:
41-
if format['ext'] == ext:
42-
return format['mime_type']
42+
for format_type in DOCS_FORMATS:
43+
if format_type['ext'] == ext:
44+
return format_type['mime_type']
4345

4446

4547
def get_format(metadata):
46-
for format in DOCS_FORMATS:
47-
if format['mime_type'] == metadata['mimeType']:
48-
return format
48+
for format_type in DOCS_FORMATS:
49+
if format_type['mime_type'] == metadata['mimeType']:
50+
return format_type
4951
return DOCS_DEFAULT_FORMAT
5052

5153

5254
def get_extension(metadata):
53-
format = get_format(metadata)
54-
return format['ext']
55+
format_type = get_format(metadata)
56+
return format_type['ext']
5557

5658

5759
def get_download_extension(metadata):
58-
format = get_format(metadata)
59-
return format['download_ext']
60+
format_type = get_format(metadata)
61+
return format_type['download_ext']
62+
63+
64+
def get_alt_download_extension(metadata):
    """Return the extension for the alternate export of ``metadata``'s format.

    Uses the format's ``alt_download_ext`` when it is present and truthy,
    otherwise falls back to the regular ``download_ext``.
    """
    docs_format = get_format(metadata)
    alt_ext = docs_format.get('alt_download_ext')
    if alt_ext:
        return alt_ext
    return docs_format['download_ext']
67+
68+
69+
def get_alt_export_link(metadata):
    """Return the export link for the alternate export type of ``metadata``'s format.

    When the format defines a truthy ``alt_type``, the matching entry of
    ``metadata['exportLinks']`` is returned, or ``None`` if no link is listed
    for that type.  Formats without an ``alt_type`` use the link for their
    regular ``type``.

    Raises ``KeyError`` if ``metadata`` has no ``exportLinks`` key, or if the
    regular-type link is needed but missing.
    """
    docs_format = get_format(metadata)
    links = metadata['exportLinks']
    alt_type = docs_format.get('alt_type')
    if not alt_type:
        return links[docs_format['type']]
    return links.get(alt_type)
6076

6177

6278
def get_export_link(metadata):
63-
format = get_format(metadata)
64-
return metadata['exportLinks'][format['type']]
79+
format_type = get_format(metadata)
80+
return metadata['exportLinks'][format_type['type']]

waterbutler/server/api/v1/provider/metadata.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ async def download_file(self):
7171
range=request_range,
7272
accept_url='direct' not in self.request.query_arguments,
7373
mode=self.get_query_argument('mode', default=None),
74+
mfr=self.get_query_argument('mfr', default=None)
7475
)
7576

7677
if isinstance(stream, str):

0 commit comments

Comments
 (0)