Skip to content

Commit 13e8eb1

Browse files
authored
Merge pull request #790 from CitrineInformatics/feature/raw_download
adding a read method to read from S3
2 parents 3251328 + 4274a80 commit 13e8eb1

File tree

3 files changed

+89
-1
lines changed

3 files changed

+89
-1
lines changed

src/citrine/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.44.0'
1+
__version__ = '1.44.1'

src/citrine/resources/file_link.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,37 @@ def download(self, *, file_link: Union[str, UUID, FileLink], local_path: Union[s
687687
download_response = requests.get(final_url)
688688
write_file_locally(download_response.content, final_path)
689689

690+
def read(self, *, file_link: Union[str, UUID, FileLink]) -> bytes:
    """
    Read the file associated with a given FileLink.

    Nothing is written to disk: the file is fetched over HTTP and its raw
    contents are returned to the caller.

    Parameters
    ----------
    file_link: FileLink, str, UUID
        Resource referencing the file.

    Returns
    -------
    bytes
        The raw contents of the file.

    Raises
    ------
    ValueError
        If the link's URL is neither an external URL nor a valid local
        file URL.
    requests.HTTPError
        If the content request is answered with a 4xx/5xx status.

    """
    file_link = self._resolve_file_link(file_link)

    if self._is_external_url(file_link.url):  # Pull it from where ever it lives
        final_url = file_link.url
    elif self._validate_local_url(file_link.url):
        # The "/content-link" route returns a pre-signed url to download the file.
        content_link = self._get_path_from_file_link(file_link, action='content-link')
        content_link_response = self.session.get_resource(content_link)
        pre_signed_url = content_link_response['pre_signed_read_link']
        final_url = rewrite_s3_links_locally(pre_signed_url, self.session.s3_endpoint_url)
    else:  # Unrecognized
        raise ValueError(f"URL was malformed for a local file resource ({file_link.url}).")

    download_response = requests.get(final_url)
    # Without this check an error response body would be handed back to the
    # caller as though it were the file's contents.
    download_response.raise_for_status()
    return download_response.content
720+
690721
def process(self, *, file_link: Union[FileLink, str, UUID],
691722
processing_type: FileProcessingType,
692723
wait_for_response: bool = True,

tests/resources/test_file_link.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,63 @@ def _checked_write(path, content):
410410
with pytest.raises(ValueError, match="malformed"):
411411
collection.download(file_link=bad_file, local_path=target_dir)
412412

413+
def test_read(collection: FileCollection, session):
    """
    Test that reading a file works as expected.

    """
    # Given
    filename = 'diagram.pdf'
    url = f"projects/{collection.project_id}/datasets/{collection.dataset_id}/files/{uuid4()}/versions/{uuid4()}"
    file = FileLink.build(FileLinkDataFactory(url=url, filename=filename))
    pre_signed_url = "http://files.citrine.io/secret-codes/jiifema987pjfsda"  # arbitrary
    session.set_response({
        'pre_signed_read_link': pre_signed_url,
    })

    with requests_mock.mock() as mock_get:
        mock_get.get(pre_signed_url, text="lorem ipsum")

        # When
        content = collection.read(file_link=file)

        # Then
        assert content.decode('UTF-8') == 'lorem ipsum'
        assert mock_get.call_count == 1
        expected_call = FakeCall(
            method='GET',
            path=url + '/content-link'
        )
        assert expected_call == session.last_call

    # A URL that is neither external nor a valid local file URL must be rejected.
    bad_url = "bin/uuid3/versions/uuid4"
    bad_file = FileLink.build(FileLinkDataFactory(url=bad_url, filename=filename))
    with pytest.raises(ValueError, match="malformed"):
        collection.read(file_link=bad_file)
446+
447+
448+
def test_external_file_read(collection: FileCollection, session):
    """
    Test that reading a file works as expected for external files.

    """
    # Given
    filename = 'spreadsheet.xlsx'
    url = "http://customer.com/data-lake/files/123/versions/456"
    file = FileLink.build(FileLinkDataFactory(url=url, filename=filename))

    with requests_mock.mock() as mock_get:
        mock_get.get(url, text='010111011')

        # When
        content = collection.read(file_link=file)

        # Then
        assert content.decode('UTF-8') == '010111011'
        # The external URL is fetched directly, exactly once.
        assert mock_get.call_count == 1
413470

414471
def test_external_file_download(collection: FileCollection, session, tmpdir):
415472
"""

0 commit comments

Comments
 (0)