2
2
3
3
import boto3 # type:ignore[import-untyped]
4
4
import botocore # type:ignore[import-untyped]
5
- from bson import ObjectId
6
- from PIL import Image
7
-
8
- from .document import ImageDocument , StoredDocument
9
5
10
6
11
7
class ObjectStorage :
12
- """A class used store image documents ."""
8
+ """A class used to store binary data ."""
13
9
14
10
root_location : str
15
- """The root location to use in the object store."""
11
+ """The default root location to use in the object store."""
16
12
17
13
url_prefixes : list [str ] | None
18
14
"""The url prefixes used by the object store, for reading data from a url."""
19
15
20
- def save_image (self , image : ImageDocument ) -> StoredDocument :
21
- """Save an image document to the object store."""
16
+ def save_data (self , data : io . BytesIO , object_name : str ) -> None :
17
+ """Save data to the object store."""
22
18
raise NotImplementedError
23
19
24
- def load_image (self , document : StoredDocument ) -> ImageDocument :
25
- """Load an image document from the object store."""
20
+ def read_data (self , object_name : str ) -> io . BytesIO :
21
+ """Read data from the object store."""
26
22
raise NotImplementedError
27
23
28
- def read_from_url (self , url : str ) -> io .BytesIO :
29
- """Read data from a url into a BytesIO object ."""
24
+ def load_url (self , url : str ) -> io .BytesIO :
25
+ """Load data from a url."""
30
26
raise NotImplementedError
31
27
32
- def delete_image (self , document : StoredDocument ) -> None :
33
- """Remove an image document from the object store."""
28
+ def delete_data (self , object_name : str ) -> None :
29
+ """Delete data from the object store."""
34
30
raise NotImplementedError
35
31
36
- def close (self ) -> None :
32
+ def close (self ):
37
33
"""Close the object store."""
38
- raise NotImplementedError
34
+ pass
39
35
40
36
41
37
class S3Storage (ObjectStorage ):
@@ -59,41 +55,26 @@ def __init__(
59
55
self .client = client or boto3 .client ("s3" , region_name = region_name )
60
56
self .root_location = bucket_name
61
57
62
- def save_image (self , image : ImageDocument ) -> StoredDocument :
63
- object_name = f"{ ObjectId ()} .png"
64
- fd = io .BytesIO ()
65
- image .image .save (fd , "png" )
66
- fd .seek (0 )
67
- self .client .upload_fileobj (fd , self .root_location , object_name )
68
- return StoredDocument (
69
- root_location = self .root_location ,
70
- object_name = object_name ,
71
- page_number = image .page_number ,
72
- source_url = image .source_url ,
73
- name = image .name ,
74
- metadata = image .metadata ,
75
- )
76
-
77
- def load_image (self , document : StoredDocument ) -> ImageDocument :
58
def save_data(self, data: io.BytesIO, object_name: str) -> None:
    """Upload ``data`` to the store's root location under ``object_name``.

    Args:
        data: Binary stream handed to the client's ``upload_fileobj``.
        object_name: Key to store the data under.
    """
    # NOTE(review): presumably the client uploads from the stream's current
    # position — confirm callers rewind ``data`` before saving.
    self.client.upload_fileobj(data, self.root_location, object_name)
61
+
62
def read_data(self, object_name: str) -> io.BytesIO:
    """Download ``object_name`` from the store's root location into memory.

    Args:
        object_name: Key of the object to download.

    Returns:
        An in-memory buffer holding the downloaded bytes, positioned at the
        start so it can be read immediately.
    """
    buffer = io.BytesIO()
    self.client.download_fileobj(self.root_location, object_name, buffer)
    # Writing into the buffer leaves its cursor at the end; rewind so that a
    # plain ``read()`` by the caller returns the data instead of b"".
    buffer.seek(0)
    return buffer
67
+
68
def load_url(self, url: str) -> io.BytesIO:
    """Load data from an ``s3://bucket/key`` url.

    Args:
        url: Location of the object; everything after the first ``/``
            following the bucket name is used as the object key.

    Returns:
        An in-memory buffer holding the downloaded bytes, positioned at the
        start.
    """
    # Strip only a leading scheme: ``str.replace`` would also remove any
    # later "s3://" occurrence inside the key itself.
    bucket, _, object_name = url.removeprefix("s3://").partition("/")
    buffer = io.BytesIO()
    self.client.download_fileobj(bucket, object_name, buffer)
    # Rewind so callers can read the content straight away.
    buffer.seek(0)
    return buffer
94
74
95
- def delete_image (self , document : StoredDocument ) -> None :
96
- self .client .delete_object (Bucket = document .root_location , Key = document .object_name )
75
def delete_data(self, object_name: str) -> None:
    """Delete ``object_name`` from the store's root location.

    Args:
        object_name: Key of the object to delete.
    """
    self.client.delete_object(Bucket=self.root_location, Key=object_name)
97
78
98
79
def close(self) -> None:
    """Close the underlying client held by this store."""
    self.client.close()
@@ -106,29 +87,21 @@ class MemoryStorage(ObjectStorage):
106
87
107
88
def __init__(self) -> None:
    """Create an empty in-memory store."""
    # Placeholder root location; this backend keeps everything in a dict.
    self.root_location = "foo"
    # Maps object names to the stored binary buffers.
    self.storage: dict[str, io.BytesIO] = {}
129
91
130
- def delete_image (self , document : StoredDocument ) -> None :
131
- self .storage .pop (document .object_name , None )
92
def save_data(self, data: io.BytesIO, object_name: str) -> None:
    """Keep ``data`` in memory under ``object_name``.

    The buffer object itself is stored (not a copy), replacing any entry
    already present under the same name.
    """
    self.storage[object_name] = data
132
95
133
- def close (self ):
134
- pass
96
def read_data(self, object_name: str) -> io.BytesIO:
    """Return the buffer stored under ``object_name``.

    Raises:
        KeyError: If nothing is stored under ``object_name``.
    """
    return self.storage[object_name]
99
+
100
def load_url(self, url: str) -> io.BytesIO:
    """Load data from a ``file://`` url (or plain path) on the local disk.

    Args:
        url: Path of the file to read, optionally prefixed with ``file://``.

    Returns:
        An in-memory buffer holding the file's bytes.
    """
    # Strip only a leading scheme: ``str.replace`` would also remove any
    # later "file://" occurrence inside the path itself.
    with open(url.removeprefix("file://"), "rb") as fid:
        return io.BytesIO(fid.read())
104
+
105
def delete_data(self, object_name: str) -> None:
    """Remove ``object_name`` from the in-memory store.

    Deleting a name that is not stored is a silent no-op.
    """
    self.storage.pop(object_name, None)
0 commit comments