2
2
3
3
import boto3 # type:ignore[import-untyped]
4
4
import botocore # type:ignore[import-untyped]
5
- from bson import ObjectId
6
- from PIL import Image
7
-
8
- from .document import ImageDocument , StoredDocument
9
5
10
6
11
7
class ObjectStorage :
12
- """A class used store image documents ."""
8
+ """A class used to store binary data ."""
13
9
14
10
root_location : str
15
- """The root location to use in the object store."""
11
+ """The default root location to use in the object store."""
12
+
13
+ url_prefixes : list [str ] | None
14
+ """The url prefixes used by the object store, for reading data from a url."""
16
15
17
- def save_image (self , image : ImageDocument ) -> StoredDocument :
18
- """Save an image document to the object store."""
16
+ def save_data (self , data : io . BytesIO , object_name : str ) -> None :
17
+ """Save data to the object store."""
19
18
raise NotImplementedError
20
19
21
- def load_image (self , document : StoredDocument ) -> ImageDocument :
22
- """Load an image document from the object store."""
20
+ def read_data (self , object_name : str ) -> io . BytesIO :
21
+ """Read data from the object store."""
23
22
raise NotImplementedError
24
23
25
- def delete_image (self , document : StoredDocument ) -> None :
26
- """Remove an image document from the object store ."""
24
+ def load_url (self , url : str ) -> io . BytesIO :
25
+ """Load data from a url ."""
27
26
raise NotImplementedError
28
27
29
- def close (self ) -> None :
30
- """Close the object store."""
28
+ def delete_data (self , object_name : str ) -> None :
29
+ """Delete data from the object store."""
31
30
raise NotImplementedError
32
31
32
+ def close (self ):
33
+ """Close the object store."""
34
+ pass
35
+
33
36
34
37
class S3Storage (ObjectStorage ):
35
38
"""An object store using an S3 bucket."""
36
39
40
+ url_prefixes = ["s3://" ]
41
+
37
42
def __init__ (
38
43
self ,
39
44
bucket_name : str ,
@@ -50,35 +55,26 @@ def __init__(
50
55
self .client = client or boto3 .client ("s3" , region_name = region_name )
51
56
self .root_location = bucket_name
52
57
53
- def save_image (self , image : ImageDocument ) -> StoredDocument :
54
- object_name = str (ObjectId ())
55
- fd = io .BytesIO ()
56
- image .image .save (fd , "png" )
57
- fd .seek (0 )
58
- self .client .upload_fileobj (fd , self .root_location , object_name )
59
- return StoredDocument (
60
- root_location = self .root_location ,
61
- object_name = object_name ,
62
- page_number = image .page_number ,
63
- source_url = image .source_url ,
64
- name = image .name ,
65
- metadata = image .metadata ,
66
- )
67
-
68
- def load_image (self , document : StoredDocument ) -> ImageDocument :
58
def save_data(self, data: io.BytesIO, object_name: str) -> None:
    """Upload the contents of ``data`` to the bucket as ``object_name``.

    The destination bucket is ``self.root_location``.
    """
    bucket = self.root_location
    self.client.upload_fileobj(data, bucket, object_name)
61
+
62
def read_data(self, object_name: str) -> io.BytesIO:
    """Read data using the object store.

    Args:
        object_name: Key of the object inside ``self.root_location``.

    Returns:
        A buffer holding the downloaded bytes, positioned at the start so
        it can be read immediately.
    """
    buffer = io.BytesIO()
    self.client.download_fileobj(self.root_location, object_name, buffer)
    # The download leaves the cursor at the end of the written bytes;
    # rewind so a plain ``read()`` returns the content instead of ``b""``.
    buffer.seek(0)
    return buffer
67
+
68
def load_url(self, url: str) -> io.BytesIO:
    """Load data from a url.

    Args:
        url: Location of the object, in the form ``s3://<bucket>/<key>``.
            The bucket is taken from the url, not from ``self.root_location``.

    Returns:
        A buffer holding the downloaded bytes, positioned at the start so
        it can be read immediately.
    """
    # Strip only the leading scheme: ``str.replace`` would also delete any
    # later "s3://" that happens to appear inside the object key.
    bucket, _, object_name = url.removeprefix("s3://").partition("/")
    buffer = io.BytesIO()
    self.client.download_fileobj(bucket, object_name, buffer)
    # The download leaves the cursor at the end of the written bytes;
    # rewind so a plain ``read()`` returns the content instead of ``b""``.
    buffer.seek(0)
    return buffer
74
+
75
def delete_data(self, object_name: str) -> None:
    """Remove the object stored as ``object_name`` from ``self.root_location``."""
    target = {"Bucket": self.root_location, "Key": object_name}
    self.client.delete_object(**target)
82
78
83
79
def close(self) -> None:
    """Close the object store by closing the underlying S3 client."""
    s3_client = self.client
    s3_client.close()
@@ -87,26 +83,25 @@ def close(self) -> None:
87
83
class MemoryStorage(ObjectStorage):
    """An in-memory object store.

    Objects are kept in the ``storage`` dict, keyed by object name. Urls are
    resolved as ``file://`` paths on the local filesystem.
    """

    url_prefixes = ["file://"]

    def __init__(self) -> None:
        # NOTE(review): "foo" looks like a placeholder; nothing in this class
        # reads ``root_location``.
        self.root_location = "foo"
        self.storage: dict[str, io.BytesIO] = {}

    def save_data(self, data: io.BytesIO, object_name: str) -> None:
        """Save data to the object store.

        A snapshot of the buffer's full contents is stored, so later writes
        to, reads from, or closing of ``data`` by the caller do not affect
        what is stored.
        """
        self.storage[object_name] = io.BytesIO(data.getvalue())

    def read_data(self, object_name: str) -> io.BytesIO:
        """Read data using the object store.

        Returns:
            A fresh buffer positioned at the start, so each caller gets an
            independent read cursor.

        Raises:
            KeyError: If nothing is stored under ``object_name``.
        """
        return io.BytesIO(self.storage[object_name].getvalue())

    def load_url(self, url: str) -> io.BytesIO:
        """Load data from a url.

        Args:
            url: A ``file://`` url, or a bare filesystem path.

        Returns:
            A buffer holding the file's bytes.
        """
        # Strip only the leading scheme; ``str.replace`` would also remove a
        # "file://" occurring later in the path.
        path = url.removeprefix("file://")
        with open(path, "rb") as fid:
            return io.BytesIO(fid.read())

    def delete_data(self, object_name: str) -> None:
        """Delete data from the object store.

        Deleting a name that is not stored is a no-op.
        """
        self.storage.pop(object_name, None)
0 commit comments