@@ -93,7 +93,7 @@ def _stream_from_stream(self, stream):
9393 if self .record_size :
9494 self ._jsonld ['contentSize' ] = str (size )
9595
96- def _stream_from_url (self , url ) -> Generator [ tuple [ str , bytes ], None , None ] :
96+ def _stream_from_url (self , url , chunk_size = 8192 ) :
9797 if self .fetch_remote or self .validate_url :
9898 if self .validate_url :
9999 if url .startswith ("http" ):
@@ -109,30 +109,29 @@ def _stream_from_url(self, url) -> Generator[tuple[str, bytes], None, None]:
109109 size = 0
110110 self ._jsonld ['contentUrl' ] = str (url )
111111 with urllib .request .urlopen (url ) as response :
112- chunk_size = 8192
113112 while chunk := response .read (chunk_size ):
114113 yield self .id , chunk
115114 size += len (chunk )
116115
117116 if self .record_size :
118117 self ._jsonld ['contentSize' ] = str (size )
119118
120- def _stream_from_file (self , path ):
119+ def _stream_from_file (self , path , chunk_size = 8192 ):
121120 size = 0
122121 with open (path , 'rb' ) as f :
123- for chunk in f :
122+ while chunk := f . read ( chunk_size ) :
124123 yield self .id , chunk
125124 size += len (chunk )
126125 if self .record_size :
127126 self ._jsonld ['contentSize' ] = str (size )
128127
129- def stream (self ) -> Generator [tuple [str , bytes ], None , None ]:
128+ def stream (self , chunk_size = 8192 ) -> Generator [tuple [str , bytes ], None , None ]:
130129 if isinstance (self .source , (BytesIO , StringIO )):
131130 yield from self ._stream_from_stream (self .source )
132131 elif is_url (str (self .source )):
133- yield from self ._stream_from_url (self .source )
132+ yield from self ._stream_from_url (self .source , chunk_size )
134133 elif self .source is None :
135134 # Allows to record a File entity whose @id does not exist, see #73
136135 warnings .warn (f"No source for { self .id } " )
137136 else :
138- yield from self ._stream_from_file (self .source )
137+ yield from self ._stream_from_file (self .source , chunk_size )
0 commit comments