1- from PIL import Image
2- from bs4 import BeautifulSoup
3- from urllib .parse import urljoin , urlparse
4- from pydantic import Field , ConfigDict
5- from typing import List , Optional
61from io import BytesIO
7-
8- from loguru import logger
2+ from typing import List , Optional
3+ from urllib . parse import urljoin
94
105import requests
6+ from bs4 import BeautifulSoup
7+ from loguru import logger
8+ from PIL import Image
9+ from pydantic import ConfigDict , Field
1110
1211from guidellm .config import settings
1312from guidellm .core .serializable import Serializable
@@ -19,14 +18,14 @@ class ImageDescriptor(Serializable):
1918 A class to represent image data in serializable format.
2019 """
2120 model_config = ConfigDict (arbitrary_types_allowed = True )
22-
21+
2322 url : Optional [str ] = Field (description = "url address for image." )
2423 image : Image .Image = Field (description = "PIL image" , exclude = True )
2524 filename : Optional [int ] = Field (
2625 default = None ,
2726 description = "Image filename." ,
2827 )
29-
28+
3029
3130def load_images (data : str ) -> List [ImageDescriptor ]:
3231 """
@@ -45,25 +44,25 @@ def load_images(data: str) -> List[ImageDescriptor]:
4544 response = requests .get (data , timeout = settings .request_timeout )
4645 response .raise_for_status ()
4746
48- soup = BeautifulSoup (response .text , ' html.parser' )
47+ soup = BeautifulSoup (response .text , " html.parser" )
4948 for img_tag in soup .find_all ("img" ):
5049 img_url = img_tag .get ("src" )
5150
5251 if img_url :
5352 # Handle relative URLs
5453 img_url = urljoin (data , img_url )
55-
54+
5655 # Download the image
5756 logger .debug ("Loading image: {}" , img_url )
5857 img_response = requests .get (img_url )
5958 img_response .raise_for_status ()
60-
59+
6160 # Load image into Pillow
6261 images .append (
6362 ImageDescriptor (
64- url = img_url ,
63+ url = img_url ,
6564 image = Image .open (BytesIO (img_response .content )),
6665 )
6766 )
6867
69- return images
68+ return images
0 commit comments