1- """Generate action descriptions."""
1+ """Generate natural language descriptions from actions."""
22
33from pprint import pformat
4-
54from loguru import logger
6- import cv2
5+ from PIL import Image , ImageDraw
76import numpy as np
87
98from openadapt .db import crud
9+ from openadapt .plotting import get_font
10+ from openadapt .utils import get_scaling_factor
11+
12+ scaling_factor = get_scaling_factor ()
1013
1114
1215def embed_description (
13- image : np . ndarray ,
16+ image : Image . Image ,
1417 description : str ,
15- x : int = None ,
16- y : int = None ,
17- ) -> np . ndarray :
18+ x : int = 0 ,
19+ y : int = 0 ,
20+ ) -> Image . Image :
1821 """Embed a description into an image at the specified location.
1922
2023 Args:
21- image (np.ndarray ): The image to annotate.
24+ image (Image.Image ): The image to annotate.
2225 description (str): The text to embed.
23- x (int, optional): The x-coordinate. Defaults to None (centered) .
24- y (int, optional): The y-coordinate. Defaults to None (centered) .
26+ x (int, optional): The x-coordinate. Defaults to 0 .
27+ y (int, optional): The y-coordinate. Defaults to 0 .
2528
2629 Returns:
27- np.ndarray : The annotated image.
30+ Image.Image : The annotated image.
2831 """
29- font = cv2 .FONT_HERSHEY_SIMPLEX
30- font_scale = 1
31- font_color = (255 , 255 , 255 ) # White
32- line_type = 1
32+ draw = ImageDraw .Draw (image )
33+ font_size = 30 # Set font size (2x the default size)
34+ font = get_font ("Arial.ttf" , font_size )
3335
3436 # Split description into multiple lines
35- max_width = 60 # Maximum characters per line
37+ max_width = image . width
3638 words = description .split ()
3739 lines = []
3840 current_line = []
@@ -45,36 +47,28 @@ def embed_description(
4547 if current_line :
4648 lines .append (" " .join (current_line ))
4749
48- # Default to center if coordinates are not provided
49- if x is None or y is None :
50- x = image .shape [1 ] // 2
51- y = image .shape [0 ] // 2
50+ # Adjust coordinates for scaling factor
51+ x = int (x * scaling_factor )
52+ y = int (y * scaling_factor )
5253
53- # Draw semi-transparent background and text
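54+ # Measure each line, then draw its background box and text. NOTE(review): the fill alpha is presumably only blended when the ImageDraw is created in "RGBA" mode; confirm the box is really semi-transparent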
5455 for i , line in enumerate (lines ):
55- text_size , _ = cv2 .getTextSize (line , font , font_scale , line_type )
56- text_x = max (0 , min (x - text_size [0 ] // 2 , image .shape [1 ] - text_size [0 ]))
57- text_y = y + i * 20
56+ bbox = draw .textbbox ((0 , 0 ), line , font = font )
57+ text_width , text_height = bbox [2 ] - bbox [0 ], bbox [3 ] - bbox [1 ]
58+ text_x = max (0 , min (x - text_width // 2 , image .width - text_width ))
59+ text_y = y + i * text_height
5860
5961 # Draw background
60- cv2 .rectangle (
61- image ,
62- (text_x - 15 , text_y - 25 ),
63- (text_x + text_size [0 ] + 15 , text_y + 15 ),
64- (0 , 0 , 0 ),
65- - 1 ,
62+ background_box = (
63+ text_x - 15 ,
64+ text_y - 5 ,
65+ text_x + text_width + 15 ,
66+ text_y + text_height + 5 ,
6667 )
68+ draw .rectangle (background_box , fill = (0 , 0 , 0 , 128 ))
6769
6870 # Draw text
69- cv2 .putText (
70- image ,
71- line ,
72- (text_x , text_y ),
73- font ,
74- font_scale ,
75- font_color ,
76- line_type ,
77- )
71+ draw .text ((text_x , text_y ), line , fill = (255 , 255 , 255 ), font = font )
7872
7973 return image
8074
@@ -88,25 +82,22 @@ def main() -> None:
8882 for action in action_events :
8983 description , image = action .prompt_for_description (return_image = True )
9084
91- # Convert image to numpy array for OpenCV compatibility
92- image = np .array (image )
85+ # Convert image to PIL.Image for compatibility
86+ image = Image . fromarray ( np .array (image ) )
9387
9488 if action .mouse_x is not None and action .mouse_y is not None :
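9589 # Mouse event. NOTE(review): mouse_x / mouse_y are not passed to embed_description here, so its default x=0, y=0 is used; confirm this is intended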
9690 annotated_image = embed_description (
9791 image ,
9892 description ,
99- x = int (action .mouse_x ) * 2 ,
100- y = int (action .mouse_y ) * 2 ,
10193 )
10294 else :
10396 # Other events: no coordinates are passed, so embed_description uses its defaults (x=0, y=0)
10496 annotated_image = embed_description (image , description )
10597
10698 logger .info (f"{ action = } " )
10799 logger .info (f"{ description = } " )
108- cv2 .imshow ("Annotated Image" , annotated_image )
109- cv2 .waitKey (0 )
100+ annotated_image .show () # Opens the annotated image using the default viewer
110101 descriptions .append (description )
111102
112103 logger .info (f"descriptions=\n { pformat (descriptions )} " )
0 commit comments