1010from .registry import register_evaluator , BaseEvaluator
1111
1212from .whowhat_metrics import VideoSimilarity
13+ # from diffusers.utils import export_to_video
1314
1415
16+ # Agreed default parameters:
17+ # width: 704, height: 480, guidance_scale: 3, guidance_rescale: 0.3
1518default_data = [
19+ # small resolution
1620 {
17- "prompt" : "cowboy running in slow motion in a field " ,
21+ "prompt" : "octopus figure skating, cartoon " ,
1822 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
19- "width" : 480 ,
20- "height" : 704 ,
23+ "width" : 256 ,
24+ "height" : 128 ,
2125 "guidance_scale" : 3 ,
2226 "guidance_rescale" : 0.3 ,
2327 },
28+ # small resolution
2429 {
25- "prompt" : "House in front of a lake and the wind blowing through the trees " ,
30+ "prompt" : "slow motion, hydrogen bond energy, atom, 4k, cinematic -gs 24 -motion 2 -ar 16:9 -fps 24 " ,
2631 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
27- "width" : 1216 ,
28- "height" : 704 ,
32+ "width" : 256 ,
33+ "height" : 256 ,
2934 "guidance_scale" : 3 ,
3035 "guidance_rescale" : 0.3 ,
3136 },
37+ # middle/common resolution
3238 {
33- "prompt" : "slow motion, hydrogen bond energy, atom, 4k, cinematic -gs 24 - motion 2 -ar 16:9 -fps 24 " ,
39+ "prompt" : "cowboy running in slow motion in a field " ,
3440 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
35- "width" : 256 ,
36- "height" : 256 ,
41+ "width" : 704 ,
42+ "height" : 480 ,
3743 "guidance_scale" : 3 ,
3844 "guidance_rescale" : 0.3 ,
3945 },
46+ # big resolution
4047 {
41- "prompt" : "fight naruto vs saske " ,
48+ "prompt" : "House in front of a lake and the wind blowing through the trees " ,
4249 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
43- "width" : 480 ,
50+ "width" : 1216 ,
4451 "height" : 704 ,
4552 "guidance_scale" : 3 ,
53+ "guidance_rescale" : 0.3 ,
54+ },
55+ # guidance_rescale 0
56+ {
57+ "prompt" : "fight naruto vs saske " ,
58+ "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
59+ "width" : 704 ,
60+ "height" : 480 ,
61+ "guidance_scale" : 3 ,
4662 "guidance_rescale" : 0 ,
4763 },
64+ # guidance_scale 1
4865 {
4966 "prompt" : "reporter in front of the TV cameras talking about the joker " ,
5067 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
51- "width" : 480 ,
52- "height" : 704 ,
68+ "width" : 704 ,
69+ "height" : 480 ,
5370 "guidance_scale" : 1 ,
5471 "guidance_rescale" : 0.3 ,
5572 },
73+ # guidance_scale 1 guidance_rescale 0
5674 {
5775 "prompt" : "Realistic night silhouette of a white Lwxux LX III 2008 with headlights on driving on in the fog in the dark " ,
5876 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
59- "width" : 480 ,
60- "height" : 704 ,
77+ "width" : 704 ,
78+ "height" : 480 ,
6179 "guidance_scale" : 1 ,
6280 "guidance_rescale" : 0 ,
6381 },
82+ # small resolution
6483 {
6584 "prompt" : "indian womens wahsing clothes at river side " ,
6685 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
67- "width" : 480 ,
68- "height" : 704 ,
69- "guidance_scale" : 3 ,
70- "guidance_rescale" : 0.3 ,
71- },
72- {
73- "prompt" : "octopus figure skating, cartoon " ,
74- "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
75- "width" : 480 ,
76- "height" : 704 ,
86+ "width" : 256 ,
87+ "height" : 128 ,
7788 "guidance_scale" : 3 ,
7889 "guidance_rescale" : 0.3 ,
7990 },
91+ # big prompt
8092 {
8193 "prompt" : "Levitating woman uses magic and fairy dusty spews forth from her fingers. cinematic shot photos taken by ARRI, photos taken "
8294 + "by sony, photos taken by canon, photos taken by nikon, photos taken by sony, photos taken by hasselblad " ,
8395 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
84- "width" : 480 ,
85- "height" : 704 ,
96+ "width" : 704 ,
97+ "height" : 480 ,
8698 "guidance_scale" : 3 ,
8799 "guidance_rescale" : 0.3 ,
88100 },
101+ # big prompt, small resolution
89102 {
90103 "prompt" : "A mythical river adventure in the Yellow River basin during ancient times, where majestic dragons soar through the turbulent waters, "
91104 + "casting a vibrant glow on the submerged landscapes, blending a sense of awe and fantasy, Sculpture, intricate clay model with luminescent "
92105 + "elements, --ar 16:9 --v 5 " ,
93106 "negative_prompt" : "worst quality, inconsistent motion, blurry, jittery, distorted" ,
94- "width" : 480 ,
95- "height" : 704 ,
107+ "width" : 256 ,
108+ "height" : 128 ,
96109 "guidance_scale" : 3 ,
97110 "guidance_rescale" : 0.3 ,
98111 },
@@ -104,8 +117,8 @@ class Text2VideoEvaluator(BaseEvaluator):
104117 DEF_NUM_FRAMES = 25
105118 DEF_NUM_INF_STEPS = 25
106119 DEF_FRAME_RATE = 25
107- DEF_WIDTH = 480
108- DEF_HEIGHT = 704
120+ DEF_WIDTH = 704
121+ DEF_HEIGHT = 480
109122 DEF_GUIDANCE_SCALE = 3
110123 DEF_GUIDANCE_RESCALE = 0.3
111124
@@ -119,7 +132,7 @@ def __init__(
119132 num_frames = 25 ,
120133 crop_prompts = True ,
121134 num_samples = None ,
122- gen_image_fn = None ,
135+ gen_video_fn = None ,
123136 seed = 42 ,
124137 is_genai = False ,
125138 ) -> None :
@@ -136,20 +149,20 @@ def __init__(
136149 self .similarity = VideoSimilarity ()
137150 self .last_cmp = None
138151 self .gt_dir = os .path .dirname (gt_data )
139- self .generation_fn = gen_image_fn
152+ self .generation_fn = gen_video_fn
140153 self .is_genai = is_genai
141154 self .num_frames = num_frames or self .DEF_NUM_FRAMES
142155 self .frame_rate = self .DEF_FRAME_RATE
143156
144157 if base_model :
145- self .gt_data = self ._generate_data (base_model , gen_image_fn , os .path .join (self .gt_dir , "reference" ))
158+ self .gt_data = self ._generate_data (base_model , gen_video_fn , os .path .join (self .gt_dir , "reference" ))
146159 else :
147160 self .gt_data = pd .read_csv (gt_data , keep_default_na = False )
148161
149162 def get_generation_fn (self ):
150163 return self .generation_fn
151164
152- def score (self , model_or_data , gen_image_fn = None , output_dir = None , ** kwargs ):
165+ def score (self , model_or_data , gen_video_fn = None , output_dir = None , ** kwargs ):
153166 if output_dir is None :
154167 video_folder = os .path .join (self .gt_dir , "target" )
155168 else :
@@ -158,7 +171,7 @@ def score(self, model_or_data, gen_image_fn=None, output_dir=None, **kwargs):
158171 if isinstance (model_or_data , str ) and os .path .exists (model_or_data ):
159172 predictions = pd .read_csv (model_or_data , keep_default_na = False )
160173 else :
161- predictions = self ._generate_data (model_or_data , gen_image_fn , video_folder )
174+ predictions = self ._generate_data (model_or_data , gen_video_fn , video_folder )
162175 self .predictions = predictions
163176
164177 all_metrics_per_prompt = {}
@@ -185,8 +198,8 @@ def worst_examples(self, top_k: int = 5, metric="similarity"):
185198
186199 return res
187200
188- def _generate_data (self , model , gen_image_fn = None , videos_dir = "reference" ):
189- def default_gen_image_fn (
201+ def _generate_data (self , model , gen_video_fn = None , videos_dir = "reference" ):
202+ def default_gen_video_fn (
190203 model ,
191204 prompt ,
192205 negative_prompt ,
@@ -214,8 +227,8 @@ def default_gen_image_fn(
214227 )
215228 return output .frames [0 ]
216229
217- # generation_fn = gen_image_fn or default_gen_image_fn
218- generation_fn = default_gen_image_fn
230+ # generation_fn = gen_video_fn or default_gen_video_fn
231+ generation_fn = default_gen_video_fn
219232
220233 if self .test_data :
221234 if isinstance (self .test_data , str ):
@@ -262,6 +275,9 @@ def default_gen_image_fn(
262275 frame_path = os .path .join (video_path , f"{ number } .png" )
263276 frame .save (frame_path )
264277 videos .append (video_path )
278+ # video_path = os.path.join(videos_dir, f"video_{i}.mp4")
279+ # export_to_video(frames, video_path, self.frame_rate)
280+ # videos.append(video_path)
265281
266282 res_data ["videos" ] = videos
267283 df = pd .DataFrame (res_data )
0 commit comments