
Commit 7c1c20e

Author: Ye Shaokai
Commit message: Merge branch 'shaokai/dev' of github.com:yeshaokai/LLaVA-NeXT into shaokai/dev
2 parents: 223b0e8 + 5084697

File tree: 1 file changed (+29, -12 lines)

llava/action/generate_comparison_dpo.py

Lines changed: 29 additions & 12 deletions
@@ -70,6 +70,7 @@ def select_train_subset(self):
     def init_data(self):
         ret = {}
         csv_reader = csv.reader(open(self.annotation_file))
+        print ('loading data from ', self.annotation_file)
         _ = next(csv_reader) # skip the header
 
         indices = self.select_train_subset()
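For context, the lines above read the annotation CSV with csv.reader and skip its header row; the new print simply logs which annotation file is being loaded. Below is a minimal, self-contained sketch of that loading pattern; the per-row handling and column layout are assumptions for illustration, not taken from this file.

    import csv

    def load_annotations(annotation_file):
        # Log the source file, mirroring the print added in this commit.
        print('loading data from ', annotation_file)
        ret = {}
        with open(annotation_file) as f:
            csv_reader = csv.reader(f)
            _ = next(csv_reader)  # skip the header row
            for idx, row in enumerate(csv_reader):
                # Column layout is assumed; the real schema is defined by
                # the EPIC_100_train.csv annotations.
                ret[idx] = row
        return ret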
@@ -141,6 +142,11 @@ def predict_images(self, images, parsed_item):
 
         system_prompt = time_instruction + task_related_prompt
 
+        suffix = " Note that you need to use first person perspective. Make sure you do not mention you are watching a video or an image."
+
+        system_prompt += suffix
+
+
         format_prompt = """
 **Return only a JSON object** with the following two properties:
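For reference, a minimal sketch of the prompt assembly this hunk changes: the new first-person suffix is appended to the system prompt before any format instructions are added. The helper name and the stand-in format_prompt default are assumptions; only the suffix text comes from the diff.

    def build_system_prompt(time_instruction, task_related_prompt, gpt_model,
                            format_prompt='**Return only a JSON object** ...'):
        # Assumed helper mirroring the prompt assembly in predict_images above.
        suffix = (" Note that you need to use first person perspective."
                  " Make sure you do not mention you are watching a video or an image.")
        system_prompt = time_instruction + task_related_prompt + suffix
        if 'o1' in gpt_model:
            # For o1-family models the JSON-format instructions are folded
            # into the same prompt (see the next hunk).
            system_prompt += format_prompt
        return system_prompt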
@@ -150,9 +156,7 @@ def predict_images(self, images, parsed_item):
 
         if 'o1' in self.gpt_model:
             system_prompt += format_prompt
-
-        print (system_prompt)
-
+
         if 'o1-mini' == self.gpt_model:
             system_role = "user"
             temperature = 1
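The surviving context above drops a debug print and keeps the o1-mini special case, which sends the "system" prompt in the user role with a fixed temperature of 1. A hedged sketch of how such request parameters could be assembled is below; the helper, the message layout, and the non-o1 defaults are assumptions, not this file's code.

    def build_chat_request(gpt_model, system_prompt, user_content):
        # Assumed helper. o1-mini gets the system prompt delivered in the
        # user role and temperature 1, as in the branch above; the defaults
        # for other models are illustrative assumptions.
        if gpt_model == 'o1-mini':
            system_role = 'user'
            temperature = 1
        else:
            system_role = 'system'
            temperature = 0
        messages = [
            {'role': system_role, 'content': system_prompt},
            {'role': 'user', 'content': user_content},
        ]
        return {'model': gpt_model, 'messages': messages, 'temperature': temperature}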
@@ -270,13 +274,26 @@ def create_comparison_data(positive_filename, negative_filename, out_filename):
 video_root = '/data/EK100/EK100_320p_15sec_30fps_libx264'
 anno_root = '/data/shaokai/epic-kitchens-100-annotations/'
 clip_length = 8
+gpt_model = 'gpt-4o'
 
-# cap = CaptionInference(video_root,
-#                        os.path.join(anno_root, 'EPIC_100_train.csv'),
-#                        clip_length,
-#                        debug = False,
-#                        fraction = 0.01)
-# cap.multi_process_run(n_samples = -1, filename = f'gpt4o_inference_{clip_length}frame_1percent.json')
-
-
-create_comparison_data('gpt4o_inference_8frame_1percent.json', 'gpt4o_inference_1frame_1percent.json', 'comparison_data_1percent.jsonl')
+cap = CaptionInference(
+    gpt_model,
+    video_root,
+    os.path.join(anno_root, 'EPIC_100_train.csv'),
+    clip_length,
+    debug = False,
+    fraction = 0.1)
+cap.multi_process_run(n_samples = -1, filename = f'gpt4o_inference_{clip_length}frame_10percent.json')
+
+clip_length = 1
+cap = CaptionInference(
+    gpt_model,
+    video_root,
+    os.path.join(anno_root, 'EPIC_100_train.csv'),
+    clip_length,
+    debug = False,
+    fraction = 0.1)
+cap.multi_process_run(n_samples = -1, filename = f'gpt4o_inference_{clip_length}frame_10percent.json')
+
+
+create_comparison_data('gpt4o_inference_8frame_10percent.json', 'gpt4o_inference_1frame_10percent.json', 'comparison_data_10percent.jsonl')
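This hunk replaces the commented-out 1% run with two active gpt-4o caption runs over 10% of EPIC_100_train.csv, one at 8 frames per clip and one at 1 frame, and then pairs the two result files into a comparison set. create_comparison_data itself is defined earlier in the file and is not part of this diff; the sketch below is only a plausible reading of that pairing step, with an assumed JSON layout (clip id mapped to caption) and assumed DPO field names.

    import json

    def make_comparison_jsonl(positive_filename, negative_filename, out_filename):
        # Assumed pairing logic: captions from the 8-frame run are treated as
        # preferred ("chosen") and captions from the 1-frame run as
        # dispreferred ("rejected"). File layout and field names here are
        # illustrative, not taken from this repository.
        with open(positive_filename) as f:
            positive = json.load(f)
        with open(negative_filename) as f:
            negative = json.load(f)

        with open(out_filename, 'w') as out:
            for clip_id, chosen in positive.items():
                if clip_id not in negative:
                    continue  # skip clips missing from the 1-frame run
                record = {'id': clip_id, 'chosen': chosen, 'rejected': negative[clip_id]}
                out.write(json.dumps(record) + '\n')

In the call above, the 8-frame captions are passed as the positive file and the 1-frame captions as the negative file, which matches that chosen/rejected reading.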
