@@ -146,7 +146,7 @@ def replace_model_name(old_name, tstamp):
146146 "dumbledore-v3" : "llama-3.2-vision-90b-instruct" ,
147147 "potter-v1" : "llama-3.2-vision-11b-instruct" ,
148148 "sharp-game-player-v1" : "llama-3.2-3b-instruct" ,
149- "zeus-flare-thunder-v1" : "llama-3.2-1b-instruct" ,
149+ "zeus-flare-thunder-v1" : "llama-3.2-1b-instruct" ,
150150 "qwen-vl-max-0809" : "qwen2-vl-72b" ,
151151 "gemini-1.5-pro-002-test-sp" : "gemini-1.5-pro-002" ,
152152 "gemini-1.5-flash-test-5" : "gemini-1.5-flash-002" ,
@@ -185,7 +185,7 @@ def replace_model_name(old_name, tstamp):
185185 return old_name
186186 else :
187187 return old_name + "-old"
188- if old_name == "eureka-chatbot" :
188+ if old_name == "eureka-chatbot" :
189189 if tstamp > 1721651521 :
190190 return "eureka-chatbot-v2"
191191 else :
@@ -293,12 +293,17 @@ def process_data_txt2img(
293293 if row ["models" ][0 ] is None or row ["models" ][1 ] is None :
294294 count_dict ["invalid" ] += 1
295295 continue
296- if not isinstance (row ["models" ][0 ], str ) or not isinstance (row ["models" ][1 ], str ):
296+ if not isinstance (row ["models" ][0 ], str ) or not isinstance (
297+ row ["models" ][1 ], str
298+ ):
297299 count_dict ["invalid" ] += 1
298300 continue
299301
300302 # Resolve model names
301- models_public = [remove_html (row ["models" ][0 ]), remove_html (row ["models" ][1 ])]
303+ models_public = [
304+ remove_html (row ["models" ][0 ]),
305+ remove_html (row ["models" ][1 ]),
306+ ]
302307 if "model_name" in row ["states" ][0 ]:
303308 models_hidden = [
304309 row ["states" ][0 ]["model_name" ],
@@ -356,12 +361,12 @@ def process_data_txt2img(
356361 "anony_bothbad_vote" : 0 ,
357362 "anony_leftvote" : 0 ,
358363 "anony_rightvote" : 0 ,
359- "sanitized_id" : shortuuid .uuid ()
364+ "sanitized_id" : shortuuid .uuid (),
360365 }
361366 all_ips [ip ]["count" ] += 1
362367 if flag_anony :
363368 all_ips [ip ]["anony_count" ] += 1
364- all_ips [ip ]["anony_" + row ["type" ]] += 1
369+ all_ips [ip ]["anony_" + row ["type" ]] += 1
365370
366371 if sanitize_ip :
367372 user_id = f"{ all_ips [ip ]['sanitized_id' ]} "
@@ -389,6 +394,7 @@ def process_data_txt2img(
389394 )
390395 return battles , count_dict , count_leak , all_ips
391396
397+
392398def process_data (
393399 data ,
394400 exclude_model_names ,
@@ -433,12 +439,17 @@ def process_data(
433439 if row ["models" ][0 ] is None or row ["models" ][1 ] is None :
434440 count_dict ["invalid" ] += 1
435441 continue
436- if not isinstance (row ["models" ][0 ], str ) or not isinstance (row ["models" ][1 ], str ):
442+ if not isinstance (row ["models" ][0 ], str ) or not isinstance (
443+ row ["models" ][1 ], str
444+ ):
437445 count_dict ["invalid" ] += 1
438446 continue
439447
440448 # Resolve model names
441- models_public = [remove_html (row ["models" ][0 ]), remove_html (row ["models" ][1 ])]
449+ models_public = [
450+ remove_html (row ["models" ][0 ]),
451+ remove_html (row ["models" ][1 ]),
452+ ]
442453 if "model_name" in row ["states" ][0 ]:
443454 models_hidden = [
444455 row ["states" ][0 ]["model_name" ],
@@ -484,7 +495,6 @@ def process_data(
484495 print (state ["messages" ][state ["offset" ]][1 ])
485496 raise ValueError
486497
487-
488498 # Drop conversations if the model names are leaked
489499 messages = ""
490500 for i in range (2 ):
@@ -576,12 +586,12 @@ def process_data(
576586 "anony_bothbad_vote" : 0 ,
577587 "anony_leftvote" : 0 ,
578588 "anony_rightvote" : 0 ,
579- "sanitized_id" : shortuuid .uuid ()
589+ "sanitized_id" : shortuuid .uuid (),
580590 }
581591 all_ips [ip ]["count" ] += 1
582592 if flag_anony :
583593 all_ips [ip ]["anony_count" ] += 1
584- all_ips [ip ]["anony_" + row ["type" ]] += 1
594+ all_ips [ip ]["anony_" + row ["type" ]] += 1
585595
586596 if sanitize_ip :
587597 user_id = f"{ all_ips [ip ]['sanitized_id' ]} "
@@ -607,13 +617,25 @@ def process_data(
607617 )
608618
609619 user_tokens = sum (
610- [conv ["num_tokens" ] for conv in conversation_a if conv ["role" ] == "user" ]
620+ [
621+ conv ["num_tokens" ]
622+ for conv in conversation_a
623+ if conv ["role" ] == "user"
624+ ]
611625 )
612626 assistant_a_tokens = sum (
613- [conv ["num_tokens" ] for conv in conversation_a if conv ["role" ] == "assistant" ]
627+ [
628+ conv ["num_tokens" ]
629+ for conv in conversation_a
630+ if conv ["role" ] == "assistant"
631+ ]
614632 )
615633 assistant_b_tokens = sum (
616- [conv ["num_tokens" ] for conv in conversation_b if conv ["role" ] == "assistant" ]
634+ [
635+ conv ["num_tokens" ]
636+ for conv in conversation_b
637+ if conv ["role" ] == "assistant"
638+ ]
617639 )
618640 context_tokens_a = sum ([conv ["num_tokens" ] for conv in conversation_a [:- 1 ]])
619641 context_tokens_b = sum ([conv ["num_tokens" ] for conv in conversation_b [:- 1 ]])
@@ -702,30 +724,36 @@ def clean_battle_data(
702724 all_ips [ip ]["count" ] += sub_all_ips [ip ]["count" ]
703725 all_ips [ip ]["anony_count" ] += sub_all_ips [ip ]["anony_count" ]
704726 all_ips [ip ]["anony_tievote" ] += sub_all_ips [ip ]["anony_tievote" ]
705- all_ips [ip ]["anony_bothbad_vote" ] += sub_all_ips [ip ]["anony_bothbad_vote" ]
727+ all_ips [ip ]["anony_bothbad_vote" ] += sub_all_ips [ip ][
728+ "anony_bothbad_vote"
729+ ]
706730 all_ips [ip ]["anony_leftvote" ] += sub_all_ips [ip ]["anony_leftvote" ]
707731 all_ips [ip ]["anony_rightvote" ] += sub_all_ips [ip ]["anony_rightvote" ]
708732
709733 battles .sort (key = lambda x : x ["tstamp" ])
710734 last_updated_tstamp = battles [- 1 ]["tstamp" ]
711-
735+
712736 battles = pd .DataFrame (battles )
713-
737+
714738 # drop rows with same question_id
715739 print (f"before drop dups #battles: { len (battles )} " )
716740 battles = battles .drop_duplicates (subset = ["question_id" ], keep = "first" )
717741 battles = battles .reset_index (drop = True )
718742 print (f"#battles: { len (battles )} " )
719743
720- battles = battles [battles ["anony" ]].reset_index (drop = True ) if anony_only else battles
744+ battles = (
745+ battles [battles ["anony" ]].reset_index (drop = True ) if anony_only else battles
746+ )
721747 if run_dedup and not (vision or txt2img ):
722748 print ("Running deduplication..." )
723749 battles = utils .dedup_process (battles )
724750 num_dedup_battles = sum (battles ["dedup_tag" ].apply (lambda x : x ["sampled" ]))
725751 print (f"#dedup_battles: { num_dedup_battles } " )
726752 else :
727753 print ("Skip deduplication..." )
728- dedup_tags = np .array ([{"high_freq" : False , "sampled" : True } for _ in range (len (battles ))])
754+ dedup_tags = np .array (
755+ [{"high_freq" : False , "sampled" : True } for _ in range (len (battles ))]
756+ )
729757 battles ["dedup_tag" ] = dedup_tags
730758
731759 last_updated_datetime = datetime .datetime .fromtimestamp (
@@ -746,7 +774,9 @@ def clean_battle_data(
746774 for votetype in ["tievote" , "bothbad_vote" , "leftvote" , "rightvote" ]:
747775 vote_key = "anony_" + votetype
748776 userid_key = "sanitized_id" if sanitize_ip else "ip"
749- top_30_users = sorted (all_ips .values (), key = lambda x : x [vote_key ], reverse = True )[:30 ]
777+ top_30_users = sorted (
778+ all_ips .values (), key = lambda x : x [vote_key ], reverse = True
779+ )[:30 ]
750780 top_30_ip_id = ["arena_user_" + ip [userid_key ] for ip in top_30_users ]
751781 battles_top_30_ips = battles [battles ["judge" ].isin (top_30_ip_id )]
752782 print (f"Top 30 IPs #battles: { len (battles_top_30_ips )} " )
@@ -755,13 +785,15 @@ def clean_battle_data(
755785 for user in top_30_users :
756786 user_ip = user ["ip" ]
757787 user_id = "arena_user_" + user [userid_key ]
758-
788+
759789 ip_battles = battles_top_30_ips [battles_top_30_ips ["judge" ] == user_id ]
760790 win_count = len (ip_battles [ip_battles ["winner" ] == "model_a" ])
761791 tie_count = len (ip_battles [ip_battles ["winner" ] == "tie" ])
762792 loss_count = len (ip_battles [ip_battles ["winner" ] == "model_b" ])
763- print (f"{ user_id } : model_a { win_count } , tie { tie_count } , mobel_b { loss_count } , { user_ip } " )
764-
793+ print (
794+ f"{ user_id } : model_a { win_count } , tie { tie_count } , mobel_b { loss_count } , { user_ip } "
795+ )
796+
765797 return battles
766798
767799
@@ -784,8 +816,14 @@ def clean_battle_data(
784816 ban_ip_list = json .load (open (args .ban_ip_file )) if args .ban_ip_file else None
785817
786818 battles = clean_battle_data (
787- log_files , args .exclude_model_names or [], ban_ip_list , args .sanitize_ip , anony_only = args .anony_only ,
788- run_dedup = args .run_dedup , vision = args .vision , txt2img = args .txt2img
819+ log_files ,
820+ args .exclude_model_names or [],
821+ ban_ip_list ,
822+ args .sanitize_ip ,
823+ anony_only = args .anony_only ,
824+ run_dedup = args .run_dedup ,
825+ vision = args .vision ,
826+ txt2img = args .txt2img ,
789827 )
790828 last_updated_tstamp = battles .iloc [- 1 ]["tstamp" ]
791829 cutoff_date = datetime .datetime .fromtimestamp (
@@ -801,7 +839,9 @@ def clean_battle_data(
801839 print (f"Write cleaned data to { output } " )
802840
803841 if not args .txt2img :
804- battles = battles .drop (columns = ["conversation_a" , "conversation_b" , "question_id" ])
842+ battles = battles .drop (
843+ columns = ["conversation_a" , "conversation_b" , "question_id" ]
844+ )
805845 print ("Samples:" )
806846 print (battles [:5 ])
807847
0 commit comments