@@ -425,7 +425,7 @@ <h4 class="title is-5" style="border-bottom: 2px solid #333; padding-bottom: 12p
425425 < div class ="container is-max-desktop ">
426426
427427 < h2 class ="title is-4 has-text-centered ">
428- Table 2: Ablation — Effect of label flipping (λ = 100).
428+ Table 2: Ablation — Effect of Label Flipping
429429 </ h2 >
430430
431431 < div class ="table-container ">
@@ -436,141 +436,174 @@ <h2 class="title is-4 has-text-centered">
436436 < tr style ="border-top: 2px solid black; border-bottom: 1px solid black; ">
437437 < th class ="has-text-left "> Model</ th >
438438 < th class ="has-text-left "> Method</ th >
439+ < th class ="has-text-centered "> Flip</ th >
440+ < th class ="has-text-centered "> Fact. ↑</ th >
439441 < th class ="has-text-centered "> Hal. ↓</ th >
440- < th class ="has-text-centered "> Fact.</ th >
441442 < th class ="has-text-centered "> Win ↑</ th >
442443 </ tr >
443444 </ thead >
444445
445446 < tbody >
446447
447- <!-- ================= Gemma-2-9B ================= -->
448- < tr style ="background-color: #f9f9f9; ">
449- < td colspan ="5 " style ="font-weight: bold; padding-top: 10px; ">
450- Gemma-2-9B
451- </ td >
448+ <!-- ================= Qwen2.5-14B ================= -->
449+ < tr style ="background-color:#f9f9f9; ">
450+ < td colspan ="6 " style ="font-weight:bold; "> Qwen2.5-14B</ td >
452451 </ tr >
453452 < tr >
454- < td > </ td >
455- < td > Standard DPO</ td >
456- < td class ="has-text-centered "> 0.082</ td >
457- < td class ="has-text-centered "> 8.12</ td >
453+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✗</ td >
454+ < td class ="has-text-centered "> 7.90</ td >
455+ < td class ="has-text-centered "> 0.080</ td >
458456 < td class ="has-text-centered "> —</ td >
459457 </ tr >
460- < tr style ="background-color: #fff6db; ">
461- < td > </ td >
462- < td > < strong > F-DPO</ strong > (No Flip)</ td >
463- < td class ="has-text-centered "> 0.088</ td >
464- < td class ="has-text-centered "> 8.06</ td >
465- < td class ="has-text-centered "> 0.49</ td >
458+ < tr style ="background-color:#fff6db; ">
459+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✓</ td >
460+ < td class ="has-text-centered "> 8.33</ td >
461+ < td class ="has-text-centered "> 0.036</ td >
462+ < td class ="has-text-centered "> 0.65</ td >
466463 </ tr >
467- < tr style ="background-color: #e6f4ea; ">
468- < td > </ td >
469- < td > < strong > F-DPO</ strong > (+Flip)</ td >
470- < td class ="has-text-centered "> < strong > 0.064</ strong > </ td >
471- < td class ="has-text-centered "> < strong > 8.31</ strong > </ td >
472- < td class ="has-text-centered "> < strong > 0.57</ strong > </ td >
464+ < tr style ="background-color:#fff6db; ">
465+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✗</ td >
466+ < td class ="has-text-centered "> 8.49</ td >
467+ < td class ="has-text-centered "> 0.032</ td >
468+ < td class ="has-text-centered "> 0.70</ td >
469+ </ tr >
470+ < tr style ="background-color:#e6f4ea; ">
471+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✓</ td >
472+ < td class ="has-text-centered "> < strong > 8.84</ strong > </ td >
473+ < td class ="has-text-centered "> < strong > 0.008</ strong > </ td >
474+ < td class ="has-text-centered "> < strong > 0.78</ strong > </ td >
473475 </ tr >
474476
475- <!-- ================= Qwen2.5-14B ================= -->
476- < tr style ="background-color: #f9f9f9; ">
477- < td colspan ="5 " style ="font-weight: bold; padding-top: 10px; ">
478- Qwen2.5-14B
479- </ td >
477+ <!-- ================= Qwen3-8B ================= -->
478+ < tr style ="background-color:#f9f9f9; ">
479+ < td colspan ="6 " style ="font-weight:bold; "> Qwen3-8B</ td >
480480 </ tr >
481481 < tr >
482- < td > </ td >
483- < td > Standard DPO</ td >
484- < td class ="has-text-centered "> 0.084</ td >
485- < td class ="has-text-centered "> 7.92</ td >
482+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✗</ td >
483+ < td class ="has-text-centered "> 6.14</ td >
484+ < td class ="has-text-centered "> 0.302</ td >
486485 < td class ="has-text-centered "> —</ td >
487486 </ tr >
488- < tr style ="background-color: #fff6db; ">
489- < td > </ td >
490- < td > < strong > F-DPO</ strong > (No Flip)</ td >
491- < td class ="has-text-centered "> 0.032</ td >
492- < td class ="has-text-centered "> 8.49</ td >
493- < td class ="has-text-centered "> 0.70</ td >
487+ < tr style ="background-color:#fff6db; ">
488+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✓</ td >
489+ < td class ="has-text-centered "> 6.32</ td >
490+ < td class ="has-text-centered "> 0.280</ td >
491+ < td class ="has-text-centered "> 0.53</ td >
494492 </ tr >
495- < tr style ="background-color: #e6f4ea; ">
496- < td > </ td >
497- < td > < strong > F-DPO</ strong > (+Flip)</ td >
498- < td class ="has-text-centered "> < strong > 0.008</ strong > </ td >
499- < td class ="has-text-centered "> < strong > 8.84</ strong > </ td >
500- < td class ="has-text-centered "> < strong > 0.78</ strong > </ td >
493+ < tr style ="background-color:#fff6db; ">
494+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✗</ td >
495+ < td class ="has-text-centered "> 7.14</ td >
496+ < td class ="has-text-centered "> 0.150</ td >
497+ < td class ="has-text-centered "> 0.66</ td >
498+ </ tr >
499+ < tr style ="background-color:#e6f4ea; ">
500+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✓</ td >
501+ < td class ="has-text-centered "> < strong > 7.90</ strong > </ td >
502+ < td class ="has-text-centered "> < strong > 0.084</ strong > </ td >
503+ < td class ="has-text-centered "> < strong > 0.70</ strong > </ td >
504+ </ tr >
505+
506+ <!-- ================= Qwen2-7B ================= -->
507+ < tr style ="background-color:#f9f9f9; ">
508+ < td colspan ="6 " style ="font-weight:bold; "> Qwen2-7B</ td >
509+ </ tr >
510+ < tr >
511+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✗</ td >
512+ < td class ="has-text-centered "> 6.50</ td >
513+ < td class ="has-text-centered "> 0.238</ td >
514+ < td class ="has-text-centered "> —</ td >
515+ </ tr >
516+ < tr style ="background-color:#fff6db; ">
517+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✓</ td >
518+ < td class ="has-text-centered "> 6.95</ td >
519+ < td class ="has-text-centered "> 0.176</ td >
520+ < td class ="has-text-centered "> 0.62</ td >
521+ </ tr >
522+ < tr style ="background-color:#fff6db; ">
523+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✗</ td >
524+ < td class ="has-text-centered "> 7.14</ td >
525+ < td class ="has-text-centered "> 0.150</ td >
526+ < td class ="has-text-centered "> 0.66</ td >
527+ </ tr >
528+ < tr style ="background-color:#e6f4ea; ">
529+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✓</ td >
530+ < td class ="has-text-centered "> < strong > 7.60</ strong > </ td >
531+ < td class ="has-text-centered "> < strong > 0.082</ strong > </ td >
532+ < td class ="has-text-centered "> < strong > 0.70</ strong > </ td >
501533 </ tr >
502534
503535 <!-- ================= LLaMA-3-8B ================= -->
504- < tr style ="background-color: #f9f9f9; ">
505- < td colspan ="5 " style ="font-weight: bold; padding-top: 10px; ">
506- LLaMA-3-8B
507- </ td >
536+ < tr style ="background-color:#f9f9f9; ">
537+ < td colspan ="6 " style ="font-weight:bold; "> LLaMA-3-8B</ td >
508538 </ tr >
509539 < tr >
510- < td > </ td >
511- < td > Standard DPO </ td >
540+ < td > </ td > < td > Standard DPO </ td > < td class =" has-text-centered " > ✗ </ td >
541+ < td class =" has-text-centered " > 6.00 </ td >
512542 < td class ="has-text-centered "> 0.290</ td >
513- < td class ="has-text-centered "> 6.05</ td >
514543 < td class ="has-text-centered "> —</ td >
515544 </ tr >
516- < tr style ="background-color: #fff6db; ">
517- < td > </ td >
518- < td > < strong > F-DPO</ strong > (No Flip)</ td >
519- < td class ="has-text-centered "> 0.234</ td >
545+ < tr style ="background-color:#fff6db; ">
546+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✓</ td >
547+ < td class ="has-text-centered "> 6.35</ td >
548+ < td class ="has-text-centered "> 0.260</ td >
549+ < td class ="has-text-centered "> 0.59</ td >
550+ </ tr >
551+ < tr style ="background-color:#fff6db; ">
552+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✗</ td >
520553 < td class ="has-text-centered "> 6.50</ td >
554+ < td class ="has-text-centered "> 0.234</ td >
521555 < td class ="has-text-centered "> 0.56</ td >
522556 </ tr >
523- < tr style ="background-color: #e6f4ea; ">
524- < td > </ td >
525- < td > < strong > F-DPO</ strong > (+Flip)</ td >
526- < td class ="has-text-centered "> < strong > 0.122</ strong > </ td >
527- < td class ="has-text-centered "> < strong > 7.10</ strong > </ td >
557+ < tr style ="background-color:#e6f4ea; ">
558+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✓</ td >
559+ < td class ="has-text-centered "> < strong > 7.00</ strong > </ td >
560+ < td class ="has-text-centered "> < strong > 0.154</ strong > </ td >
528561 < td class ="has-text-centered "> < strong > 0.72</ strong > </ td >
529562 </ tr >
530563
531- <!-- ================= Qwen3-8B ================= -->
532- < tr style ="background-color: #f9f9f9; ">
533- < td colspan ="5 " style ="font-weight: bold; padding-top: 10px; ">
534- Qwen3-8B
535- </ td >
564+ <!-- ================= Gemma-2-9B ================= -->
565+ < tr style ="background-color:#f9f9f9; ">
566+ < td colspan ="6 " style ="font-weight:bold; "> Gemma-2-9B</ td >
536567 </ tr >
537568 < tr >
538- < td > </ td >
539- < td > Standard DPO</ td >
540- < td class ="has-text-centered "> 0.300</ td >
541- < td class ="has-text-centered "> 6.08</ td >
569+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✗</ td >
570+ < td class ="has-text-centered "> 8.04</ td >
571+ < td class ="has-text-centered "> 0.092</ td >
542572 < td class ="has-text-centered "> —</ td >
543573 </ tr >
544- < tr style ="background-color: #fff6db; ">
545- < td > </ td >
546- < td > < strong > F-DPO</ strong > (No Flip)</ td >
547- < td class ="has-text-centered "> 0.150</ td >
548- < td class ="has-text-centered "> 7.14</ td >
549- < td class ="has-text-centered "> 0.66</ td >
574+ < tr style ="background-color:#e6f4ea; ">
575+ < td > </ td > < td > Standard DPO</ td > < td class ="has-text-centered "> ✓</ td >
576+ < td class ="has-text-centered "> < strong > 8.27</ strong > </ td >
577+ < td class ="has-text-centered "> < strong > 0.064</ strong > </ td >
578+ < td class ="has-text-centered "> 0.53</ td >
550579 </ tr >
551- < tr style ="background-color: #e6f4ea; border-bottom: 2px solid black; ">
552- < td > </ td >
553- < td > < strong > F-DPO</ strong > (+Flip)</ td >
554- < td class ="has-text-centered "> < strong > 0.080</ strong > </ td >
555- < td class ="has-text-centered "> < strong > 7.90</ strong > </ td >
556- < td class ="has-text-centered "> < strong > 0.70</ strong > </ td >
580+ < tr style ="background-color:#fff6db; ">
581+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✗</ td >
582+ < td class ="has-text-centered "> 8.06</ td >
583+ < td class ="has-text-centered "> 0.088</ td >
584+ < td class ="has-text-centered "> 0.49</ td >
585+ </ tr >
586+ < tr style ="background-color:#e6f4ea; border-bottom:2px solid black; ">
587+ < td > </ td > < td > < strong > F-DPO</ strong > </ td > < td class ="has-text-centered "> ✓</ td >
588+ < td class ="has-text-centered "> < strong > 8.26</ strong > </ td >
589+ < td class ="has-text-centered "> < strong > 0.068</ strong > </ td >
590+ < td class ="has-text-centered "> < strong > 0.57</ strong > </ td >
557591 </ tr >
558592
559593 </ tbody >
560594 </ table >
561595 </ div >
562596
563597 < p class ="has-text-centered is-size-7 ">
564- < span style ="background-color:#e6f4ea;padding:2px 6px; "> Green</ span > :
565- best per model;
566- < span style ="background-color:#fff6db;padding:2px 6px; "> Yellow</ span > :
567- improvement over Standard DPO;
568- “—” indicates not applicable.
598+ < span style ="background-color:#e6f4ea;padding:2px 6px; "> Green</ span > : best per model
599+ < span style ="background-color:#fff6db;padding:2px 6px; "> Yellow</ span > : second-best / improvement
600+ “—” indicates not applicable
569601 </ p >
570602
571603 </ div >
572604</ section >
573605
606+
574607</ section >
575608
576609<!--BibTex citation -->
0 commit comments