Skip to content

Commit 8f2ed38

Browse files
authored
Merge pull request #93 from sbintuitions/feat/jmteb_v2
[Feature] JMTEB v2リリースに伴うリーダーボード更新&対応モデル追加
2 parents 01d7c7a + 0a709f6 commit 8f2ed38

File tree

75 files changed

+5012
-1247
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+5012
-1247
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
{
2+
"Classification": {
3+
"amazon_counterfactual_classification": {
4+
"macro_f1": 0.718621425743256
5+
},
6+
"amazon_review_classification": {
7+
"macro_f1": 0.5664555524508175
8+
},
9+
"japanese_sentiment_classification": {
10+
"macro_f1": 0.9441075327867781
11+
},
12+
"massive_intent_classification": {
13+
"macro_f1": 0.7868184551588373
14+
},
15+
"massive_scenario_classification": {
16+
"macro_f1": 0.8970320222457714
17+
},
18+
"sib200_japanese_classification": {
19+
"macro_f1": 0.8424907003170607
20+
},
21+
"wrime_classification": {
22+
"macro_f1": 0.4316630478439933
23+
}
24+
},
25+
"Reranking": {
26+
"esci": {
27+
"ndcg@10": 0.9327323748768209
28+
},
29+
"jacwir_reranking": {
30+
"ndcg@10": 0.8955144849023412
31+
},
32+
"jqara": {
33+
"ndcg@10": 0.5391637817603238
34+
},
35+
"miracl_reranking": {
36+
"ndcg@10": 0.8596271423829606
37+
},
38+
"mldr_reranking": {
39+
"ndcg@10": 0.9778261029468881
40+
}
41+
},
42+
"Retrieval": {
43+
"jacwir_retrieval": {
44+
"ndcg@10": 0.851348898788452
45+
},
46+
"jagovfaqs_22k": {
47+
"ndcg@10": 0.6906829361885021
48+
},
49+
"jaqket": {
50+
"ndcg@10": 0.5659460589444328
51+
},
52+
"mintaka_retrieval": {
53+
"ndcg@10": 0.32175483024897333
54+
},
55+
"miracl_retrieval": {
56+
"ndcg@10": 0.734809783755516
57+
},
58+
"mldr_retrieval": {
59+
"ndcg@10": 0.5126063501865914
60+
},
61+
"mrtydi": {
62+
"ndcg@10": 0.45179452203971654
63+
},
64+
"nlp_journal_abs_article": {
65+
"ndcg@10": 0.9521915103722084
66+
},
67+
"nlp_journal_abs_intro": {
68+
"ndcg@10": 0.9752948774973371
69+
},
70+
"nlp_journal_title_abs": {
71+
"ndcg@10": 0.9602075886902439
72+
},
73+
"nlp_journal_title_intro": {
74+
"ndcg@10": 0.9197525363243463
75+
}
76+
},
77+
"STS": {
78+
"jsick": {
79+
"spearman": 0.7926524802982091
80+
},
81+
"jsts": {
82+
"spearman": 0.8020865982595183
83+
}
84+
},
85+
"Clustering": {
86+
"livedoor_news": {
87+
"v_measure_score": 0.5475619174246511
88+
},
89+
"mewsc16": {
90+
"v_measure_score": 0.4200457612686986
91+
},
92+
"sib200_japanese_clustering": {
93+
"v_measure_score": 0.3991288954568376
94+
}
95+
}
96+
}
Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,96 @@
11
{
22
"Classification": {
33
"amazon_counterfactual_classification": {
4-
"macro_f1": 0.7809527709426081
4+
"macro_f1": 0.7769528027441275
55
},
66
"amazon_review_classification": {
7-
"macro_f1": 0.5155899232320224
7+
"macro_f1": 0.5146406875677701
8+
},
9+
"japanese_sentiment_classification": {
10+
"macro_f1": 0.8844781754440035
811
},
912
"massive_intent_classification": {
10-
"macro_f1": 0.7879373479249787
13+
"macro_f1": 0.7872353730798753
1114
},
1215
"massive_scenario_classification": {
13-
"macro_f1": 0.8662625888023707
16+
"macro_f1": 0.8639715373498098
17+
},
18+
"sib200_japanese_classification": {
19+
"macro_f1": 0.8350488266987821
20+
},
21+
"wrime_classification": {
22+
"macro_f1": 0.3815230965003785
1423
}
1524
},
1625
"Reranking": {
1726
"esci": {
18-
"ndcg@10": 0.9095168116460639
27+
"ndcg@10": 0.909518320556229
28+
},
29+
"jacwir_reranking": {
30+
"ndcg@10": 0.5981293078380808
31+
},
32+
"jqara": {
33+
"ndcg@10": 0.3719557553111225
34+
},
35+
"miracl_reranking": {
36+
"ndcg@10": 0.6789908587925922
37+
},
38+
"mldr_reranking": {
39+
"ndcg@10": 0.8281088898171538
1940
}
2041
},
2142
"Retrieval": {
43+
"jacwir_retrieval": {
44+
"ndcg@10": 0.4085978545476503
45+
},
2246
"jagovfaqs_22k": {
23-
"ndcg@10": 0.42314124780036416
47+
"ndcg@10": 0.43879890119990833
2448
},
2549
"jaqket": {
26-
"ndcg@10": 0.36199154051747723
50+
"ndcg@10": 0.3555985699236658
51+
},
52+
"mintaka_retrieval": {
53+
"ndcg@10": 0.1997740482697841
54+
},
55+
"miracl_retrieval": {
56+
"ndcg@10": 0.16521386136598404
57+
},
58+
"mldr_retrieval": {
59+
"ndcg@10": 0.12060735418211223
2760
},
2861
"mrtydi": {
29-
"ndcg@10": 0.07810683176415421
62+
"ndcg@10": 0.07107405961190999
63+
},
64+
"nlp_journal_abs_article": {
65+
"ndcg@10": 0.5430415601583998
3066
},
3167
"nlp_journal_abs_intro": {
32-
"ndcg@10": 0.6077212544951452
68+
"ndcg@10": 0.5585881454407594
3369
},
3470
"nlp_journal_title_abs": {
35-
"ndcg@10": 0.6433890489201118
71+
"ndcg@10": 0.629620778788499
3672
},
3773
"nlp_journal_title_intro": {
38-
"ndcg@10": 0.39317174536190913
74+
"ndcg@10": 0.3517328767423871
3975
}
4076
},
4177
"STS": {
4278
"jsick": {
43-
"spearman": 0.754165277432144
79+
"spearman": 0.7775668305928584
4480
},
4581
"jsts": {
46-
"spearman": 0.7558202366183716
82+
"spearman": 0.7563460117163054
4783
}
4884
},
4985
"Clustering": {
5086
"livedoor_news": {
51-
"v_measure_score": 0.4966545453348478
87+
"v_measure_score": 0.4601335671191492
5288
},
5389
"mewsc16": {
54-
"v_measure_score": 0.3877356318022785
55-
}
56-
},
57-
"PairClassification": {
58-
"paws_x_ja": {
59-
"binary_f1": 0.6237623762376237
90+
"v_measure_score": 0.3922006290468797
91+
},
92+
"sib200_japanese_clustering": {
93+
"v_measure_score": 0.3456006554316726
6094
}
6195
}
6296
}
Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,96 @@
11
{
22
"Classification": {
33
"amazon_counterfactual_classification": {
4-
"macro_f1": 0.776174162517931
4+
"macro_f1": 0.7779156199278396
55
},
66
"amazon_review_classification": {
7-
"macro_f1": 0.5085781180553806
7+
"macro_f1": 0.5111451768867725
8+
},
9+
"japanese_sentiment_classification": {
10+
"macro_f1": 0.8782111274457993
811
},
912
"massive_intent_classification": {
10-
"macro_f1": 0.7718541530739129
13+
"macro_f1": 0.7796973463634825
1114
},
1215
"massive_scenario_classification": {
13-
"macro_f1": 0.8592571786794985
16+
"macro_f1": 0.8634142669499835
17+
},
18+
"sib200_japanese_classification": {
19+
"macro_f1": 0.8506408877596591
20+
},
21+
"wrime_classification": {
22+
"macro_f1": 0.3656175961601361
1423
}
1524
},
1625
"Reranking": {
1726
"esci": {
18-
"ndcg@10": 0.9100551950168166
27+
"ndcg@10": 0.9092446252246911
28+
},
29+
"jacwir_reranking": {
30+
"ndcg@10": 0.605113846464576
31+
},
32+
"jqara": {
33+
"ndcg@10": 0.36840730960684165
34+
},
35+
"miracl_reranking": {
36+
"ndcg@10": 0.693114284522583
37+
},
38+
"mldr_reranking": {
39+
"ndcg@10": 0.8530771666734125
1940
}
2041
},
2142
"Retrieval": {
43+
"jacwir_retrieval": {
44+
"ndcg@10": 0.42431895793525753
45+
},
2246
"jagovfaqs_22k": {
23-
"ndcg@10": 0.42368135774043536
47+
"ndcg@10": 0.43601956332213093
2448
},
2549
"jaqket": {
26-
"ndcg@10": 0.37721850397542034
50+
"ndcg@10": 0.37354035206874886
51+
},
52+
"mintaka_retrieval": {
53+
"ndcg@10": 0.2518443007449429
54+
},
55+
"miracl_retrieval": {
56+
"ndcg@10": 0.14756204576714857
57+
},
58+
"mldr_retrieval": {
59+
"ndcg@10": 0.16862391555076126
2760
},
2861
"mrtydi": {
29-
"ndcg@10": 0.07878085186566607
62+
"ndcg@10": 0.07770347901718931
63+
},
64+
"nlp_journal_abs_article": {
65+
"ndcg@10": 0.5689006657309228
3066
},
3167
"nlp_journal_abs_intro": {
32-
"ndcg@10": 0.636999375405723
68+
"ndcg@10": 0.5911474254499767
3369
},
3470
"nlp_journal_title_abs": {
35-
"ndcg@10": 0.6413498649875696
71+
"ndcg@10": 0.618101892252404
3672
},
3773
"nlp_journal_title_intro": {
38-
"ndcg@10": 0.397250919496823
74+
"ndcg@10": 0.3287673013916751
3975
}
4076
},
4177
"STS": {
4278
"jsick": {
43-
"spearman": 0.7756925231422259
79+
"spearman": 0.7893346270810556
4480
},
4581
"jsts": {
46-
"spearman": 0.7652968548841591
82+
"spearman": 0.7657111966582518
4783
}
4884
},
4985
"Clustering": {
5086
"livedoor_news": {
51-
"v_measure_score": 0.5262387436934941
87+
"v_measure_score": 0.48558605187442483
5288
},
5389
"mewsc16": {
54-
"v_measure_score": 0.37277574537292835
55-
}
56-
},
57-
"PairClassification": {
58-
"paws_x_ja": {
59-
"binary_f1": 0.623321554770318
90+
"v_measure_score": 0.4319848997472401
91+
},
92+
"sib200_japanese_clustering": {
93+
"v_measure_score": 0.3860004176729398
6094
}
6195
}
6296
}

0 commit comments

Comments
 (0)