@@ -1575,6 +1575,70 @@ fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,4096,416.11767578
15751575fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,8192,832.22705078125,832.22705078125,832.22705078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
15761576fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,16384,1544.44580078125,1544.44580078125,1544.44580078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
15771577fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,32768,2960.8837890625,2960.8837890625,2960.8837890625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1578+ fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,2,40.75366401672363,40.749671173095706,40.75765686035156,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1
1579+ fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,4,80.95231628417969,80.95231628417969,80.95231628417969,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1
1580+ fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,8,163.58604431152344,163.58604431152344,163.58604431152344,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1
1581+ fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,16,323.6761474609375,323.6761474609375,323.6761474609375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1
1582+ fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,2,23.71225643157959,23.612825775146483,23.8354434967041,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1
1583+ fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,4,46.86131286621094,46.80355911254883,46.91906661987304,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1
1584+ fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,8,94.54898834228516,94.54898834228516,94.54898834228516,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1
1585+ fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,16,189.99501037597656,189.99501037597656,189.99501037597656,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1
1586+ fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,2,42.67263984680176,42.54085083007813,42.80442886352539,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1
1587+ fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,4,82.2446060180664,82.2446060180664,82.2446060180664,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1
1588+ fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,8,167.00416564941406,167.00416564941406,167.00416564941406,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1
1589+ fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,16,327.0911865234375,327.0911865234375,327.0911865234375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1
1590+ fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,2,45.36115264892578,45.241344451904304,45.480960845947266,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1
1591+ fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,4,90.00038146972656,90.00038146972656,90.00038146972656,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1
1592+ fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,8,177.22674560546875,177.22674560546875,177.22674560546875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1
1593+ fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,16,356.5383605957031,356.5383605957031,356.5383605957031,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1
1594+ fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,2,1.814527988433838,1.8124799728393555,1.8167808055877686,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1
1595+ fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,4,1.84934401512146,1.8472959995269775,1.8524160385131836,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1
1596+ fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,8,1.891327977180481,1.8872319459915161,1.893990397453308,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1
1597+ fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,16,1.9722239971160889,1.9660799503326416,1.9763200283050537,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1
1598+ fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,2,22.014975547790527,21.710438537597657,22.19417533874512,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1
1599+ fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,4,41.83603096008301,41.752165222167974,41.91989669799805,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1
1600+ fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,8,81.66400146484375,81.66400146484375,81.66400146484375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1
1601+ fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,16,162.6429443359375,162.6429443359375,162.6429443359375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1
1602+ fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,2,7344.77685546875,7344.77685546875,7344.77685546875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1
1603+ fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,4,7408.80029296875,7408.80029296875,7408.80029296875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1
1604+ fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,8,7536.84716796875,7536.84716796875,7536.84716796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1
1605+ fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,16,7792.94091796875,7792.94091796875,7792.94091796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1
1606+ fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,2,9083.28125,9083.28125,9083.28125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1
1607+ fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,4,13138.3125,13138.3125,13138.3125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1
1608+ fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,8,21250.375,21250.375,21250.375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1
1609+ fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,16,37474.5,37474.5,37474.5,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1
1610+ fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,2,40.72038269042969,40.71178131103516,40.728984069824214,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1
1611+ fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,4,81.69369506835938,81.69369506835938,81.69369506835938,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1
1612+ fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,8,162.79653930664062,162.79653930664062,162.79653930664062,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1
1613+ fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,16,323.6546630859375,323.6546630859375,323.6546630859375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1
1614+ fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,2,23.70047950744629,23.628594589233398,23.732429122924806,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1
1615+ fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,4,47.36921691894531,47.085364532470706,47.65306930541992,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1
1616+ fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,8,94.83366394042969,94.83366394042969,94.83366394042969,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1
1617+ fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,16,190.0963897705078,190.0963897705078,190.0963897705078,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1
1618+ fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,2,42.318336486816406,42.15214080810547,42.48453216552734,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1
1619+ fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,4,82.4616928100586,82.4616928100586,82.4616928100586,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1
1620+ fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,8,163.43756103515625,163.43756103515625,163.43756103515625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1
1621+ fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,16,325.4384765625,325.4384765625,325.4384765625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1
1622+ fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,2,45.99193572998047,45.80761489868165,46.176256561279295,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1
1623+ fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,4,88.57190704345703,88.57190704345703,88.57190704345703,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1
1624+ fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,8,176.94105529785156,176.94105529785156,176.94105529785156,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1
1625+ fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,16,356.0478820800781,356.0478820800781,356.0478820800781,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1
1626+ fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,2,1.8242560029029846,1.8102271556854248,1.8309119939804077,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1
1627+ fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,4,1.84934401512146,1.846886396408081,1.8534400463104248,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1
1628+ fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,8,1.891327977180481,1.8892799615859985,1.8933759927749634,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1
1629+ fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,16,1.9752960205078125,1.9722239971160889,1.977344036102295,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1
1630+ fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,2,22.0262393951416,21.80997085571289,22.20482559204102,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1
1631+ fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,4,41.54521560668945,41.224806213378905,41.865625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1
1632+ fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,8,81.21753692626953,81.21753692626953,81.21753692626953,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1
1633+ fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,16,160.82022094726562,160.82022094726562,160.82022094726562,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1
1634+ fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,2,7344.77685546875,7344.77685546875,7344.77685546875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1
1635+ fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,4,7408.80029296875,7408.80029296875,7408.80029296875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1
1636+ fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,8,7536.84716796875,7536.84716796875,7536.84716796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1
1637+ fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,16,7792.94091796875,7792.94091796875,7792.94091796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1
1638+ fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,2,9083.28125,9083.28125,9083.28125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1
1639+ fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,4,13138.3125,13138.3125,13138.3125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1
1640+ fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,8,21250.375,21250.375,21250.375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1
1641+ fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,16,37474.5,37474.5,37474.5,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1
15781642llama4_rope,liger,forward,speed,ms,H,hidden size,512,0.08249600231647491,0.08102399855852127,0.08432000130414963,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:01,0.6.1
15791643llama4_rope,liger,forward,speed,ms,H,hidden size,2048,0.08169600367546082,0.08037760108709335,0.08329600095748901,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:01,0.6.1
15801644llama4_rope,liger,forward,speed,ms,H,hidden size,8192,0.08128000050783157,0.07980799674987793,0.08329600095748901,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:01,0.6.1
0 commit comments