|
3 | 3 | [2,[[[["GetBlock",[],["T_transpose","main"],["b0"]],["GetLoops",["b0"],[],["l1","l2","l3","l4"]],["Fuse",["l1","l2","l3","l4"],[1],["l5"]],["SampleCategorical",[],[[32,64,128,256],[0.25,0.25,0.25,0.25]],["v6"]],["Split",["l5","None","v6"],[1],["l7","l8"]],["Bind",["l7"],["blockIdx.x"],[]],["Bind",["l8"],["threadIdx.x"],[]],["EnterPostproc",[],[],[]]],[[3,0]]],[2.7432184125781401896e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","float32",[1,1,32,128]],["TENSOR","float32",[1,32,1,128]]]]]
|
4 | 4 | [3,[[[["GetBlock",[],["T_transpose","main"],["b0"]],["GetBlock",[],["T_reshape","main"],["b1"]],["ReverseComputeInline",["b1"],[],[]],["GetLoops",["b0"],[],["l2","l3","l4","l5"]],["Fuse",["l2","l3","l4","l5"],[1],["l6"]],["SampleCategorical",[],[[32,64,128,256],[0.25,0.25,0.25,0.25]],["v7"]],["Split",["l6","None","v7"],[1],["l8","l9"]],["Bind",["l8"],["blockIdx.x"],[]],["Bind",["l9"],["threadIdx.x"],[]],["EnterPostproc",[],[],[]]],[[5,0]]],[2.7433244154431759014e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","float32",[1,32,1,128]],["TENSOR","float32",[1,1,4096]]]]]
|
5 | 5 | [4,[[[["GetBlock",[],["T_squeeze","main"],["b0"]],["GetLoops",["b0"],[],["l1","l2","l3"]],["Fuse",["l1","l2","l3"],[1],["l4"]],["SampleCategorical",[],[[32,64,128,256],[0.25,0.25,0.25,0.25]],["v5"]],["Split",["l4","None","v5"],[1],["l6","l7"]],["Bind",["l6"],["blockIdx.x"],[]],["Bind",["l7"],["threadIdx.x"],[]],["EnterPostproc",[],[],[]]],[[3,0]]],[2.7473226651480640409e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","float32",[1,1,32,128]],["TENSOR","float32",[1,32,128]]]]]
|
6 |
| -[5,[[[["GetBlock",[],["rxplaceholderred_temp","main"],["b0"]],["GetBlock",[],["rms_norm","main"],["b1"]],["GetBlock",[],["root","main"],["b2"]],["GetConsumers",["b0"],[],["b3"]],["GetLoops",["b3"],[],["l4","l5","l6"]],["SampleCategorical",[],[[4,8,16,32,64,128,256,512],[0.125,0.125,0.125,0.125,0.125,0.125,0.125,0.125]],["v7"]],["Split",["l6","None","v7"],[1],["l8","l9"]],["Bind",["l9"],["threadIdx.x"],[]],["ComputeAt",["b0","l5"],[1,-1],[]],["SetScope",["b0"],[0,"shared"],[]],["GetLoops",["b0"],[],["l10","l11","l12","l13","l14"]],["Split",["l14","None","v7"],[1],["l15","l16"]],["Bind",["l16"],["threadIdx.x"],[]],["SampleCategorical",[],[[0,16,64,512,1024],[0.2000000000000000111,0.2000000000000000111,0.2000000000000000111,0.2000000000000000111,0.2000000000000000111]],["v17"]],["Annotate",["b2","v17"],["meta_schedule.unroll_explicit"],[]],["GetLoops",["b1"],[],["l18","l19","l20","l21"]],["Fuse",["l18","l19"],[1],["l22"]],["Bind",["l22"],["blockIdx.x"],[]],["EnterPostproc",[],[],[]],["GetBlock",[],["root","main"],["b23"]],["Unannotate",["b23"],["meta_schedule.unroll_explicit"],[]],["GetChildBlocks",["b23"],[],["b24","b25"]],["GetLoops",["b24"],[],["l26","l27","l28","l29","l30"]],["Annotate",["l26",512],["pragma_auto_unroll_max_step"],[]],["Annotate",["l26",1],["pragma_unroll_explicit"],[]],["GetLoops",["b25"],[],["l31","l32","l33"]],["Annotate",["l31",512],["pragma_auto_unroll_max_step"],[]],["Annotate",["l31",1],["pragma_unroll_explicit"],[]]],[[5,6],[13,3]]],[2.7532844871794869994e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","float32",[1,1,4096]],["TENSOR","float32",[4096]],["TENSOR","float32",[1,1,4096]]]]] |
| 6 | +[5,[[[["GetBlock",[],["Ared_temp","main"],["b0"]],["GetBlock",[],["rms_norm","main"],["b1"]],["GetBlock",[],["root","main"],["b2"]],["GetConsumers",["b0"],[],["b3"]],["GetLoops",["b3"],[],["l4","l5","l6"]],["SampleCategorical",[],[[4,8,16,32,64,128,256,512],[0.125,0.125,0.125,0.125,0.125,0.125,0.125,0.125]],["v7"]],["Split",["l6","None","v7"],[1],["l8","l9"]],["Bind",["l9"],["threadIdx.x"],[]],["ComputeAt",["b0","l5"],[1,-1],[]],["SetScope",["b0"],[0,"shared"],[]],["GetLoops",["b0"],[],["l10","l11","l12","l13","l14"]],["Split",["l14","None","v7"],[1],["l15","l16"]],["Bind",["l16"],["threadIdx.x"],[]],["SampleCategorical",[],[[0,16,64,512,1024],[0.2000000000000000111,0.2000000000000000111,0.2000000000000000111,0.2000000000000000111,0.2000000000000000111]],["v17"]],["Annotate",["b2","v17"],["meta_schedule.unroll_explicit"],[]],["GetLoops",["b1"],[],["l18","l19","l20","l21"]],["Fuse",["l18","l19"],[1],["l22"]],["Bind",["l22"],["blockIdx.x"],[]],["EnterPostproc",[],[],[]],["GetBlock",[],["root","main"],["b23"]],["Unannotate",["b23"],["meta_schedule.unroll_explicit"],[]],["GetChildBlocks",["b23"],[],["b24","b25"]],["GetLoops",["b24"],[],["l26","l27","l28","l29","l30"]],["Annotate",["l26",512],["pragma_auto_unroll_max_step"],[]],["Annotate",["l26",1],["pragma_unroll_explicit"],[]],["GetLoops",["b25"],[],["l31","l32","l33"]],["Annotate",["l31",512],["pragma_auto_unroll_max_step"],[]],["Annotate",["l31",1],["pragma_unroll_explicit"],[]]],[[5,6],[13,3]]],[2.7532844871794869994e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","float32",[1,1,4096]],["TENSOR","float32",[4096]],["TENSOR","float32",[1,1,4096]]]]] |
7 | 7 | [6,[[[["GetBlock",[],["T_reshape","main"],["b0"]],["GetLoops",["b0"],[],["l1","l2","l3"]],["Fuse",["l1","l2","l3"],[1],["l4"]],["SampleCategorical",[],[[32,64,128,256],[0.25,0.25,0.25,0.25]],["v5"]],["Split",["l4","None","v5"],[1],["l6","l7"]],["Bind",["l6"],["blockIdx.x"],[]],["Bind",["l7"],["threadIdx.x"],[]],["EnterPostproc",[],[],[]]],[[3,3]]],[2.7541613796849541375e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","float32",[1,4096]],["TENSOR","float32",[1,1,4096]]]]]
|
8 | 8 | [7,[[[["GetBlock",[],["T_reshape","main"],["b0"]],["GetLoops",["b0"],[],["l1"]],["Fuse",["l1"],[1],["l2"]],["Split",["l2","None",1],[1],["l3","l4"]],["Bind",["l3"],["blockIdx.x"],[]],["Bind",["l4"],["threadIdx.x"],[]],["EnterPostproc",[],[],[]]],[]],[2.7649979407065505203e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","int32",[1,1]],["TENSOR","int32",[1]]]]]
|
9 | 9 | [8,[[[["GetBlock",[],["T_reshape","main"],["b0"]],["GetLoops",["b0"],[],["l1","l2","l3","l4"]],["Fuse",["l1","l2","l3","l4"],[1],["l5"]],["SampleCategorical",[],[[32,64,128,256],[0.25,0.25,0.25,0.25]],["v6"]],["Split",["l5","None","v6"],[1],["l7","l8"]],["Bind",["l7"],["blockIdx.x"],[]],["Bind",["l8"],["threadIdx.x"],[]],["EnterPostproc",[],[],[]]],[[3,2]]],[2.7951948034769459508e-05],{"host":{"keys":["arm_cpu","cpu"],"kind":"llvm","mcpu":"apple-latest","mtriple":"arm64-apple-macos","tag":""},"keys":["metal","gpu"],"kind":"metal","max_function_args":31,"max_num_threads":256,"max_shared_memory_per_block":32768,"max_threads_per_block":256,"tag":"","thread_warp_size":32},[["TENSOR","float32",[1,1,4096]],["TENSOR","float32",[1,1,32,128]]]]]
|
|
0 commit comments