@@ -564,58 +564,61 @@ <h2>Functions<a class="headerlink" href="#functions" title="Link to this heading
564564< tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.AnnotateDeviceRegions " title ="tilelang.transform.AnnotateDeviceRegions "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> AnnotateDeviceRegions</ span > </ code > </ a > ()</ p > </ td >
565565< td > < p > AnnotateDeviceRegions</ p > </ td >
566566</ tr >
567- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.VectorizeLoop " title ="tilelang.transform.VectorizeLoop "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> VectorizeLoop</ span > </ code > </ a > ([enable_vectorize])</ p > </ td >
567+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.SplitHostDevice " title ="tilelang.transform.SplitHostDevice "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> SplitHostDevice</ span > </ code > </ a > ()</ p > </ td >
568+ < td > < p > Split host/device functions even for empty kernels.</ p > </ td >
569+ </ tr >
570+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.VectorizeLoop " title ="tilelang.transform.VectorizeLoop "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> VectorizeLoop</ span > </ code > </ a > ([enable_vectorize])</ p > </ td >
568571< td > < p > VectorizeLoop</ p > </ td >
569572</ tr >
570- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.InjectPTXAsyncCopy " title ="tilelang.transform.InjectPTXAsyncCopy "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> InjectPTXAsyncCopy</ span > </ code > </ a > ()</ p > </ td >
573+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.InjectPTXAsyncCopy " title ="tilelang.transform.InjectPTXAsyncCopy "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> InjectPTXAsyncCopy</ span > </ code > </ a > ()</ p > </ td >
571574< td > < p > Rewrite global to shared memory copy on CUDA with asynchronous copy.</ p > </ td >
572575</ tr >
573- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerDeviceStorageAccessInfo " title ="tilelang.transform.LowerDeviceStorageAccessInfo "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerDeviceStorageAccessInfo</ span > </ code > </ a > ()</ p > </ td >
576+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerDeviceStorageAccessInfo " title ="tilelang.transform.LowerDeviceStorageAccessInfo "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerDeviceStorageAccessInfo</ span > </ code > </ a > ()</ p > </ td >
574577< td > < p > Lower attached storage access information on device.</ p > </ td >
575578</ tr >
576- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LoopVectorizeDynamic " title ="tilelang.transform.LoopVectorizeDynamic "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LoopVectorizeDynamic</ span > </ code > </ a > ()</ p > </ td >
579+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LoopVectorizeDynamic " title ="tilelang.transform.LoopVectorizeDynamic "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LoopVectorizeDynamic</ span > </ code > </ a > ()</ p > </ td >
577580< td > < p > Try to vectorize loop with dynamic shape.</ p > </ td >
578581</ tr >
579- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.ConfigIndexBitwidth " title ="tilelang.transform.ConfigIndexBitwidth "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> ConfigIndexBitwidth</ span > </ code > </ a > ()</ p > </ td >
582+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.ConfigIndexBitwidth " title ="tilelang.transform.ConfigIndexBitwidth "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> ConfigIndexBitwidth</ span > </ code > </ a > ()</ p > </ td >
580583< td > < p > Config index bitwidth.</ p > </ td >
581584</ tr >
582- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.FlattenBuffer " title ="tilelang.transform.FlattenBuffer "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> FlattenBuffer</ span > </ code > </ a > ()</ p > </ td >
585+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.FlattenBuffer " title ="tilelang.transform.FlattenBuffer "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> FlattenBuffer</ span > </ code > </ a > ()</ p > </ td >
583586< td > < p > FlattenBuffer</ p > </ td >
584587</ tr >
585- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.EliminateStorageSyncForMBarrier " title ="tilelang.transform.EliminateStorageSyncForMBarrier "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> EliminateStorageSyncForMBarrier</ span > </ code > </ a > ()</ p > </ td >
588+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.EliminateStorageSyncForMBarrier " title ="tilelang.transform.EliminateStorageSyncForMBarrier "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> EliminateStorageSyncForMBarrier</ span > </ code > </ a > ()</ p > </ td >
586589< td > < p > EliminateStorageSyncForMBarrier</ p > </ td >
587590</ tr >
588- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.MergeSharedMemoryAllocations " title ="tilelang.transform.MergeSharedMemoryAllocations "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> MergeSharedMemoryAllocations</ span > </ code > </ a > ([...])</ p > </ td >
591+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.MergeSharedMemoryAllocations " title ="tilelang.transform.MergeSharedMemoryAllocations "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> MergeSharedMemoryAllocations</ span > </ code > </ a > ([...])</ p > </ td >
589592< td > < p > MergeSharedMemoryAllocations</ p > </ td >
590593</ tr >
591- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerL2Persistent " title ="tilelang.transform.LowerL2Persistent "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerL2Persistent</ span > </ code > </ a > ()</ p > </ td >
594+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerL2Persistent " title ="tilelang.transform.LowerL2Persistent "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerL2Persistent</ span > </ code > </ a > ()</ p > </ td >
592595< td > < p > LowerL2Persistent</ p > </ td >
593596</ tr >
594- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.PersistThreadblock " title ="tilelang.transform.PersistThreadblock "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> PersistThreadblock</ span > </ code > </ a > ()</ p > </ td >
597+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.PersistThreadblock " title ="tilelang.transform.PersistThreadblock "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> PersistThreadblock</ span > </ code > </ a > ()</ p > </ td >
595598< td > < p > PersistThreadblock</ p > </ td >
596599</ tr >
597- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.AlignDynamicSharedMemoryAllocations " title ="tilelang.transform.AlignDynamicSharedMemoryAllocations "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> AlignDynamicSharedMemoryAllocations</ span > </ code > </ a > ([align_bytes])</ p > </ td >
600+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.AlignDynamicSharedMemoryAllocations " title ="tilelang.transform.AlignDynamicSharedMemoryAllocations "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> AlignDynamicSharedMemoryAllocations</ span > </ code > </ a > ([align_bytes])</ p > </ td >
598601< td > < p > AlignDynamicSharedMemoryAllocations</ p > </ td >
599602</ tr >
600- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerSharedBarrier " title ="tilelang.transform.LowerSharedBarrier "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerSharedBarrier</ span > </ code > </ a > ()</ p > </ td >
603+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerSharedBarrier " title ="tilelang.transform.LowerSharedBarrier "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerSharedBarrier</ span > </ code > </ a > ()</ p > </ td >
601604< td > < p > LowerSharedBarrier</ p > </ td >
602605</ tr >
603- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.StorageRewrite " title ="tilelang.transform.StorageRewrite "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> StorageRewrite</ span > </ code > </ a > ()</ p > </ td >
606+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.StorageRewrite " title ="tilelang.transform.StorageRewrite "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> StorageRewrite</ span > </ code > </ a > ()</ p > </ td >
604607< td > < p > StorageRewrite</ p > </ td >
605608</ tr >
606- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerOpaqueBlock " title ="tilelang.transform.LowerOpaqueBlock "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerOpaqueBlock</ span > </ code > </ a > ()</ p > </ td >
609+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerOpaqueBlock " title ="tilelang.transform.LowerOpaqueBlock "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerOpaqueBlock</ span > </ code > </ a > ()</ p > </ td >
607610< td > < p > LowerOpaqueBlock</ p > </ td >
608611</ tr >
609- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerThreadAllreduce " title ="tilelang.transform.LowerThreadAllreduce "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerThreadAllreduce</ span > </ code > </ a > ()</ p > </ td >
612+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerThreadAllreduce " title ="tilelang.transform.LowerThreadAllreduce "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerThreadAllreduce</ span > </ code > </ a > ()</ p > </ td >
610613< td > < p > LowerThreadAllreduce</ p > </ td >
611614</ tr >
612- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerDeviceKernelLaunch " title ="tilelang.transform.LowerDeviceKernelLaunch "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerDeviceKernelLaunch</ span > </ code > </ a > ()</ p > </ td >
615+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerDeviceKernelLaunch " title ="tilelang.transform.LowerDeviceKernelLaunch "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerDeviceKernelLaunch</ span > </ code > </ a > ()</ p > </ td >
613616< td > < p > Create and return a transform pass that lowers device kernel launch constructs to target-specific IR.</ p > </ td >
614617</ tr >
615- < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerSharedTmem " title ="tilelang.transform.LowerSharedTmem "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerSharedTmem</ span > </ code > </ a > ()</ p > </ td >
618+ < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LowerSharedTmem " title ="tilelang.transform.LowerSharedTmem "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LowerSharedTmem</ span > </ code > </ a > ()</ p > </ td >
616619< td > < p > LowerSharedTmem</ p > </ td >
617620</ tr >
618- < tr class ="row-even "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LayoutReducer " title ="tilelang.transform.LayoutReducer "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LayoutReducer</ span > </ code > </ a > ()</ p > </ td >
621+ < tr class ="row-odd "> < td > < p > < a class ="reference internal " href ="#tilelang.transform.LayoutReducer " title ="tilelang.transform.LayoutReducer "> < code class ="xref py py-obj docutils literal notranslate "> < span class ="pre "> LayoutReducer</ span > </ code > </ a > ()</ p > </ td >
619622< td > < p > Return a TVM transform pass that performs layout reduction/normalization.</ p > </ td >
620623</ tr >
621624</ tbody >
@@ -960,6 +963,20 @@ <h3>Returns:<a class="headerlink" href="#returns" title="Link to this heading">
960963</ dl >
961964</ dd > </ dl >
962965
966+ < dl class ="py function ">
967+ < dt class ="sig sig-object py " id ="tilelang.transform.SplitHostDevice ">
968+ < span class ="sig-prename descclassname "> < span class ="pre "> tilelang.transform.</ span > </ span > < span class ="sig-name descname "> < span class ="pre "> SplitHostDevice</ span > </ span > < span class ="sig-paren "> (</ span > < span class ="sig-paren "> )</ span > < a class ="headerlink " href ="#tilelang.transform.SplitHostDevice " title ="Link to this definition "> ¶</ a > </ dt >
969+ < dd > < p > Split host/device functions even for empty kernels.</ p >
970+ < dl class ="field-list simple ">
971+ < dt class ="field-odd "> Returns< span class ="colon "> :</ span > </ dt >
972+ < dd class ="field-odd "> < p > < strong > fpass</ strong > – The result pass</ p >
973+ </ dd >
974+ < dt class ="field-even "> Return type< span class ="colon "> :</ span > </ dt >
975+ < dd class ="field-even "> < p > tvm.transform.Pass</ p >
976+ </ dd >
977+ </ dl >
978+ </ dd > </ dl >
979+
963980< dl class ="py function ">
964981< dt class ="sig sig-object py " id ="tilelang.transform.VectorizeLoop ">
965982< span class ="sig-prename descclassname "> < span class ="pre "> tilelang.transform.</ span > </ span > < span class ="sig-name descname "> < span class ="pre "> VectorizeLoop</ span > </ span > < span class ="sig-paren "> (</ span > < em class ="sig-param "> < span class ="n "> < span class ="pre "> enable_vectorize</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> True</ span > </ span > </ em > < span class ="sig-paren "> )</ span > < a class ="headerlink " href ="#tilelang.transform.VectorizeLoop " title ="Link to this definition "> ¶</ a > </ dt >
@@ -1254,6 +1271,7 @@ <h3>Returns:<a class="headerlink" href="#returns" title="Link to this heading">
12541271< li > < a class ="reference internal " href ="#tilelang.transform.LegalizeSafeMemoryAccess "> < code class ="docutils literal notranslate "> < span class ="pre "> LegalizeSafeMemoryAccess()</ span > </ code > </ a > </ li >
12551272< li > < a class ="reference internal " href ="#tilelang.transform.MakePackedAPI "> < code class ="docutils literal notranslate "> < span class ="pre "> MakePackedAPI()</ span > </ code > </ a > </ li >
12561273< li > < a class ="reference internal " href ="#tilelang.transform.AnnotateDeviceRegions "> < code class ="docutils literal notranslate "> < span class ="pre "> AnnotateDeviceRegions()</ span > </ code > </ a > </ li >
1274+ < li > < a class ="reference internal " href ="#tilelang.transform.SplitHostDevice "> < code class ="docutils literal notranslate "> < span class ="pre "> SplitHostDevice()</ span > </ code > </ a > </ li >
12571275< li > < a class ="reference internal " href ="#tilelang.transform.VectorizeLoop "> < code class ="docutils literal notranslate "> < span class ="pre "> VectorizeLoop()</ span > </ code > </ a > </ li >
12581276< li > < a class ="reference internal " href ="#tilelang.transform.InjectPTXAsyncCopy "> < code class ="docutils literal notranslate "> < span class ="pre "> InjectPTXAsyncCopy()</ span > </ code > </ a > </ li >
12591277< li > < a class ="reference internal " href ="#tilelang.transform.LowerDeviceStorageAccessInfo "> < code class ="docutils literal notranslate "> < span class ="pre "> LowerDeviceStorageAccessInfo()</ span > </ code > </ a > </ li >
0 commit comments