Skip to content

Commit 61b0616

Browse files
author
GitHub Actions
committed
Update docs
1 parent 0ea65c0 commit 61b0616

File tree

7 files changed

+23
-23
lines changed

7 files changed

+23
-23
lines changed

_sources/deeplearning_operators/elementwise.md.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ Please note that this tutorial does not delve deeply into the design principles
2727
def elementwise_add(N, threads=256, dtype="bfloat16"):
2828

2929
@T.prim_func
30-
def main(A: T.Buffer((N), dtype), B: T.Buffer((N), dtype), C: T.Buffer((N), dtype)):
30+
def main(A: T.Tensor((N), dtype), B: T.Tensor((N), dtype), C: T.Tensor((N), dtype)):
3131
with T.Kernel(T.ceildiv(N, threads), threads=threads) as (b_x):
3232
# vector add.
3333
for i in T.Parallel(threads):
@@ -67,9 +67,9 @@ def elementwise_add(
6767
):
6868
@T.prim_func
6969
def main(
70-
A: T.Buffer((M, N), in_dtype),
71-
B: T.Buffer((M, N), in_dtype),
72-
C: T.Buffer((M, N), out_dtype),
70+
A: T.Tensor((M, N), in_dtype),
71+
B: T.Tensor((M, N), in_dtype),
72+
C: T.Tensor((M, N), out_dtype),
7373
):
7474
with T.Kernel(T.ceildiv(N, block_N), T.ceildiv(M, block_M), threads=threads) as (bx, by):
7575
start_x = bx * block_N
@@ -105,7 +105,7 @@ When compiling the example below, let's set `N` to 2047:
105105
def elementwise_add(N, num_per_thread=8, threads=256, dtype="bfloat16"):
106106

107107
@T.prim_func
108-
def main(A: T.Buffer((N), dtype), B: T.Buffer((N), dtype), C: T.Buffer((N), dtype)):
108+
def main(A: T.Tensor((N), dtype), B: T.Tensor((N), dtype), C: T.Tensor((N), dtype)):
109109
with T.Kernel(T.ceildiv(N, threads * num_per_thread), threads=threads) as (b_x):
110110
# vector add.
111111
for i, j in T.Parallel(threads, num_per_thread):
@@ -179,7 +179,7 @@ In such scenarios, explicitly specifying the number of elements computed per thr
179179
def elementwise_add(N, num_per_thread=8, threads=256, dtype="bfloat16"):
180180

181181
@T.prim_func
182-
def main(A: T.Buffer((N), dtype), B: T.Buffer((N), dtype), C: T.Buffer((N), dtype)):
182+
def main(A: T.Tensor((N), dtype), B: T.Tensor((N), dtype), C: T.Tensor((N), dtype)):
183183
with T.Kernel(T.ceildiv(N, threads * num_per_thread), threads=threads) as (b_x):
184184
# vector add.
185185
for i, j in T.Parallel(threads, num_per_thread):
@@ -215,7 +215,7 @@ But what happens if we provide additional hints to TileLang? For instance, by ex
215215
def elementwise_add(N, NUM_ELE_PER_THREAD=8, threads=256, dtype="bfloat16"):
216216

217217
@T.prim_func
218-
def main(A: T.Buffer((N), dtype), B: T.Buffer((N), dtype), C: T.Buffer((N), dtype)):
218+
def main(A: T.Tensor((N), dtype), B: T.Tensor((N), dtype), C: T.Tensor((N), dtype)):
219219
with T.Kernel(T.ceildiv(N, threads * NUM_ELE_PER_THREAD), threads=threads) as (b_x):
220220
A_register = T.alloc_fragment((threads * NUM_ELE_PER_THREAD), dtype)
221221
B_register = T.alloc_fragment((threads * NUM_ELE_PER_THREAD), dtype)

_sources/deeplearning_operators/matmul.md.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,9 @@ from tilelang.intrinsics import make_mma_swizzle_layout
6767
def matmul(M, N, K, block_M, block_N, block_K, dtype="float16", accum_dtype="float"):
6868
@T.prim_func
6969
def main(
70-
A: T.Buffer((M, K), dtype),
71-
B: T.Buffer((K, N), dtype),
72-
C: T.Buffer((M, N), dtype),
70+
A: T.Tensor((M, K), dtype),
71+
B: T.Tensor((K, N), dtype),
72+
C: T.Tensor((M, N), dtype),
7373
):
7474
# Initialize Kernel Context
7575
with T.Kernel(T.ceildiv(N, block_N), T.ceildiv(M, block_M), threads=128) as (bx, by):

_sources/tutorials/debug_tools_for_tilelang.md.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ For example, consider a case where a simple `T.copy` in 1D causes the lowering p
6666

6767
```python
6868
@T.prim_func
69-
def main(Q: T.Buffer(shape_q, dtype)):
69+
def main(Q: T.Tensor(shape_q, dtype)):
7070
# ...existing code...
7171
```
7272

deeplearning_operators/elementwise.html

Lines changed: 7 additions & 7 deletions
Large diffs are not rendered by default.

deeplearning_operators/matmul.html

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -284,9 +284,9 @@ <h3>Basic Structure<a class="headerlink" href="#basic-structure" title="Permalin
284284
<span class="k">def</span> <span class="nf">matmul</span><span class="p">(</span><span class="n">M</span><span class="p">,</span> <span class="n">N</span><span class="p">,</span> <span class="n">K</span><span class="p">,</span> <span class="n">block_M</span><span class="p">,</span> <span class="n">block_N</span><span class="p">,</span> <span class="n">block_K</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float16&quot;</span><span class="p">,</span> <span class="n">accum_dtype</span><span class="o">=</span><span class="s2">&quot;float&quot;</span><span class="p">):</span>
285285
<span class="nd">@T</span><span class="o">.</span><span class="n">prim_func</span>
286286
<span class="k">def</span> <span class="nf">main</span><span class="p">(</span>
287-
<span class="n">A</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Buffer</span><span class="p">((</span><span class="n">M</span><span class="p">,</span> <span class="n">K</span><span class="p">),</span> <span class="n">dtype</span><span class="p">),</span>
288-
<span class="n">B</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Buffer</span><span class="p">((</span><span class="n">K</span><span class="p">,</span> <span class="n">N</span><span class="p">),</span> <span class="n">dtype</span><span class="p">),</span>
289-
<span class="n">C</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Buffer</span><span class="p">((</span><span class="n">M</span><span class="p">,</span> <span class="n">N</span><span class="p">),</span> <span class="n">dtype</span><span class="p">),</span>
287+
<span class="n">A</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Tensor</span><span class="p">((</span><span class="n">M</span><span class="p">,</span> <span class="n">K</span><span class="p">),</span> <span class="n">dtype</span><span class="p">),</span>
288+
<span class="n">B</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Tensor</span><span class="p">((</span><span class="n">K</span><span class="p">,</span> <span class="n">N</span><span class="p">),</span> <span class="n">dtype</span><span class="p">),</span>
289+
<span class="n">C</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Tensor</span><span class="p">((</span><span class="n">M</span><span class="p">,</span> <span class="n">N</span><span class="p">),</span> <span class="n">dtype</span><span class="p">),</span>
290290
<span class="p">):</span>
291291
<span class="c1"># Initialize Kernel Context</span>
292292
<span class="k">with</span> <span class="n">T</span><span class="o">.</span><span class="n">Kernel</span><span class="p">(</span><span class="n">T</span><span class="o">.</span><span class="n">ceildiv</span><span class="p">(</span><span class="n">N</span><span class="p">,</span> <span class="n">block_N</span><span class="p">),</span> <span class="n">T</span><span class="o">.</span><span class="n">ceildiv</span><span class="p">(</span><span class="n">M</span><span class="p">,</span> <span class="n">block_M</span><span class="p">),</span> <span class="n">threads</span><span class="o">=</span><span class="mi">128</span><span class="p">)</span> <span class="k">as</span> <span class="p">(</span><span class="n">bx</span><span class="p">,</span> <span class="n">by</span><span class="p">):</span>

searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tutorials/debug_tools_for_tilelang.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ <h2>Debugging Generation Issues<a class="headerlink" href="#debugging-generation
279279
<p>When the code fails to generate (for instance, a compilation error occurs), you do <strong>not</strong> necessarily need to jump directly into C++ passes to debug. Instead, you can first inspect the intermediate representations (IR) in Python by printing them.</p>
280280
<p>For example, consider a case where a simple <code class="docutils literal notranslate"><span class="pre">T.copy</span></code> in 1D causes the lowering process to fail. The snippet below illustrates a simplified version of the problem (based on community Issue #35):</p>
281281
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="nd">@T</span><span class="o">.</span><span class="n">prim_func</span>
282-
<span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">Q</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Buffer</span><span class="p">(</span><span class="n">shape_q</span><span class="p">,</span> <span class="n">dtype</span><span class="p">)):</span>
282+
<span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">Q</span><span class="p">:</span> <span class="n">T</span><span class="o">.</span><span class="n">Tensor</span><span class="p">(</span><span class="n">shape_q</span><span class="p">,</span> <span class="n">dtype</span><span class="p">)):</span>
283283
<span class="c1"># ...existing code...</span>
284284
</pre></div>
285285
</div>

0 commit comments

Comments
 (0)