Skip to content

Commit 4b964ec

Browse files
committed
Add changes for 65939a6
1 parent 2241d8d commit 4b964ec

File tree

2 files changed: 13 additions and 14 deletions.

index.html

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -122,28 +122,27 @@ <h2>Example<a class="headerlink" href="#example" title="Permalink to this headin
122122
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&quot;kernel_float.h&quot;</span>
123123
<span class="k">namespace</span><span class="w"> </span><span class="nn">kf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nn">kernel_float</span><span class="p">;</span>
124124

125-
<span class="n">__global__</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">kernel</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">kf</span><span class="o">::</span><span class="n">vec</span><span class="o">&lt;</span><span class="n">half</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;*</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">constant</span><span class="p">,</span><span class="w"> </span><span class="n">kf</span><span class="o">::</span><span class="n">vec</span><span class="o">&lt;</span><span class="kt">float</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;*</span><span class="w"> </span><span class="n">output</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
125+
<span class="n">__global__</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">kernel</span><span class="p">(</span><span class="n">kf</span><span class="o">::</span><span class="n">vec_ptr</span><span class="o">&lt;</span><span class="k">const</span><span class="w"> </span><span class="n">half</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">constant</span><span class="p">,</span><span class="w"> </span><span class="n">kf</span><span class="o">::</span><span class="n">vec_ptr</span><span class="o">&lt;</span><span class="kt">float</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="w"> </span><span class="n">output</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
126126
<span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">blockIdx</span><span class="p">.</span><span class="n">x</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">blockDim</span><span class="p">.</span><span class="n">x</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">threadIdx</span><span class="p">.</span><span class="n">x</span><span class="p">;</span>
127-
<span class="w"> </span><span class="n">output</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">input</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">kf</span><span class="o">::</span><span class="n">cast</span><span class="o">&lt;</span><span class="n">half</span><span class="o">&gt;</span><span class="p">(</span><span class="n">constant</span><span class="p">);</span>
127+
<span class="w"> </span><span class="n">output</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">+=</span><span class="w"> </span><span class="n">input</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">constant</span><span class="p">;</span>
128128
<span class="p">}</span>
129129
</pre></div>
130130
</div>
131131
<p>Here is how the same kernel would look for CUDA without Kernel Float.</p>
132-
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">__global__</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">kernel</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">__half</span><span class="o">*</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">constant</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="o">*</span><span class="w"> </span><span class="n">output</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
132+
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">__global__</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">kernel</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">half</span><span class="o">*</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">constant</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="o">*</span><span class="w"> </span><span class="n">output</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
133133
<span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">blockIdx</span><span class="p">.</span><span class="n">x</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">blockDim</span><span class="p">.</span><span class="n">x</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">threadIdx</span><span class="p">.</span><span class="n">x</span><span class="p">;</span>
134134
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">in0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">input</span><span class="p">[</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">0</span><span class="p">];</span>
135135
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">in1</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">input</span><span class="p">[</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">];</span>
136136
<span class="w"> </span><span class="n">__half2</span><span class="w"> </span><span class="n">a</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__halves2half2</span><span class="p">(</span><span class="n">in0</span><span class="p">,</span><span class="w"> </span><span class="n">in1</span><span class="p">);</span>
137-
<span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">b</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kt">float</span><span class="p">(</span><span class="n">constant</span><span class="p">);</span>
138-
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">c</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__float2half</span><span class="p">(</span><span class="n">b</span><span class="p">);</span>
139-
<span class="w"> </span><span class="n">__half2</span><span class="w"> </span><span class="n">d</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__half2half2</span><span class="p">(</span><span class="n">c</span><span class="p">);</span>
140-
<span class="w"> </span><span class="n">__half2</span><span class="w"> </span><span class="n">e</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__hadd2</span><span class="p">(</span><span class="n">a</span><span class="p">,</span><span class="w"> </span><span class="n">d</span><span class="p">);</span>
141-
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">f</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__low2half</span><span class="p">(</span><span class="n">e</span><span class="p">);</span>
142-
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">g</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__high2half</span><span class="p">(</span><span class="n">e</span><span class="p">);</span>
143-
<span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">out0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__half2float</span><span class="p">(</span><span class="n">f</span><span class="p">);</span>
144-
<span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">out1</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__half2float</span><span class="p">(</span><span class="n">g</span><span class="p">);</span>
145-
<span class="w"> </span><span class="n">output</span><span class="p">[</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">0</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">out0</span><span class="p">;</span>
146-
<span class="w"> </span><span class="n">output</span><span class="p">[</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">out1</span><span class="p">;</span>
137+
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">b</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__int2half_rn</span><span class="p">(</span><span class="n">constant</span><span class="p">);</span>
138+
<span class="w"> </span><span class="n">__half2</span><span class="w"> </span><span class="n">c</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__half2half2</span><span class="p">(</span><span class="n">b</span><span class="p">);</span>
139+
<span class="w"> </span><span class="n">__half2</span><span class="w"> </span><span class="n">d</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__hmul2</span><span class="p">(</span><span class="n">a</span><span class="p">,</span><span class="w"> </span><span class="n">c</span><span class="p">);</span>
140+
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">e</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__low2half</span><span class="p">(</span><span class="n">d</span><span class="p">);</span>
141+
<span class="w"> </span><span class="n">__half</span><span class="w"> </span><span class="n">f</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__high2half</span><span class="p">(</span><span class="n">d</span><span class="p">);</span>
142+
<span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">out0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__half2float</span><span class="p">(</span><span class="n">e</span><span class="p">);</span>
143+
<span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">out1</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__half2float</span><span class="p">(</span><span class="n">f</span><span class="p">);</span>
144+
<span class="w"> </span><span class="n">output</span><span class="p">[</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">0</span><span class="p">]</span><span class="w"> </span><span class="o">+=</span><span class="w"> </span><span class="n">out0</span><span class="p">;</span>
145+
<span class="w"> </span><span class="n">output</span><span class="p">[</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">+=</span><span class="w"> </span><span class="n">out1</span><span class="p">;</span>
147146
<span class="p">}</span>
148147
</pre></div>
149148
</div>

searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)